diff --git a/hotspot/make/windows/projectfiles/common/Makefile b/hotspot/make/windows/projectfiles/common/Makefile index f14f9c4ba07..125923e67b2 100644 --- a/hotspot/make/windows/projectfiles/common/Makefile +++ b/hotspot/make/windows/projectfiles/common/Makefile @@ -71,41 +71,36 @@ default:: $(AdditionalTargets) $(JvmtiGeneratedFiles) !include $(HOTSPOTWORKSPACE)/make/hotspot_version -!if "$(HOTSPOT_RELEASE_VERSION)" != "" -HOTSPOT_RELEASE_VERSION="$(HOTSPOT_RELEASE_VERSION)" -!else -HOTSPOT_RELEASE_VERSION="$(HS_MAJOR_VER).$(HS_MINOR_VER)-b$(HS_BUILD_NUMBER)" -!endif !if "$(USER_RELEASE_SUFFIX)" != "" -HOTSPOT_BUILD_VERSION$(HOTSPOT_BUILD_VERSION) = internal-$(USER_RELEASE_SUFFIX) +HOTSPOT_BUILD_VERSION = internal-$(USER_RELEASE_SUFFIX) !else -HOTSPOT_BUILD_VERSION$(HOTSPOT_BUILD_VERSION) = internal +HOTSPOT_BUILD_VERSION = internal !endif -!if "$(HOTSPOT_BUILD_VERSION)" != "" -HOTSPOT_RELEASE_VERSION="$(HOTSPOT_RELEASE_VERSION)-$(HOTSPOT_BUILD_VERSION)" +!if "$(HOTSPOT_RELEASE_VERSION)" != "" +HOTSPOT_RELEASE_VERSION="\\\"$(HOTSPOT_RELEASE_VERSION)\\\"" +!else +HOTSPOT_RELEASE_VERSION="\\\"$(HS_MAJOR_VER).$(HS_MINOR_VER)-b$(HS_BUILD_NUMBER)-$(HOTSPOT_BUILD_VERSION)\\\"" !endif !if "$(JRE_RELEASE_VERSION)" != "" -JRE_RELEASE_VERSION="$(JRE_RELEASE_VERSION)" +JRE_RELEASE_VERSION="\\\"$(JRE_RELEASE_VERSION)\\\"" !else -JRE_RELEASE_VERSION="$(JDK_MAJOR_VER).$(JDK_MINOR_VER).$(JDK_MICRO_VER)" +JRE_RELEASE_VERSION="\\\"$(JDK_MAJOR_VER).$(JDK_MINOR_VER).$(JDK_MICRO_VER)\\\"" !endif # Define HOTSPOT_VM_DISTRO if HOTSPOT_VM_DISTRO is set, # and if it is not see if we have the src/closed directory !if "$(HOTSPOT_VM_DISTRO)" != "" -HOTSPOT_VM_DISTRO="$(HOTSPOT_VM_DISTRO)" +HOTSPOT_VM_DISTRO=$(HOTSPOT_VM_DISTRO) !else !if exists($(HOTSPOTWORKSPACE)\src\closed) -HOTSPOT_VM_DISTRO="Java HotSpot(TM)" +HOTSPOT_VM_DISTRO="\\\"Java HotSpot(TM)\\\"" !else -HOTSPOT_VM_DISTRO="OpenJDK" +HOTSPOT_VM_DISTRO="\\\"OpenJDK\\\"" !endif !endif -ProjectCreatorIDEOptions = $(ProjectCreatorIDEOptions) \ - -define HOTSPOT_RELEASE_VERSION=\\\"$(HOTSPOT_RELEASE_VERSION)\\\" \ - -define JRE_RELEASE_VERSION=\\\"$(JRE_RELEASE_VERSION)\\\" \ - -define HOTSPOT_VM_DISTRO=\\\"$(HOTSPOT_VM_DISTRO)\\\" +ReleaseOptions = -define HOTSPOT_RELEASE_VERSION=$(HOTSPOT_RELEASE_VERSION) -define JRE_RELEASE_VERSION=$(JRE_RELEASE_VERSION) -define HOTSPOT_VM_DISTRO=$(HOTSPOT_VM_DISTRO) +ProjectCreatorIDEOptions = $(ProjectCreatorIDEOptions) $(ReleaseOptions) $(HOTSPOTBUILDSPACE)/$(ProjectFile): $(HOTSPOTBUILDSPACE)/classes/ProjectCreator.class @$(RUN_JAVA) -Djava.class.path="$(HOTSPOTBUILDSPACE)/classes" ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions) diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp index 25c850c8b46..2cc945ad2a1 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp @@ -24,4985 +24,8 @@ #include "precompiled.hpp" #include "asm/assembler.hpp" -#include "assembler_sparc.inline.hpp" -#include "gc_interface/collectedHeap.inline.hpp" -#include "interpreter/interpreter.hpp" -#include "memory/cardTableModRefBS.hpp" -#include "memory/resourceArea.hpp" -#include "prims/methodHandles.hpp" -#include "runtime/biasedLocking.hpp" -#include "runtime/interfaceSupport.hpp" -#include "runtime/objectMonitor.hpp" -#include "runtime/os.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" -#ifndef SERIALGC -#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" -#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" -#include "gc_implementation/g1/heapRegion.hpp" -#endif - -#ifdef PRODUCT -#define BLOCK_COMMENT(str) /* nothing */ -#define STOP(error) stop(error) -#else -#define BLOCK_COMMENT(str) block_comment(str) -#define STOP(error) block_comment(error); stop(error) -#endif - -// Convert the raw encoding form into the form expected by the -// constructor for Address. -Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) { - assert(scale == 0, "not supported"); - RelocationHolder rspec; - if (disp_reloc != relocInfo::none) { - rspec = Relocation::spec_simple(disp_reloc); - } - - Register rindex = as_Register(index); - if (rindex != G0) { - Address madr(as_Register(base), rindex); - madr._rspec = rspec; - return madr; - } else { - Address madr(as_Register(base), disp); - madr._rspec = rspec; - return madr; - } -} - -Address Argument::address_in_frame() const { - // Warning: In LP64 mode disp will occupy more than 10 bits, but - // op codes such as ld or ldx, only access disp() to get - // their simm13 argument. - int disp = ((_number - Argument::n_register_parameters + frame::memory_parameter_word_sp_offset) * BytesPerWord) + STACK_BIAS; - if (is_in()) - return Address(FP, disp); // In argument. - else - return Address(SP, disp); // Out argument. -} - -static const char* argumentNames[][2] = { - {"A0","P0"}, {"A1","P1"}, {"A2","P2"}, {"A3","P3"}, {"A4","P4"}, - {"A5","P5"}, {"A6","P6"}, {"A7","P7"}, {"A8","P8"}, {"A9","P9"}, - {"A(n>9)","P(n>9)"} -}; - -const char* Argument::name() const { - int nofArgs = sizeof argumentNames / sizeof argumentNames[0]; - int num = number(); - if (num >= nofArgs) num = nofArgs - 1; - return argumentNames[num][is_in() ? 1 : 0]; -} - -void Assembler::print_instruction(int inst) { - const char* s; - switch (inv_op(inst)) { - default: s = "????"; break; - case call_op: s = "call"; break; - case branch_op: - switch (inv_op2(inst)) { - case fb_op2: s = "fb"; break; - case fbp_op2: s = "fbp"; break; - case br_op2: s = "br"; break; - case bp_op2: s = "bp"; break; - case cb_op2: s = "cb"; break; - case bpr_op2: { - if (is_cbcond(inst)) { - s = is_cxb(inst) ? "cxb" : "cwb"; - } else { - s = "bpr"; - } - break; - } - default: s = "????"; break; - } - } - ::tty->print("%s", s); -} - - -// Patch instruction inst at offset inst_pos to refer to dest_pos -// and return the resulting instruction. -// We should have pcs, not offsets, but since all is relative, it will work out -// OK. -int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) { - - int m; // mask for displacement field - int v; // new value for displacement field - const int word_aligned_ones = -4; - switch (inv_op(inst)) { - default: ShouldNotReachHere(); - case call_op: m = wdisp(word_aligned_ones, 0, 30); v = wdisp(dest_pos, inst_pos, 30); break; - case branch_op: - switch (inv_op2(inst)) { - case fbp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; - case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; - case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; - case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; - case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; - case bpr_op2: { - if (is_cbcond(inst)) { - m = wdisp10(word_aligned_ones, 0); - v = wdisp10(dest_pos, inst_pos); - } else { - m = wdisp16(word_aligned_ones, 0); - v = wdisp16(dest_pos, inst_pos); - } - break; - } - default: ShouldNotReachHere(); - } - } - return inst & ~m | v; -} - -// Return the offset of the branch destionation of instruction inst -// at offset pos. -// Should have pcs, but since all is relative, it works out. -int Assembler::branch_destination(int inst, int pos) { - int r; - switch (inv_op(inst)) { - default: ShouldNotReachHere(); - case call_op: r = inv_wdisp(inst, pos, 30); break; - case branch_op: - switch (inv_op2(inst)) { - case fbp_op2: r = inv_wdisp( inst, pos, 19); break; - case bp_op2: r = inv_wdisp( inst, pos, 19); break; - case fb_op2: r = inv_wdisp( inst, pos, 22); break; - case br_op2: r = inv_wdisp( inst, pos, 22); break; - case cb_op2: r = inv_wdisp( inst, pos, 22); break; - case bpr_op2: { - if (is_cbcond(inst)) { - r = inv_wdisp10(inst, pos); - } else { - r = inv_wdisp16(inst, pos); - } - break; - } - default: ShouldNotReachHere(); - } - } - return r; -} +#include "asm/assembler.inline.hpp" int AbstractAssembler::code_fill_byte() { return 0x00; // illegal instruction 0x00000000 } - -Assembler::Condition Assembler::reg_cond_to_cc_cond(Assembler::RCondition in) { - switch (in) { - case rc_z: return equal; - case rc_lez: return lessEqual; - case rc_lz: return less; - case rc_nz: return notEqual; - case rc_gz: return greater; - case rc_gez: return greaterEqual; - default: - ShouldNotReachHere(); - } - return equal; -} - -// Generate a bunch 'o stuff (including v9's -#ifndef PRODUCT -void Assembler::test_v9() { - add( G0, G1, G2 ); - add( G3, 0, G4 ); - - addcc( G5, G6, G7 ); - addcc( I0, 1, I1 ); - addc( I2, I3, I4 ); - addc( I5, -1, I6 ); - addccc( I7, L0, L1 ); - addccc( L2, (1 << 12) - 2, L3 ); - - Label lbl1, lbl2, lbl3; - - bind(lbl1); - - bpr( rc_z, true, pn, L4, pc(), relocInfo::oop_type ); - delayed()->nop(); - bpr( rc_lez, false, pt, L5, lbl1); - delayed()->nop(); - - fb( f_never, true, pc() + 4, relocInfo::none); - delayed()->nop(); - fb( f_notEqual, false, lbl2 ); - delayed()->nop(); - - fbp( f_notZero, true, fcc0, pn, pc() - 4, relocInfo::none); - delayed()->nop(); - fbp( f_lessOrGreater, false, fcc1, pt, lbl3 ); - delayed()->nop(); - - br( equal, true, pc() + 1024, relocInfo::none); - delayed()->nop(); - br( lessEqual, false, lbl1 ); - delayed()->nop(); - br( never, false, lbl1 ); - delayed()->nop(); - - bp( less, true, icc, pn, pc(), relocInfo::none); - delayed()->nop(); - bp( lessEqualUnsigned, false, xcc, pt, lbl2 ); - delayed()->nop(); - - call( pc(), relocInfo::none); - delayed()->nop(); - call( lbl3 ); - delayed()->nop(); - - - casa( L6, L7, O0 ); - casxa( O1, O2, O3, 0 ); - - udiv( O4, O5, O7 ); - udiv( G0, (1 << 12) - 1, G1 ); - sdiv( G1, G2, G3 ); - sdiv( G4, -((1 << 12) - 1), G5 ); - udivcc( G6, G7, I0 ); - udivcc( I1, -((1 << 12) - 2), I2 ); - sdivcc( I3, I4, I5 ); - sdivcc( I6, -((1 << 12) - 0), I7 ); - - done(); - retry(); - - fadd( FloatRegisterImpl::S, F0, F1, F2 ); - fsub( FloatRegisterImpl::D, F34, F0, F62 ); - - fcmp( FloatRegisterImpl::Q, fcc0, F0, F60); - fcmpe( FloatRegisterImpl::S, fcc1, F31, F30); - - ftox( FloatRegisterImpl::D, F2, F4 ); - ftoi( FloatRegisterImpl::Q, F4, F8 ); - - ftof( FloatRegisterImpl::S, FloatRegisterImpl::Q, F3, F12 ); - - fxtof( FloatRegisterImpl::S, F4, F5 ); - fitof( FloatRegisterImpl::D, F6, F8 ); - - fmov( FloatRegisterImpl::Q, F16, F20 ); - fneg( FloatRegisterImpl::S, F6, F7 ); - fabs( FloatRegisterImpl::D, F10, F12 ); - - fmul( FloatRegisterImpl::Q, F24, F28, F32 ); - fmul( FloatRegisterImpl::S, FloatRegisterImpl::D, F8, F9, F14 ); - fdiv( FloatRegisterImpl::S, F10, F11, F12 ); - - fsqrt( FloatRegisterImpl::S, F13, F14 ); - - flush( L0, L1 ); - flush( L2, -1 ); - - flushw(); - - illtrap( (1 << 22) - 2); - - impdep1( 17, (1 << 19) - 1 ); - impdep2( 3, 0 ); - - jmpl( L3, L4, L5 ); - delayed()->nop(); - jmpl( L6, -1, L7, Relocation::spec_simple(relocInfo::none)); - delayed()->nop(); - - - ldf( FloatRegisterImpl::S, O0, O1, F15 ); - ldf( FloatRegisterImpl::D, O2, -1, F14 ); - - - ldfsr( O3, O4 ); - ldfsr( O5, -1 ); - ldxfsr( O6, O7 ); - ldxfsr( I0, -1 ); - - ldfa( FloatRegisterImpl::D, I1, I2, 1, F16 ); - ldfa( FloatRegisterImpl::Q, I3, -1, F36 ); - - ldsb( I4, I5, I6 ); - ldsb( I7, -1, G0 ); - ldsh( G1, G3, G4 ); - ldsh( G5, -1, G6 ); - ldsw( G7, L0, L1 ); - ldsw( L2, -1, L3 ); - ldub( L4, L5, L6 ); - ldub( L7, -1, O0 ); - lduh( O1, O2, O3 ); - lduh( O4, -1, O5 ); - lduw( O6, O7, G0 ); - lduw( G1, -1, G2 ); - ldx( G3, G4, G5 ); - ldx( G6, -1, G7 ); - ldd( I0, I1, I2 ); - ldd( I3, -1, I4 ); - - ldsba( I5, I6, 2, I7 ); - ldsba( L0, -1, L1 ); - ldsha( L2, L3, 3, L4 ); - ldsha( L5, -1, L6 ); - ldswa( L7, O0, (1 << 8) - 1, O1 ); - ldswa( O2, -1, O3 ); - lduba( O4, O5, 0, O6 ); - lduba( O7, -1, I0 ); - lduha( I1, I2, 1, I3 ); - lduha( I4, -1, I5 ); - lduwa( I6, I7, 2, L0 ); - lduwa( L1, -1, L2 ); - ldxa( L3, L4, 3, L5 ); - ldxa( L6, -1, L7 ); - ldda( G0, G1, 4, G2 ); - ldda( G3, -1, G4 ); - - ldstub( G5, G6, G7 ); - ldstub( O0, -1, O1 ); - - ldstuba( O2, O3, 5, O4 ); - ldstuba( O5, -1, O6 ); - - and3( I0, L0, O0 ); - and3( G7, -1, O7 ); - andcc( L2, I2, G2 ); - andcc( L4, -1, G4 ); - andn( I5, I6, I7 ); - andn( I6, -1, I7 ); - andncc( I5, I6, I7 ); - andncc( I7, -1, I6 ); - or3( I5, I6, I7 ); - or3( I7, -1, I6 ); - orcc( I5, I6, I7 ); - orcc( I7, -1, I6 ); - orn( I5, I6, I7 ); - orn( I7, -1, I6 ); - orncc( I5, I6, I7 ); - orncc( I7, -1, I6 ); - xor3( I5, I6, I7 ); - xor3( I7, -1, I6 ); - xorcc( I5, I6, I7 ); - xorcc( I7, -1, I6 ); - xnor( I5, I6, I7 ); - xnor( I7, -1, I6 ); - xnorcc( I5, I6, I7 ); - xnorcc( I7, -1, I6 ); - - membar( Membar_mask_bits(StoreStore | LoadStore | StoreLoad | LoadLoad | Sync | MemIssue | Lookaside ) ); - membar( StoreStore ); - membar( LoadStore ); - membar( StoreLoad ); - membar( LoadLoad ); - membar( Sync ); - membar( MemIssue ); - membar( Lookaside ); - - fmov( FloatRegisterImpl::S, f_ordered, true, fcc2, F16, F17 ); - fmov( FloatRegisterImpl::D, rc_lz, L5, F18, F20 ); - - movcc( overflowClear, false, icc, I6, L4 ); - movcc( f_unorderedOrEqual, true, fcc2, (1 << 10) - 1, O0 ); - - movr( rc_nz, I5, I6, I7 ); - movr( rc_gz, L1, -1, L2 ); - - mulx( I5, I6, I7 ); - mulx( I7, -1, I6 ); - sdivx( I5, I6, I7 ); - sdivx( I7, -1, I6 ); - udivx( I5, I6, I7 ); - udivx( I7, -1, I6 ); - - umul( I5, I6, I7 ); - umul( I7, -1, I6 ); - smul( I5, I6, I7 ); - smul( I7, -1, I6 ); - umulcc( I5, I6, I7 ); - umulcc( I7, -1, I6 ); - smulcc( I5, I6, I7 ); - smulcc( I7, -1, I6 ); - - mulscc( I5, I6, I7 ); - mulscc( I7, -1, I6 ); - - nop(); - - - popc( G0, G1); - popc( -1, G2); - - prefetch( L1, L2, severalReads ); - prefetch( L3, -1, oneRead ); - prefetcha( O3, O2, 6, severalWritesAndPossiblyReads ); - prefetcha( G2, -1, oneWrite ); - - rett( I7, I7); - delayed()->nop(); - rett( G0, -1, relocInfo::none); - delayed()->nop(); - - save( I5, I6, I7 ); - save( I7, -1, I6 ); - restore( I5, I6, I7 ); - restore( I7, -1, I6 ); - - saved(); - restored(); - - sethi( 0xaaaaaaaa, I3, Relocation::spec_simple(relocInfo::none)); - - sll( I5, I6, I7 ); - sll( I7, 31, I6 ); - srl( I5, I6, I7 ); - srl( I7, 0, I6 ); - sra( I5, I6, I7 ); - sra( I7, 30, I6 ); - sllx( I5, I6, I7 ); - sllx( I7, 63, I6 ); - srlx( I5, I6, I7 ); - srlx( I7, 0, I6 ); - srax( I5, I6, I7 ); - srax( I7, 62, I6 ); - - sir( -1 ); - - stbar(); - - stf( FloatRegisterImpl::Q, F40, G0, I7 ); - stf( FloatRegisterImpl::S, F18, I3, -1 ); - - stfsr( L1, L2 ); - stfsr( I7, -1 ); - stxfsr( I6, I5 ); - stxfsr( L4, -1 ); - - stfa( FloatRegisterImpl::D, F22, I6, I7, 7 ); - stfa( FloatRegisterImpl::Q, F44, G0, -1 ); - - stb( L5, O2, I7 ); - stb( I7, I6, -1 ); - sth( L5, O2, I7 ); - sth( I7, I6, -1 ); - stw( L5, O2, I7 ); - stw( I7, I6, -1 ); - stx( L5, O2, I7 ); - stx( I7, I6, -1 ); - std( L5, O2, I7 ); - std( I7, I6, -1 ); - - stba( L5, O2, I7, 8 ); - stba( I7, I6, -1 ); - stha( L5, O2, I7, 9 ); - stha( I7, I6, -1 ); - stwa( L5, O2, I7, 0 ); - stwa( I7, I6, -1 ); - stxa( L5, O2, I7, 11 ); - stxa( I7, I6, -1 ); - stda( L5, O2, I7, 12 ); - stda( I7, I6, -1 ); - - sub( I5, I6, I7 ); - sub( I7, -1, I6 ); - subcc( I5, I6, I7 ); - subcc( I7, -1, I6 ); - subc( I5, I6, I7 ); - subc( I7, -1, I6 ); - subccc( I5, I6, I7 ); - subccc( I7, -1, I6 ); - - swap( I5, I6, I7 ); - swap( I7, -1, I6 ); - - swapa( G0, G1, 13, G2 ); - swapa( I7, -1, I6 ); - - taddcc( I5, I6, I7 ); - taddcc( I7, -1, I6 ); - taddcctv( I5, I6, I7 ); - taddcctv( I7, -1, I6 ); - - tsubcc( I5, I6, I7 ); - tsubcc( I7, -1, I6 ); - tsubcctv( I5, I6, I7 ); - tsubcctv( I7, -1, I6 ); - - trap( overflowClear, xcc, G0, G1 ); - trap( lessEqual, icc, I7, 17 ); - - bind(lbl2); - bind(lbl3); - - code()->decode(); -} - -// Generate a bunch 'o stuff unique to V8 -void Assembler::test_v8_onlys() { - Label lbl1; - - cb( cp_0or1or2, false, pc() - 4, relocInfo::none); - delayed()->nop(); - cb( cp_never, true, lbl1); - delayed()->nop(); - - cpop1(1, 2, 3, 4); - cpop2(5, 6, 7, 8); - - ldc( I0, I1, 31); - ldc( I2, -1, 0); - - lddc( I4, I4, 30); - lddc( I6, 0, 1 ); - - ldcsr( L0, L1, 0); - ldcsr( L1, (1 << 12) - 1, 17 ); - - stc( 31, L4, L5); - stc( 30, L6, -(1 << 12) ); - - stdc( 0, L7, G0); - stdc( 1, G1, 0 ); - - stcsr( 16, G2, G3); - stcsr( 17, G4, 1 ); - - stdcq( 4, G5, G6); - stdcq( 5, G7, -1 ); - - bind(lbl1); - - code()->decode(); -} -#endif - -// Implementation of MacroAssembler - -void MacroAssembler::null_check(Register reg, int offset) { - if (needs_explicit_null_check((intptr_t)offset)) { - // provoke OS NULL exception if reg = NULL by - // accessing M[reg] w/o changing any registers - ld_ptr(reg, 0, G0); - } - else { - // nothing to do, (later) access of M[reg + offset] - // will provoke OS NULL exception if reg = NULL - } -} - -// Ring buffer jumps - -#ifndef PRODUCT -void MacroAssembler::ret( bool trace ) { if (trace) { - mov(I7, O7); // traceable register - JMP(O7, 2 * BytesPerInstWord); - } else { - jmpl( I7, 2 * BytesPerInstWord, G0 ); - } - } - -void MacroAssembler::retl( bool trace ) { if (trace) JMP(O7, 2 * BytesPerInstWord); - else jmpl( O7, 2 * BytesPerInstWord, G0 ); } -#endif /* PRODUCT */ - - -void MacroAssembler::jmp2(Register r1, Register r2, const char* file, int line ) { - assert_not_delayed(); - // This can only be traceable if r1 & r2 are visible after a window save - if (TraceJumps) { -#ifndef PRODUCT - save_frame(0); - verify_thread(); - ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0); - add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1); - sll(O0, exact_log2(4*sizeof(intptr_t)), O2); - add(O2, O1, O1); - - add(r1->after_save(), r2->after_save(), O2); - set((intptr_t)file, O3); - set(line, O4); - Label L; - // get nearby pc, store jmp target - call(L, relocInfo::none); // No relocation for call to pc+0x8 - delayed()->st(O2, O1, 0); - bind(L); - - // store nearby pc - st(O7, O1, sizeof(intptr_t)); - // store file - st(O3, O1, 2*sizeof(intptr_t)); - // store line - st(O4, O1, 3*sizeof(intptr_t)); - add(O0, 1, O0); - and3(O0, JavaThread::jump_ring_buffer_size - 1, O0); - st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset())); - restore(); -#endif /* PRODUCT */ - } - jmpl(r1, r2, G0); -} -void MacroAssembler::jmp(Register r1, int offset, const char* file, int line ) { - assert_not_delayed(); - // This can only be traceable if r1 is visible after a window save - if (TraceJumps) { -#ifndef PRODUCT - save_frame(0); - verify_thread(); - ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0); - add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1); - sll(O0, exact_log2(4*sizeof(intptr_t)), O2); - add(O2, O1, O1); - - add(r1->after_save(), offset, O2); - set((intptr_t)file, O3); - set(line, O4); - Label L; - // get nearby pc, store jmp target - call(L, relocInfo::none); // No relocation for call to pc+0x8 - delayed()->st(O2, O1, 0); - bind(L); - - // store nearby pc - st(O7, O1, sizeof(intptr_t)); - // store file - st(O3, O1, 2*sizeof(intptr_t)); - // store line - st(O4, O1, 3*sizeof(intptr_t)); - add(O0, 1, O0); - and3(O0, JavaThread::jump_ring_buffer_size - 1, O0); - st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset())); - restore(); -#endif /* PRODUCT */ - } - jmp(r1, offset); -} - -// This code sequence is relocatable to any address, even on LP64. -void MacroAssembler::jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line) { - assert_not_delayed(); - // Force fixed length sethi because NativeJump and NativeFarCall don't handle - // variable length instruction streams. - patchable_sethi(addrlit, temp); - Address a(temp, addrlit.low10() + offset); // Add the offset to the displacement. - if (TraceJumps) { -#ifndef PRODUCT - // Must do the add here so relocation can find the remainder of the - // value to be relocated. - add(a.base(), a.disp(), a.base(), addrlit.rspec(offset)); - save_frame(0); - verify_thread(); - ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0); - add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1); - sll(O0, exact_log2(4*sizeof(intptr_t)), O2); - add(O2, O1, O1); - - set((intptr_t)file, O3); - set(line, O4); - Label L; - - // get nearby pc, store jmp target - call(L, relocInfo::none); // No relocation for call to pc+0x8 - delayed()->st(a.base()->after_save(), O1, 0); - bind(L); - - // store nearby pc - st(O7, O1, sizeof(intptr_t)); - // store file - st(O3, O1, 2*sizeof(intptr_t)); - // store line - st(O4, O1, 3*sizeof(intptr_t)); - add(O0, 1, O0); - and3(O0, JavaThread::jump_ring_buffer_size - 1, O0); - st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset())); - restore(); - jmpl(a.base(), G0, d); -#else - jmpl(a.base(), a.disp(), d); -#endif /* PRODUCT */ - } else { - jmpl(a.base(), a.disp(), d); - } -} - -void MacroAssembler::jump(const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line) { - jumpl(addrlit, temp, G0, offset, file, line); -} - - -// Conditional breakpoint (for assertion checks in assembly code) -void MacroAssembler::breakpoint_trap(Condition c, CC cc) { - trap(c, cc, G0, ST_RESERVED_FOR_USER_0); -} - -// We want to use ST_BREAKPOINT here, but the debugger is confused by it. -void MacroAssembler::breakpoint_trap() { - trap(ST_RESERVED_FOR_USER_0); -} - -// flush windows (except current) using flushw instruction if avail. -void MacroAssembler::flush_windows() { - if (VM_Version::v9_instructions_work()) flushw(); - else flush_windows_trap(); -} - -// Write serialization page so VM thread can do a pseudo remote membar -// We use the current thread pointer to calculate a thread specific -// offset to write to within the page. This minimizes bus traffic -// due to cache line collision. -void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { - srl(thread, os::get_serialize_page_shift_count(), tmp2); - if (Assembler::is_simm13(os::vm_page_size())) { - and3(tmp2, (os::vm_page_size() - sizeof(int)), tmp2); - } - else { - set((os::vm_page_size() - sizeof(int)), tmp1); - and3(tmp2, tmp1, tmp2); - } - set(os::get_memory_serialize_page(), tmp1); - st(G0, tmp1, tmp2); -} - - - -void MacroAssembler::enter() { - Unimplemented(); -} - -void MacroAssembler::leave() { - Unimplemented(); -} - -void MacroAssembler::mult(Register s1, Register s2, Register d) { - if(VM_Version::v9_instructions_work()) { - mulx (s1, s2, d); - } else { - smul (s1, s2, d); - } -} - -void MacroAssembler::mult(Register s1, int simm13a, Register d) { - if(VM_Version::v9_instructions_work()) { - mulx (s1, simm13a, d); - } else { - smul (s1, simm13a, d); - } -} - - -#ifdef ASSERT -void MacroAssembler::read_ccr_v8_assert(Register ccr_save) { - const Register s1 = G3_scratch; - const Register s2 = G4_scratch; - Label get_psr_test; - // Get the condition codes the V8 way. - read_ccr_trap(s1); - mov(ccr_save, s2); - // This is a test of V8 which has icc but not xcc - // so mask off the xcc bits - and3(s2, 0xf, s2); - // Compare condition codes from the V8 and V9 ways. - subcc(s2, s1, G0); - br(Assembler::notEqual, true, Assembler::pt, get_psr_test); - delayed()->breakpoint_trap(); - bind(get_psr_test); -} - -void MacroAssembler::write_ccr_v8_assert(Register ccr_save) { - const Register s1 = G3_scratch; - const Register s2 = G4_scratch; - Label set_psr_test; - // Write out the saved condition codes the V8 way - write_ccr_trap(ccr_save, s1, s2); - // Read back the condition codes using the V9 instruction - rdccr(s1); - mov(ccr_save, s2); - // This is a test of V8 which has icc but not xcc - // so mask off the xcc bits - and3(s2, 0xf, s2); - and3(s1, 0xf, s1); - // Compare the V8 way with the V9 way. - subcc(s2, s1, G0); - br(Assembler::notEqual, true, Assembler::pt, set_psr_test); - delayed()->breakpoint_trap(); - bind(set_psr_test); -} -#else -#define read_ccr_v8_assert(x) -#define write_ccr_v8_assert(x) -#endif // ASSERT - -void MacroAssembler::read_ccr(Register ccr_save) { - if (VM_Version::v9_instructions_work()) { - rdccr(ccr_save); - // Test code sequence used on V8. Do not move above rdccr. - read_ccr_v8_assert(ccr_save); - } else { - read_ccr_trap(ccr_save); - } -} - -void MacroAssembler::write_ccr(Register ccr_save) { - if (VM_Version::v9_instructions_work()) { - // Test code sequence used on V8. Do not move below wrccr. - write_ccr_v8_assert(ccr_save); - wrccr(ccr_save); - } else { - const Register temp_reg1 = G3_scratch; - const Register temp_reg2 = G4_scratch; - write_ccr_trap(ccr_save, temp_reg1, temp_reg2); - } -} - - -// Calls to C land - -#ifdef ASSERT -// a hook for debugging -static Thread* reinitialize_thread() { - return ThreadLocalStorage::thread(); -} -#else -#define reinitialize_thread ThreadLocalStorage::thread -#endif - -#ifdef ASSERT -address last_get_thread = NULL; -#endif - -// call this when G2_thread is not known to be valid -void MacroAssembler::get_thread() { - save_frame(0); // to avoid clobbering O0 - mov(G1, L0); // avoid clobbering G1 - mov(G5_method, L1); // avoid clobbering G5 - mov(G3, L2); // avoid clobbering G3 also - mov(G4, L5); // avoid clobbering G4 -#ifdef ASSERT - AddressLiteral last_get_thread_addrlit(&last_get_thread); - set(last_get_thread_addrlit, L3); - inc(L4, get_pc(L4) + 2 * BytesPerInstWord); // skip getpc() code + inc + st_ptr to point L4 at call - st_ptr(L4, L3, 0); -#endif - call(CAST_FROM_FN_PTR(address, reinitialize_thread), relocInfo::runtime_call_type); - delayed()->nop(); - mov(L0, G1); - mov(L1, G5_method); - mov(L2, G3); - mov(L5, G4); - restore(O0, 0, G2_thread); -} - -static Thread* verify_thread_subroutine(Thread* gthread_value) { - Thread* correct_value = ThreadLocalStorage::thread(); - guarantee(gthread_value == correct_value, "G2_thread value must be the thread"); - return correct_value; -} - -void MacroAssembler::verify_thread() { - if (VerifyThread) { - // NOTE: this chops off the heads of the 64-bit O registers. -#ifdef CC_INTERP - save_frame(0); -#else - // make sure G2_thread contains the right value - save_frame_and_mov(0, Lmethod, Lmethod); // to avoid clobbering O0 (and propagate Lmethod for -Xprof) - mov(G1, L1); // avoid clobbering G1 - // G2 saved below - mov(G3, L3); // avoid clobbering G3 - mov(G4, L4); // avoid clobbering G4 - mov(G5_method, L5); // avoid clobbering G5_method -#endif /* CC_INTERP */ -#if defined(COMPILER2) && !defined(_LP64) - // Save & restore possible 64-bit Long arguments in G-regs - srlx(G1,32,L0); - srlx(G4,32,L6); -#endif - call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type); - delayed()->mov(G2_thread, O0); - - mov(L1, G1); // Restore G1 - // G2 restored below - mov(L3, G3); // restore G3 - mov(L4, G4); // restore G4 - mov(L5, G5_method); // restore G5_method -#if defined(COMPILER2) && !defined(_LP64) - // Save & restore possible 64-bit Long arguments in G-regs - sllx(L0,32,G2); // Move old high G1 bits high in G2 - srl(G1, 0,G1); // Clear current high G1 bits - or3 (G1,G2,G1); // Recover 64-bit G1 - sllx(L6,32,G2); // Move old high G4 bits high in G2 - srl(G4, 0,G4); // Clear current high G4 bits - or3 (G4,G2,G4); // Recover 64-bit G4 -#endif - restore(O0, 0, G2_thread); - } -} - - -void MacroAssembler::save_thread(const Register thread_cache) { - verify_thread(); - if (thread_cache->is_valid()) { - assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile"); - mov(G2_thread, thread_cache); - } - if (VerifyThread) { - // smash G2_thread, as if the VM were about to anyway - set(0x67676767, G2_thread); - } -} - - -void MacroAssembler::restore_thread(const Register thread_cache) { - if (thread_cache->is_valid()) { - assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile"); - mov(thread_cache, G2_thread); - verify_thread(); - } else { - // do it the slow way - get_thread(); - } -} - - -// %%% maybe get rid of [re]set_last_Java_frame -void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) { - assert_not_delayed(); - Address flags(G2_thread, JavaThread::frame_anchor_offset() + - JavaFrameAnchor::flags_offset()); - Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset()); - - // Always set last_Java_pc and flags first because once last_Java_sp is visible - // has_last_Java_frame is true and users will look at the rest of the fields. - // (Note: flags should always be zero before we get here so doesn't need to be set.) - -#ifdef ASSERT - // Verify that flags was zeroed on return to Java - Label PcOk; - save_frame(0); // to avoid clobbering O0 - ld_ptr(pc_addr, L0); - br_null_short(L0, Assembler::pt, PcOk); - STOP("last_Java_pc not zeroed before leaving Java"); - bind(PcOk); - - // Verify that flags was zeroed on return to Java - Label FlagsOk; - ld(flags, L0); - tst(L0); - br(Assembler::zero, false, Assembler::pt, FlagsOk); - delayed() -> restore(); - STOP("flags not zeroed before leaving Java"); - bind(FlagsOk); -#endif /* ASSERT */ - // - // When returning from calling out from Java mode the frame anchor's last_Java_pc - // will always be set to NULL. It is set here so that if we are doing a call to - // native (not VM) that we capture the known pc and don't have to rely on the - // native call having a standard frame linkage where we can find the pc. - - if (last_Java_pc->is_valid()) { - st_ptr(last_Java_pc, pc_addr); - } - -#ifdef _LP64 -#ifdef ASSERT - // Make sure that we have an odd stack - Label StackOk; - andcc(last_java_sp, 0x01, G0); - br(Assembler::notZero, false, Assembler::pt, StackOk); - delayed()->nop(); - STOP("Stack Not Biased in set_last_Java_frame"); - bind(StackOk); -#endif // ASSERT - assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame"); - add( last_java_sp, STACK_BIAS, G4_scratch ); - st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset()); -#else - st_ptr(last_java_sp, G2_thread, JavaThread::last_Java_sp_offset()); -#endif // _LP64 -} - -void MacroAssembler::reset_last_Java_frame(void) { - assert_not_delayed(); - - Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset()); - Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); - Address flags (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); - -#ifdef ASSERT - // check that it WAS previously set -#ifdef CC_INTERP - save_frame(0); -#else - save_frame_and_mov(0, Lmethod, Lmethod); // Propagate Lmethod to helper frame for -Xprof -#endif /* CC_INTERP */ - ld_ptr(sp_addr, L0); - tst(L0); - breakpoint_trap(Assembler::zero, Assembler::ptr_cc); - restore(); -#endif // ASSERT - - st_ptr(G0, sp_addr); - // Always return last_Java_pc to zero - st_ptr(G0, pc_addr); - // Always null flags after return to Java - st(G0, flags); -} - - -void MacroAssembler::call_VM_base( - Register oop_result, - Register thread_cache, - Register last_java_sp, - address entry_point, - int number_of_arguments, - bool check_exceptions) -{ - assert_not_delayed(); - - // determine last_java_sp register - if (!last_java_sp->is_valid()) { - last_java_sp = SP; - } - // debugging support - assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); - - // 64-bit last_java_sp is biased! - set_last_Java_frame(last_java_sp, noreg); - if (VerifyThread) mov(G2_thread, O0); // about to be smashed; pass early - save_thread(thread_cache); - // do the call - call(entry_point, relocInfo::runtime_call_type); - if (!VerifyThread) - delayed()->mov(G2_thread, O0); // pass thread as first argument - else - delayed()->nop(); // (thread already passed) - restore_thread(thread_cache); - reset_last_Java_frame(); - - // check for pending exceptions. use Gtemp as scratch register. - if (check_exceptions) { - check_and_forward_exception(Gtemp); - } - -#ifdef ASSERT - set(badHeapWordVal, G3); - set(badHeapWordVal, G4); - set(badHeapWordVal, G5); -#endif - - // get oop result if there is one and reset the value in the thread - if (oop_result->is_valid()) { - get_vm_result(oop_result); - } -} - -void MacroAssembler::check_and_forward_exception(Register scratch_reg) -{ - Label L; - - check_and_handle_popframe(scratch_reg); - check_and_handle_earlyret(scratch_reg); - - Address exception_addr(G2_thread, Thread::pending_exception_offset()); - ld_ptr(exception_addr, scratch_reg); - br_null_short(scratch_reg, pt, L); - // we use O7 linkage so that forward_exception_entry has the issuing PC - call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); - delayed()->nop(); - bind(L); -} - - -void MacroAssembler::check_and_handle_popframe(Register scratch_reg) { -} - - -void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) { -} - - -void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { - call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); -} - - -void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { - // O0 is reserved for the thread - mov(arg_1, O1); - call_VM(oop_result, entry_point, 1, check_exceptions); -} - - -void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { - // O0 is reserved for the thread - mov(arg_1, O1); - mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); - call_VM(oop_result, entry_point, 2, check_exceptions); -} - - -void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { - // O0 is reserved for the thread - mov(arg_1, O1); - mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); - mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument"); - call_VM(oop_result, entry_point, 3, check_exceptions); -} - - - -// Note: The following call_VM overloadings are useful when a "save" -// has already been performed by a stub, and the last Java frame is -// the previous one. In that case, last_java_sp must be passed as FP -// instead of SP. - - -void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { - call_VM_base(oop_result, noreg, last_java_sp, entry_point, number_of_arguments, check_exceptions); -} - - -void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { - // O0 is reserved for the thread - mov(arg_1, O1); - call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); -} - - -void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { - // O0 is reserved for the thread - mov(arg_1, O1); - mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); - call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); -} - - -void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { - // O0 is reserved for the thread - mov(arg_1, O1); - mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); - mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument"); - call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); -} - - - -void MacroAssembler::call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments) { - assert_not_delayed(); - save_thread(thread_cache); - // do the call - call(entry_point, relocInfo::runtime_call_type); - delayed()->nop(); - restore_thread(thread_cache); -#ifdef ASSERT - set(badHeapWordVal, G3); - set(badHeapWordVal, G4); - set(badHeapWordVal, G5); -#endif -} - - -void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments) { - call_VM_leaf_base(thread_cache, entry_point, number_of_arguments); -} - - -void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1) { - mov(arg_1, O0); - call_VM_leaf(thread_cache, entry_point, 1); -} - - -void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2) { - mov(arg_1, O0); - mov(arg_2, O1); assert(arg_2 != O0, "smashed argument"); - call_VM_leaf(thread_cache, entry_point, 2); -} - - -void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3) { - mov(arg_1, O0); - mov(arg_2, O1); assert(arg_2 != O0, "smashed argument"); - mov(arg_3, O2); assert(arg_3 != O0 && arg_3 != O1, "smashed argument"); - call_VM_leaf(thread_cache, entry_point, 3); -} - - -void MacroAssembler::get_vm_result(Register oop_result) { - verify_thread(); - Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); - ld_ptr( vm_result_addr, oop_result); - st_ptr(G0, vm_result_addr); - verify_oop(oop_result); -} - - -void MacroAssembler::get_vm_result_2(Register metadata_result) { - verify_thread(); - Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset()); - ld_ptr(vm_result_addr_2, metadata_result); - st_ptr(G0, vm_result_addr_2); -} - - -// We require that C code which does not return a value in vm_result will -// leave it undisturbed. -void MacroAssembler::set_vm_result(Register oop_result) { - verify_thread(); - Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); - verify_oop(oop_result); - -# ifdef ASSERT - // Check that we are not overwriting any other oop. -#ifdef CC_INTERP - save_frame(0); -#else - save_frame_and_mov(0, Lmethod, Lmethod); // Propagate Lmethod for -Xprof -#endif /* CC_INTERP */ - ld_ptr(vm_result_addr, L0); - tst(L0); - restore(); - breakpoint_trap(notZero, Assembler::ptr_cc); - // } -# endif - - st_ptr(oop_result, vm_result_addr); -} - - -void MacroAssembler::ic_call(address entry, bool emit_delay) { - RelocationHolder rspec = virtual_call_Relocation::spec(pc()); - patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg); - relocate(rspec); - call(entry, relocInfo::none); - if (emit_delay) { - delayed()->nop(); - } -} - - -void MacroAssembler::card_table_write(jbyte* byte_map_base, - Register tmp, Register obj) { -#ifdef _LP64 - srlx(obj, CardTableModRefBS::card_shift, obj); -#else - srl(obj, CardTableModRefBS::card_shift, obj); -#endif - assert(tmp != obj, "need separate temp reg"); - set((address) byte_map_base, tmp); - stb(G0, tmp, obj); -} - - -void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) { - address save_pc; - int shiftcnt; -#ifdef _LP64 -# ifdef CHECK_DELAY - assert_not_delayed((char*) "cannot put two instructions in delay slot"); -# endif - v9_dep(); - save_pc = pc(); - - int msb32 = (int) (addrlit.value() >> 32); - int lsb32 = (int) (addrlit.value()); - - if (msb32 == 0 && lsb32 >= 0) { - Assembler::sethi(lsb32, d, addrlit.rspec()); - } - else if (msb32 == -1) { - Assembler::sethi(~lsb32, d, addrlit.rspec()); - xor3(d, ~low10(~0), d); - } - else { - Assembler::sethi(msb32, d, addrlit.rspec()); // msb 22-bits - if (msb32 & 0x3ff) // Any bits? - or3(d, msb32 & 0x3ff, d); // msb 32-bits are now in lsb 32 - if (lsb32 & 0xFFFFFC00) { // done? - if ((lsb32 >> 20) & 0xfff) { // Any bits set? - sllx(d, 12, d); // Make room for next 12 bits - or3(d, (lsb32 >> 20) & 0xfff, d); // Or in next 12 - shiftcnt = 0; // We already shifted - } - else - shiftcnt = 12; - if ((lsb32 >> 10) & 0x3ff) { - sllx(d, shiftcnt + 10, d); // Make room for last 10 bits - or3(d, (lsb32 >> 10) & 0x3ff, d); // Or in next 10 - shiftcnt = 0; - } - else - shiftcnt = 10; - sllx(d, shiftcnt + 10, d); // Shift leaving disp field 0'd - } - else - sllx(d, 32, d); - } - // Pad out the instruction sequence so it can be patched later. - if (ForceRelocatable || (addrlit.rtype() != relocInfo::none && - addrlit.rtype() != relocInfo::runtime_call_type)) { - while (pc() < (save_pc + (7 * BytesPerInstWord))) - nop(); - } -#else - Assembler::sethi(addrlit.value(), d, addrlit.rspec()); -#endif -} - - -void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) { - internal_sethi(addrlit, d, false); -} - - -void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) { - internal_sethi(addrlit, d, true); -} - - -int MacroAssembler::insts_for_sethi(address a, bool worst_case) { -#ifdef _LP64 - if (worst_case) return 7; - intptr_t iaddr = (intptr_t) a; - int msb32 = (int) (iaddr >> 32); - int lsb32 = (int) (iaddr); - int count; - if (msb32 == 0 && lsb32 >= 0) - count = 1; - else if (msb32 == -1) - count = 2; - else { - count = 2; - if (msb32 & 0x3ff) - count++; - if (lsb32 & 0xFFFFFC00 ) { - if ((lsb32 >> 20) & 0xfff) count += 2; - if ((lsb32 >> 10) & 0x3ff) count += 2; - } - } - return count; -#else - return 1; -#endif -} - -int MacroAssembler::worst_case_insts_for_set() { - return insts_for_sethi(NULL, true) + 1; -} - - -// Keep in sync with MacroAssembler::insts_for_internal_set -void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) { - intptr_t value = addrlit.value(); - - if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) { - // can optimize - if (-4096 <= value && value <= 4095) { - or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended) - return; - } - if (inv_hi22(hi22(value)) == value) { - sethi(addrlit, d); - return; - } - } - assert_not_delayed((char*) "cannot put two instructions in delay slot"); - internal_sethi(addrlit, d, ForceRelocatable); - if (ForceRelocatable || addrlit.rspec().type() != relocInfo::none || addrlit.low10() != 0) { - add(d, addrlit.low10(), d, addrlit.rspec()); - } -} - -// Keep in sync with MacroAssembler::internal_set -int MacroAssembler::insts_for_internal_set(intptr_t value) { - // can optimize - if (-4096 <= value && value <= 4095) { - return 1; - } - if (inv_hi22(hi22(value)) == value) { - return insts_for_sethi((address) value); - } - int count = insts_for_sethi((address) value); - AddressLiteral al(value); - if (al.low10() != 0) { - count++; - } - return count; -} - -void MacroAssembler::set(const AddressLiteral& al, Register d) { - internal_set(al, d, false); -} - -void MacroAssembler::set(intptr_t value, Register d) { - AddressLiteral al(value); - internal_set(al, d, false); -} - -void MacroAssembler::set(address addr, Register d, RelocationHolder const& rspec) { - AddressLiteral al(addr, rspec); - internal_set(al, d, false); -} - -void MacroAssembler::patchable_set(const AddressLiteral& al, Register d) { - internal_set(al, d, true); -} - -void MacroAssembler::patchable_set(intptr_t value, Register d) { - AddressLiteral al(value); - internal_set(al, d, true); -} - - -void MacroAssembler::set64(jlong value, Register d, Register tmp) { - assert_not_delayed(); - v9_dep(); - - int hi = (int)(value >> 32); - int lo = (int)(value & ~0); - // (Matcher::isSimpleConstant64 knows about the following optimizations.) - if (Assembler::is_simm13(lo) && value == lo) { - or3(G0, lo, d); - } else if (hi == 0) { - Assembler::sethi(lo, d); // hardware version zero-extends to upper 32 - if (low10(lo) != 0) - or3(d, low10(lo), d); - } - else if (hi == -1) { - Assembler::sethi(~lo, d); // hardware version zero-extends to upper 32 - xor3(d, low10(lo) ^ ~low10(~0), d); - } - else if (lo == 0) { - if (Assembler::is_simm13(hi)) { - or3(G0, hi, d); - } else { - Assembler::sethi(hi, d); // hardware version zero-extends to upper 32 - if (low10(hi) != 0) - or3(d, low10(hi), d); - } - sllx(d, 32, d); - } - else { - Assembler::sethi(hi, tmp); - Assembler::sethi(lo, d); // macro assembler version sign-extends - if (low10(hi) != 0) - or3 (tmp, low10(hi), tmp); - if (low10(lo) != 0) - or3 ( d, low10(lo), d); - sllx(tmp, 32, tmp); - or3 (d, tmp, d); - } -} - -int MacroAssembler::insts_for_set64(jlong value) { - v9_dep(); - - int hi = (int) (value >> 32); - int lo = (int) (value & ~0); - int count = 0; - - // (Matcher::isSimpleConstant64 knows about the following optimizations.) - if (Assembler::is_simm13(lo) && value == lo) { - count++; - } else if (hi == 0) { - count++; - if (low10(lo) != 0) - count++; - } - else if (hi == -1) { - count += 2; - } - else if (lo == 0) { - if (Assembler::is_simm13(hi)) { - count++; - } else { - count++; - if (low10(hi) != 0) - count++; - } - count++; - } - else { - count += 2; - if (low10(hi) != 0) - count++; - if (low10(lo) != 0) - count++; - count += 2; - } - return count; -} - -// compute size in bytes of sparc frame, given -// number of extraWords -int MacroAssembler::total_frame_size_in_bytes(int extraWords) { - - int nWords = frame::memory_parameter_word_sp_offset; - - nWords += extraWords; - - if (nWords & 1) ++nWords; // round up to double-word - - return nWords * BytesPerWord; -} - - -// save_frame: given number of "extra" words in frame, -// issue approp. save instruction (p 200, v8 manual) - -void MacroAssembler::save_frame(int extraWords) { - int delta = -total_frame_size_in_bytes(extraWords); - if (is_simm13(delta)) { - save(SP, delta, SP); - } else { - set(delta, G3_scratch); - save(SP, G3_scratch, SP); - } -} - - -void MacroAssembler::save_frame_c1(int size_in_bytes) { - if (is_simm13(-size_in_bytes)) { - save(SP, -size_in_bytes, SP); - } else { - set(-size_in_bytes, G3_scratch); - save(SP, G3_scratch, SP); - } -} - - -void MacroAssembler::save_frame_and_mov(int extraWords, - Register s1, Register d1, - Register s2, Register d2) { - assert_not_delayed(); - - // The trick here is to use precisely the same memory word - // that trap handlers also use to save the register. - // This word cannot be used for any other purpose, but - // it works fine to save the register's value, whether or not - // an interrupt flushes register windows at any given moment! - Address s1_addr; - if (s1->is_valid() && (s1->is_in() || s1->is_local())) { - s1_addr = s1->address_in_saved_window(); - st_ptr(s1, s1_addr); - } - - Address s2_addr; - if (s2->is_valid() && (s2->is_in() || s2->is_local())) { - s2_addr = s2->address_in_saved_window(); - st_ptr(s2, s2_addr); - } - - save_frame(extraWords); - - if (s1_addr.base() == SP) { - ld_ptr(s1_addr.after_save(), d1); - } else if (s1->is_valid()) { - mov(s1->after_save(), d1); - } - - if (s2_addr.base() == SP) { - ld_ptr(s2_addr.after_save(), d2); - } else if (s2->is_valid()) { - mov(s2->after_save(), d2); - } -} - - -AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { - assert(oop_recorder() != NULL, "this assembler needs a Recorder"); - int index = oop_recorder()->allocate_metadata_index(obj); - RelocationHolder rspec = metadata_Relocation::spec(index); - return AddressLiteral((address)obj, rspec); -} - -AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { - assert(oop_recorder() != NULL, "this assembler needs a Recorder"); - int index = oop_recorder()->find_index(obj); - RelocationHolder rspec = metadata_Relocation::spec(index); - return AddressLiteral((address)obj, rspec); -} - - -AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { - assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); - assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop"); - int oop_index = oop_recorder()->find_index(obj); - return AddressLiteral(obj, oop_Relocation::spec(oop_index)); -} - -void MacroAssembler::set_narrow_oop(jobject obj, Register d) { - assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int oop_index = oop_recorder()->find_index(obj); - RelocationHolder rspec = oop_Relocation::spec(oop_index); - - assert_not_delayed(); - // Relocation with special format (see relocInfo_sparc.hpp). - relocate(rspec, 1); - // Assembler::sethi(0x3fffff, d); - emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) ); - // Don't add relocation for 'add'. Do patching during 'sethi' processing. - add(d, 0x3ff, d); - -} - -void MacroAssembler::set_narrow_klass(Klass* k, Register d) { - assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int klass_index = oop_recorder()->find_index(k); - RelocationHolder rspec = metadata_Relocation::spec(klass_index); - narrowOop encoded_k = oopDesc::encode_klass(k); - - assert_not_delayed(); - // Relocation with special format (see relocInfo_sparc.hpp). - relocate(rspec, 1); - // Assembler::sethi(encoded_k, d); - emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(encoded_k) ); - // Don't add relocation for 'add'. Do patching during 'sethi' processing. - add(d, low10(encoded_k), d); - -} - -void MacroAssembler::align(int modulus) { - while (offset() % modulus != 0) nop(); -} - - -void MacroAssembler::safepoint() { - relocate(breakpoint_Relocation::spec(breakpoint_Relocation::safepoint)); -} - - -void RegistersForDebugging::print(outputStream* s) { - FlagSetting fs(Debugging, true); - int j; - for (j = 0; j < 8; ++j) { - if (j != 6) { s->print("i%d = ", j); os::print_location(s, i[j]); } - else { s->print( "fp = " ); os::print_location(s, i[j]); } - } - s->cr(); - - for (j = 0; j < 8; ++j) { - s->print("l%d = ", j); os::print_location(s, l[j]); - } - s->cr(); - - for (j = 0; j < 8; ++j) { - if (j != 6) { s->print("o%d = ", j); os::print_location(s, o[j]); } - else { s->print( "sp = " ); os::print_location(s, o[j]); } - } - s->cr(); - - for (j = 0; j < 8; ++j) { - s->print("g%d = ", j); os::print_location(s, g[j]); - } - s->cr(); - - // print out floats with compression - for (j = 0; j < 32; ) { - jfloat val = f[j]; - int last = j; - for ( ; last+1 < 32; ++last ) { - char b1[1024], b2[1024]; - sprintf(b1, "%f", val); - sprintf(b2, "%f", f[last+1]); - if (strcmp(b1, b2)) - break; - } - s->print("f%d", j); - if ( j != last ) s->print(" - f%d", last); - s->print(" = %f", val); - s->fill_to(25); - s->print_cr(" (0x%x)", val); - j = last + 1; - } - s->cr(); - - // and doubles (evens only) - for (j = 0; j < 32; ) { - jdouble val = d[j]; - int last = j; - for ( ; last+1 < 32; ++last ) { - char b1[1024], b2[1024]; - sprintf(b1, "%f", val); - sprintf(b2, "%f", d[last+1]); - if (strcmp(b1, b2)) - break; - } - s->print("d%d", 2 * j); - if ( j != last ) s->print(" - d%d", last); - s->print(" = %f", val); - s->fill_to(30); - s->print("(0x%x)", *(int*)&val); - s->fill_to(42); - s->print_cr("(0x%x)", *(1 + (int*)&val)); - j = last + 1; - } - s->cr(); -} - -void RegistersForDebugging::save_registers(MacroAssembler* a) { - a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0); - a->flush_windows(); - int i; - for (i = 0; i < 8; ++i) { - a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, i_offset(i)); - a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, l_offset(i)); - a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i)); - a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i)); - } - for (i = 0; i < 32; ++i) { - a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i)); - } - for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) { - a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i)); - } -} - -void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) { - for (int i = 1; i < 8; ++i) { - a->ld_ptr(r, g_offset(i), as_gRegister(i)); - } - for (int j = 0; j < 32; ++j) { - a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j)); - } - for (int k = 0; k < (VM_Version::v9_instructions_work() ? 64 : 32); k += 2) { - a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k)); - } -} - - -// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack -void MacroAssembler::push_fTOS() { - // %%%%%% need to implement this -} - -// pops double TOS element from CPU stack and pushes on FPU stack -void MacroAssembler::pop_fTOS() { - // %%%%%% need to implement this -} - -void MacroAssembler::empty_FPU_stack() { - // %%%%%% need to implement this -} - -void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * file, int line) { - // plausibility check for oops - if (!VerifyOops) return; - - if (reg == G0) return; // always NULL, which is always an oop - - BLOCK_COMMENT("verify_oop {"); - char buffer[64]; -#ifdef COMPILER1 - if (CommentedAssembly) { - snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); - block_comment(buffer); - } -#endif - - int len = strlen(file) + strlen(msg) + 1 + 4; - sprintf(buffer, "%d", line); - len += strlen(buffer); - sprintf(buffer, " at offset %d ", offset()); - len += strlen(buffer); - char * real_msg = new char[len]; - sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line); - - // Call indirectly to solve generation ordering problem - AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address()); - - // Make some space on stack above the current register window. - // Enough to hold 8 64-bit registers. - add(SP,-8*8,SP); - - // Save some 64-bit registers; a normal 'save' chops the heads off - // of 64-bit longs in the 32-bit build. - stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8); - stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8); - mov(reg,O0); // Move arg into O0; arg might be in O7 which is about to be crushed - stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8); - - // Size of set() should stay the same - patchable_set((intptr_t)real_msg, O1); - // Load address to call to into O7 - load_ptr_contents(a, O7); - // Register call to verify_oop_subroutine - callr(O7, G0); - delayed()->nop(); - // recover frame size - add(SP, 8*8,SP); - BLOCK_COMMENT("} verify_oop"); -} - -void MacroAssembler::_verify_oop_addr(Address addr, const char* msg, const char * file, int line) { - // plausibility check for oops - if (!VerifyOops) return; - - char buffer[64]; - sprintf(buffer, "%d", line); - int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer); - sprintf(buffer, " at SP+%d ", addr.disp()); - len += strlen(buffer); - char * real_msg = new char[len]; - sprintf(real_msg, "%s at SP+%d (%s:%d)", msg, addr.disp(), file, line); - - // Call indirectly to solve generation ordering problem - AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address()); - - // Make some space on stack above the current register window. - // Enough to hold 8 64-bit registers. - add(SP,-8*8,SP); - - // Save some 64-bit registers; a normal 'save' chops the heads off - // of 64-bit longs in the 32-bit build. - stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8); - stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8); - ld_ptr(addr.base(), addr.disp() + 8*8, O0); // Load arg into O0; arg might be in O7 which is about to be crushed - stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8); - - // Size of set() should stay the same - patchable_set((intptr_t)real_msg, O1); - // Load address to call to into O7 - load_ptr_contents(a, O7); - // Register call to verify_oop_subroutine - callr(O7, G0); - delayed()->nop(); - // recover frame size - add(SP, 8*8,SP); -} - -// side-door communication with signalHandler in os_solaris.cpp -address MacroAssembler::_verify_oop_implicit_branch[3] = { NULL }; - -// This macro is expanded just once; it creates shared code. Contract: -// receives an oop in O0. Must restore O0 & O7 from TLS. Must not smash ANY -// registers, including flags. May not use a register 'save', as this blows -// the high bits of the O-regs if they contain Long values. Acts as a 'leaf' -// call. -void MacroAssembler::verify_oop_subroutine() { - assert( VM_Version::v9_instructions_work(), "VerifyOops not supported for V8" ); - - // Leaf call; no frame. - Label succeed, fail, null_or_fail; - - // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home). - // O0 is now the oop to be checked. O7 is the return address. - Register O0_obj = O0; - - // Save some more registers for temps. - stx(O2,SP,frame::register_save_words*wordSize+STACK_BIAS+2*8); - stx(O3,SP,frame::register_save_words*wordSize+STACK_BIAS+3*8); - stx(O4,SP,frame::register_save_words*wordSize+STACK_BIAS+4*8); - stx(O5,SP,frame::register_save_words*wordSize+STACK_BIAS+5*8); - - // Save flags - Register O5_save_flags = O5; - rdccr( O5_save_flags ); - - { // count number of verifies - Register O2_adr = O2; - Register O3_accum = O3; - inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum); - } - - Register O2_mask = O2; - Register O3_bits = O3; - Register O4_temp = O4; - - // mark lower end of faulting range - assert(_verify_oop_implicit_branch[0] == NULL, "set once"); - _verify_oop_implicit_branch[0] = pc(); - - // We can't check the mark oop because it could be in the process of - // locking or unlocking while this is running. - set(Universe::verify_oop_mask (), O2_mask); - set(Universe::verify_oop_bits (), O3_bits); - - // assert((obj & oop_mask) == oop_bits); - and3(O0_obj, O2_mask, O4_temp); - cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail); - - if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) { - // the null_or_fail case is useless; must test for null separately - br_null_short(O0_obj, pn, succeed); - } - - // Check the Klass* of this object for being in the right area of memory. - // Cannot do the load in the delay above slot in case O0 is null - load_klass(O0_obj, O0_obj); - // assert((klass != NULL) - br_null_short(O0_obj, pn, fail); - // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers - - wrccr( O5_save_flags ); // Restore CCR's - - // mark upper end of faulting range - _verify_oop_implicit_branch[1] = pc(); - - //----------------------- - // all tests pass - bind(succeed); - - // Restore prior 64-bit registers - ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0); - ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1); - ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2); - ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3); - ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4); - ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+5*8,O5); - - retl(); // Leaf return; restore prior O7 in delay slot - delayed()->ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+7*8,O7); - - //----------------------- - bind(null_or_fail); // nulls are less common but OK - br_null(O0_obj, false, pt, succeed); - delayed()->wrccr( O5_save_flags ); // Restore CCR's - - //----------------------- - // report failure: - bind(fail); - _verify_oop_implicit_branch[2] = pc(); - - wrccr( O5_save_flags ); // Restore CCR's - - save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2)); - - // stop_subroutine expects message pointer in I1. - mov(I1, O1); - - // Restore prior 64-bit registers - ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+0*8,I0); - ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+1*8,I1); - ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+2*8,I2); - ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+3*8,I3); - ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+4*8,I4); - ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+5*8,I5); - - // factor long stop-sequence into subroutine to save space - assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet"); - - // call indirectly to solve generation ordering problem - AddressLiteral al(StubRoutines::Sparc::stop_subroutine_entry_address()); - load_ptr_contents(al, O5); - jmpl(O5, 0, O7); - delayed()->nop(); -} - - -void MacroAssembler::stop(const char* msg) { - // save frame first to get O7 for return address - // add one word to size in case struct is odd number of words long - // It must be doubleword-aligned for storing doubles into it. - - save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2)); - - // stop_subroutine expects message pointer in I1. - // Size of set() should stay the same - patchable_set((intptr_t)msg, O1); - - // factor long stop-sequence into subroutine to save space - assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet"); - - // call indirectly to solve generation ordering problem - AddressLiteral a(StubRoutines::Sparc::stop_subroutine_entry_address()); - load_ptr_contents(a, O5); - jmpl(O5, 0, O7); - delayed()->nop(); - - breakpoint_trap(); // make stop actually stop rather than writing - // unnoticeable results in the output files. - - // restore(); done in callee to save space! -} - - -void MacroAssembler::warn(const char* msg) { - save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2)); - RegistersForDebugging::save_registers(this); - mov(O0, L0); - // Size of set() should stay the same - patchable_set((intptr_t)msg, O0); - call( CAST_FROM_FN_PTR(address, warning) ); - delayed()->nop(); -// ret(); -// delayed()->restore(); - RegistersForDebugging::restore_registers(this, L0); - restore(); -} - - -void MacroAssembler::untested(const char* what) { - // We must be able to turn interactive prompting off - // in order to run automated test scripts on the VM - // Use the flag ShowMessageBoxOnError - - char* b = new char[1024]; - sprintf(b, "untested: %s", what); - - if (ShowMessageBoxOnError) { STOP(b); } - else { warn(b); } -} - - -void MacroAssembler::stop_subroutine() { - RegistersForDebugging::save_registers(this); - - // for the sake of the debugger, stick a PC on the current frame - // (this assumes that the caller has performed an extra "save") - mov(I7, L7); - add(O7, -7 * BytesPerInt, I7); - - save_frame(); // one more save to free up another O7 register - mov(I0, O1); // addr of reg save area - - // We expect pointer to message in I1. Caller must set it up in O1 - mov(I1, O0); // get msg - call (CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); - delayed()->nop(); - - restore(); - - RegistersForDebugging::restore_registers(this, O0); - - save_frame(0); - call(CAST_FROM_FN_PTR(address,breakpoint)); - delayed()->nop(); - restore(); - - mov(L7, I7); - retl(); - delayed()->restore(); // see stop above -} - - -void MacroAssembler::debug(char* msg, RegistersForDebugging* regs) { - if ( ShowMessageBoxOnError ) { - JavaThread* thread = JavaThread::current(); - JavaThreadState saved_state = thread->thread_state(); - thread->set_thread_state(_thread_in_vm); - { - // In order to get locks work, we need to fake a in_VM state - ttyLocker ttyl; - ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); - if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { - BytecodeCounter::print(); - } - if (os::message_box(msg, "Execution stopped, print registers?")) - regs->print(::tty); - } - BREAKPOINT; - ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); - } - else { - ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); - } - assert(false, err_msg("DEBUG MESSAGE: %s", msg)); -} - -#ifndef PRODUCT -void MacroAssembler::test() { - ResourceMark rm; - - CodeBuffer cb("test", 10000, 10000); - MacroAssembler* a = new MacroAssembler(&cb); - VM_Version::allow_all(); - a->test_v9(); - a->test_v8_onlys(); - VM_Version::revert(); - - StubRoutines::Sparc::test_stop_entry()(); -} -#endif - - -void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) { - subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words? - Label no_extras; - br( negative, true, pt, no_extras ); // if neg, clear reg - delayed()->set(0, Rresult); // annuled, so only if taken - bind( no_extras ); -} - - -void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) { -#ifdef _LP64 - add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult); -#else - add(Rextra_words, frame::memory_parameter_word_sp_offset + 1, Rresult); -#endif - bclr(1, Rresult); - sll(Rresult, LogBytesPerWord, Rresult); // Rresult has total frame bytes -} - - -void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) { - calc_frame_size(Rextra_words, Rresult); - neg(Rresult); - save(SP, Rresult, SP); -} - - -// --------------------------------------------------------- -Assembler::RCondition cond2rcond(Assembler::Condition c) { - switch (c) { - /*case zero: */ - case Assembler::equal: return Assembler::rc_z; - case Assembler::lessEqual: return Assembler::rc_lez; - case Assembler::less: return Assembler::rc_lz; - /*case notZero:*/ - case Assembler::notEqual: return Assembler::rc_nz; - case Assembler::greater: return Assembler::rc_gz; - case Assembler::greaterEqual: return Assembler::rc_gez; - } - ShouldNotReachHere(); - return Assembler::rc_z; -} - -// compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS -void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) { - tst(s1); - br (c, a, p, L); -} - -// Compares a pointer register with zero and branches on null. -// Does a test & branch on 32-bit systems and a register-branch on 64-bit. -void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) { - assert_not_delayed(); -#ifdef _LP64 - bpr( rc_z, a, p, s1, L ); -#else - tst(s1); - br ( zero, a, p, L ); -#endif -} - -void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) { - assert_not_delayed(); -#ifdef _LP64 - bpr( rc_nz, a, p, s1, L ); -#else - tst(s1); - br ( notZero, a, p, L ); -#endif -} - -// Compare registers and branch with nop in delay slot or cbcond without delay slot. - -// Compare integer (32 bit) values (icc only). -void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c, - Predict p, Label& L) { - assert_not_delayed(); - if (use_cbcond(L)) { - Assembler::cbcond(c, icc, s1, s2, L); - } else { - cmp(s1, s2); - br(c, false, p, L); - delayed()->nop(); - } -} - -// Compare integer (32 bit) values (icc only). -void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c, - Predict p, Label& L) { - assert_not_delayed(); - if (is_simm(simm13a,5) && use_cbcond(L)) { - Assembler::cbcond(c, icc, s1, simm13a, L); - } else { - cmp(s1, simm13a); - br(c, false, p, L); - delayed()->nop(); - } -} - -// Branch that tests xcc in LP64 and icc in !LP64 -void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c, - Predict p, Label& L) { - assert_not_delayed(); - if (use_cbcond(L)) { - Assembler::cbcond(c, ptr_cc, s1, s2, L); - } else { - cmp(s1, s2); - brx(c, false, p, L); - delayed()->nop(); - } -} - -// Branch that tests xcc in LP64 and icc in !LP64 -void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c, - Predict p, Label& L) { - assert_not_delayed(); - if (is_simm(simm13a,5) && use_cbcond(L)) { - Assembler::cbcond(c, ptr_cc, s1, simm13a, L); - } else { - cmp(s1, simm13a); - brx(c, false, p, L); - delayed()->nop(); - } -} - -// Short branch version for compares a pointer with zero. - -void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) { - assert_not_delayed(); - if (use_cbcond(L)) { - Assembler::cbcond(zero, ptr_cc, s1, 0, L); - return; - } - br_null(s1, false, p, L); - delayed()->nop(); -} - -void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) { - assert_not_delayed(); - if (use_cbcond(L)) { - Assembler::cbcond(notZero, ptr_cc, s1, 0, L); - return; - } - br_notnull(s1, false, p, L); - delayed()->nop(); -} - -// Unconditional short branch -void MacroAssembler::ba_short(Label& L) { - if (use_cbcond(L)) { - Assembler::cbcond(equal, icc, G0, G0, L); - return; - } - br(always, false, pt, L); - delayed()->nop(); -} - -// instruction sequences factored across compiler & interpreter - - -void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low, - Register Rb_hi, Register Rb_low, - Register Rresult) { - - Label check_low_parts, done; - - cmp(Ra_hi, Rb_hi ); // compare hi parts - br(equal, true, pt, check_low_parts); - delayed()->cmp(Ra_low, Rb_low); // test low parts - - // And, with an unsigned comparison, it does not matter if the numbers - // are negative or not. - // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff. - // The second one is bigger (unsignedly). - - // Other notes: The first move in each triplet can be unconditional - // (and therefore probably prefetchable). - // And the equals case for the high part does not need testing, - // since that triplet is reached only after finding the high halves differ. - - if (VM_Version::v9_instructions_work()) { - mov(-1, Rresult); - ba(done); delayed()-> movcc(greater, false, icc, 1, Rresult); - } else { - br(less, true, pt, done); delayed()-> set(-1, Rresult); - br(greater, true, pt, done); delayed()-> set( 1, Rresult); - } - - bind( check_low_parts ); - - if (VM_Version::v9_instructions_work()) { - mov( -1, Rresult); - movcc(equal, false, icc, 0, Rresult); - movcc(greaterUnsigned, false, icc, 1, Rresult); - } else { - set(-1, Rresult); - br(equal, true, pt, done); delayed()->set( 0, Rresult); - br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult); - } - bind( done ); -} - -void MacroAssembler::lneg( Register Rhi, Register Rlow ) { - subcc( G0, Rlow, Rlow ); - subc( G0, Rhi, Rhi ); -} - -void MacroAssembler::lshl( Register Rin_high, Register Rin_low, - Register Rcount, - Register Rout_high, Register Rout_low, - Register Rtemp ) { - - - Register Ralt_count = Rtemp; - Register Rxfer_bits = Rtemp; - - assert( Ralt_count != Rin_high - && Ralt_count != Rin_low - && Ralt_count != Rcount - && Rxfer_bits != Rin_low - && Rxfer_bits != Rin_high - && Rxfer_bits != Rcount - && Rxfer_bits != Rout_low - && Rout_low != Rin_high, - "register alias checks"); - - Label big_shift, done; - - // This code can be optimized to use the 64 bit shifts in V9. - // Here we use the 32 bit shifts. - - and3( Rcount, 0x3f, Rcount); // take least significant 6 bits - subcc(Rcount, 31, Ralt_count); - br(greater, true, pn, big_shift); - delayed()->dec(Ralt_count); - - // shift < 32 bits, Ralt_count = Rcount-31 - - // We get the transfer bits by shifting right by 32-count the low - // register. This is done by shifting right by 31-count and then by one - // more to take care of the special (rare) case where count is zero - // (shifting by 32 would not work). - - neg(Ralt_count); - - // The order of the next two instructions is critical in the case where - // Rin and Rout are the same and should not be reversed. - - srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count - if (Rcount != Rout_low) { - sll(Rin_low, Rcount, Rout_low); // low half - } - sll(Rin_high, Rcount, Rout_high); - if (Rcount == Rout_low) { - sll(Rin_low, Rcount, Rout_low); // low half - } - srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more - ba(done); - delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low - - // shift >= 32 bits, Ralt_count = Rcount-32 - bind(big_shift); - sll(Rin_low, Ralt_count, Rout_high ); - clr(Rout_low); - - bind(done); -} - - -void MacroAssembler::lshr( Register Rin_high, Register Rin_low, - Register Rcount, - Register Rout_high, Register Rout_low, - Register Rtemp ) { - - Register Ralt_count = Rtemp; - Register Rxfer_bits = Rtemp; - - assert( Ralt_count != Rin_high - && Ralt_count != Rin_low - && Ralt_count != Rcount - && Rxfer_bits != Rin_low - && Rxfer_bits != Rin_high - && Rxfer_bits != Rcount - && Rxfer_bits != Rout_high - && Rout_high != Rin_low, - "register alias checks"); - - Label big_shift, done; - - // This code can be optimized to use the 64 bit shifts in V9. - // Here we use the 32 bit shifts. - - and3( Rcount, 0x3f, Rcount); // take least significant 6 bits - subcc(Rcount, 31, Ralt_count); - br(greater, true, pn, big_shift); - delayed()->dec(Ralt_count); - - // shift < 32 bits, Ralt_count = Rcount-31 - - // We get the transfer bits by shifting left by 32-count the high - // register. This is done by shifting left by 31-count and then by one - // more to take care of the special (rare) case where count is zero - // (shifting by 32 would not work). - - neg(Ralt_count); - if (Rcount != Rout_low) { - srl(Rin_low, Rcount, Rout_low); - } - - // The order of the next two instructions is critical in the case where - // Rin and Rout are the same and should not be reversed. - - sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count - sra(Rin_high, Rcount, Rout_high ); // high half - sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more - if (Rcount == Rout_low) { - srl(Rin_low, Rcount, Rout_low); - } - ba(done); - delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high - - // shift >= 32 bits, Ralt_count = Rcount-32 - bind(big_shift); - - sra(Rin_high, Ralt_count, Rout_low); - sra(Rin_high, 31, Rout_high); // sign into hi - - bind( done ); -} - - - -void MacroAssembler::lushr( Register Rin_high, Register Rin_low, - Register Rcount, - Register Rout_high, Register Rout_low, - Register Rtemp ) { - - Register Ralt_count = Rtemp; - Register Rxfer_bits = Rtemp; - - assert( Ralt_count != Rin_high - && Ralt_count != Rin_low - && Ralt_count != Rcount - && Rxfer_bits != Rin_low - && Rxfer_bits != Rin_high - && Rxfer_bits != Rcount - && Rxfer_bits != Rout_high - && Rout_high != Rin_low, - "register alias checks"); - - Label big_shift, done; - - // This code can be optimized to use the 64 bit shifts in V9. - // Here we use the 32 bit shifts. - - and3( Rcount, 0x3f, Rcount); // take least significant 6 bits - subcc(Rcount, 31, Ralt_count); - br(greater, true, pn, big_shift); - delayed()->dec(Ralt_count); - - // shift < 32 bits, Ralt_count = Rcount-31 - - // We get the transfer bits by shifting left by 32-count the high - // register. This is done by shifting left by 31-count and then by one - // more to take care of the special (rare) case where count is zero - // (shifting by 32 would not work). - - neg(Ralt_count); - if (Rcount != Rout_low) { - srl(Rin_low, Rcount, Rout_low); - } - - // The order of the next two instructions is critical in the case where - // Rin and Rout are the same and should not be reversed. - - sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count - srl(Rin_high, Rcount, Rout_high ); // high half - sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more - if (Rcount == Rout_low) { - srl(Rin_low, Rcount, Rout_low); - } - ba(done); - delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high - - // shift >= 32 bits, Ralt_count = Rcount-32 - bind(big_shift); - - srl(Rin_high, Ralt_count, Rout_low); - clr(Rout_high); - - bind( done ); -} - -#ifdef _LP64 -void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) { - cmp(Ra, Rb); - mov(-1, Rresult); - movcc(equal, false, xcc, 0, Rresult); - movcc(greater, false, xcc, 1, Rresult); -} -#endif - - -void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) { - switch (size_in_bytes) { - case 8: ld_long(src, dst); break; - case 4: ld( src, dst); break; - case 2: is_signed ? ldsh(src, dst) : lduh(src, dst); break; - case 1: is_signed ? ldsb(src, dst) : ldub(src, dst); break; - default: ShouldNotReachHere(); - } -} - -void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { - switch (size_in_bytes) { - case 8: st_long(src, dst); break; - case 4: st( src, dst); break; - case 2: sth( src, dst); break; - case 1: stb( src, dst); break; - default: ShouldNotReachHere(); - } -} - - -void MacroAssembler::float_cmp( bool is_float, int unordered_result, - FloatRegister Fa, FloatRegister Fb, - Register Rresult) { - - fcmp(is_float ? FloatRegisterImpl::S : FloatRegisterImpl::D, fcc0, Fa, Fb); - - Condition lt = unordered_result == -1 ? f_unorderedOrLess : f_less; - Condition eq = f_equal; - Condition gt = unordered_result == 1 ? f_unorderedOrGreater : f_greater; - - if (VM_Version::v9_instructions_work()) { - - mov(-1, Rresult); - movcc(eq, true, fcc0, 0, Rresult); - movcc(gt, true, fcc0, 1, Rresult); - - } else { - Label done; - - set( -1, Rresult ); - //fb(lt, true, pn, done); delayed()->set( -1, Rresult ); - fb( eq, true, pn, done); delayed()->set( 0, Rresult ); - fb( gt, true, pn, done); delayed()->set( 1, Rresult ); - - bind (done); - } -} - - -void MacroAssembler::fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) -{ - if (VM_Version::v9_instructions_work()) { - Assembler::fneg(w, s, d); - } else { - if (w == FloatRegisterImpl::S) { - Assembler::fneg(w, s, d); - } else if (w == FloatRegisterImpl::D) { - // number() does a sanity check on the alignment. - assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && - ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); - - Assembler::fneg(FloatRegisterImpl::S, s, d); - Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); - } else { - assert(w == FloatRegisterImpl::Q, "Invalid float register width"); - - // number() does a sanity check on the alignment. - assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && - ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); - - Assembler::fneg(FloatRegisterImpl::S, s, d); - Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); - Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); - Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); - } - } -} - -void MacroAssembler::fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) -{ - if (VM_Version::v9_instructions_work()) { - Assembler::fmov(w, s, d); - } else { - if (w == FloatRegisterImpl::S) { - Assembler::fmov(w, s, d); - } else if (w == FloatRegisterImpl::D) { - // number() does a sanity check on the alignment. - assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && - ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); - - Assembler::fmov(FloatRegisterImpl::S, s, d); - Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); - } else { - assert(w == FloatRegisterImpl::Q, "Invalid float register width"); - - // number() does a sanity check on the alignment. - assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && - ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); - - Assembler::fmov(FloatRegisterImpl::S, s, d); - Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); - Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); - Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); - } - } -} - -void MacroAssembler::fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) -{ - if (VM_Version::v9_instructions_work()) { - Assembler::fabs(w, s, d); - } else { - if (w == FloatRegisterImpl::S) { - Assembler::fabs(w, s, d); - } else if (w == FloatRegisterImpl::D) { - // number() does a sanity check on the alignment. - assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && - ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); - - Assembler::fabs(FloatRegisterImpl::S, s, d); - Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); - } else { - assert(w == FloatRegisterImpl::Q, "Invalid float register width"); - - // number() does a sanity check on the alignment. - assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && - ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); - - Assembler::fabs(FloatRegisterImpl::S, s, d); - Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); - Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); - Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); - } - } -} - -void MacroAssembler::save_all_globals_into_locals() { - mov(G1,L1); - mov(G2,L2); - mov(G3,L3); - mov(G4,L4); - mov(G5,L5); - mov(G6,L6); - mov(G7,L7); -} - -void MacroAssembler::restore_globals_from_locals() { - mov(L1,G1); - mov(L2,G2); - mov(L3,G3); - mov(L4,G4); - mov(L5,G5); - mov(L6,G6); - mov(L7,G7); -} - -// Use for 64 bit operation. -void MacroAssembler::casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm) -{ - // store ptr_reg as the new top value -#ifdef _LP64 - casx(top_ptr_reg, top_reg, ptr_reg); -#else - cas_under_lock(top_ptr_reg, top_reg, ptr_reg, lock_addr, use_call_vm); -#endif // _LP64 -} - -// [RGV] This routine does not handle 64 bit operations. -// use casx_under_lock() or casx directly!!! -void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm) -{ - // store ptr_reg as the new top value - if (VM_Version::v9_instructions_work()) { - cas(top_ptr_reg, top_reg, ptr_reg); - } else { - - // If the register is not an out nor global, it is not visible - // after the save. Allocate a register for it, save its - // value in the register save area (the save may not flush - // registers to the save area). - - Register top_ptr_reg_after_save; - Register top_reg_after_save; - Register ptr_reg_after_save; - - if (top_ptr_reg->is_out() || top_ptr_reg->is_global()) { - top_ptr_reg_after_save = top_ptr_reg->after_save(); - } else { - Address reg_save_addr = top_ptr_reg->address_in_saved_window(); - top_ptr_reg_after_save = L0; - st(top_ptr_reg, reg_save_addr); - } - - if (top_reg->is_out() || top_reg->is_global()) { - top_reg_after_save = top_reg->after_save(); - } else { - Address reg_save_addr = top_reg->address_in_saved_window(); - top_reg_after_save = L1; - st(top_reg, reg_save_addr); - } - - if (ptr_reg->is_out() || ptr_reg->is_global()) { - ptr_reg_after_save = ptr_reg->after_save(); - } else { - Address reg_save_addr = ptr_reg->address_in_saved_window(); - ptr_reg_after_save = L2; - st(ptr_reg, reg_save_addr); - } - - const Register& lock_reg = L3; - const Register& lock_ptr_reg = L4; - const Register& value_reg = L5; - const Register& yield_reg = L6; - const Register& yieldall_reg = L7; - - save_frame(); - - if (top_ptr_reg_after_save == L0) { - ld(top_ptr_reg->address_in_saved_window().after_save(), top_ptr_reg_after_save); - } - - if (top_reg_after_save == L1) { - ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save); - } - - if (ptr_reg_after_save == L2) { - ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save); - } - - Label(retry_get_lock); - Label(not_same); - Label(dont_yield); - - assert(lock_addr, "lock_address should be non null for v8"); - set((intptr_t)lock_addr, lock_ptr_reg); - // Initialize yield counter - mov(G0,yield_reg); - mov(G0, yieldall_reg); - set(StubRoutines::Sparc::locked, lock_reg); - - bind(retry_get_lock); - cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield); - - if(use_call_vm) { - Untested("Need to verify global reg consistancy"); - call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg); - } else { - // Save the regs and make space for a C call - save(SP, -96, SP); - save_all_globals_into_locals(); - call(CAST_FROM_FN_PTR(address,os::yield_all)); - delayed()->mov(yieldall_reg, O0); - restore_globals_from_locals(); - restore(); - } - - // reset the counter - mov(G0,yield_reg); - add(yieldall_reg, 1, yieldall_reg); - - bind(dont_yield); - // try to get lock - swap(lock_ptr_reg, 0, lock_reg); - - // did we get the lock? - cmp(lock_reg, StubRoutines::Sparc::unlocked); - br(Assembler::notEqual, true, Assembler::pn, retry_get_lock); - delayed()->add(yield_reg,1,yield_reg); - - // yes, got lock. do we have the same top? - ld(top_ptr_reg_after_save, 0, value_reg); - cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same); - - // yes, same top. - st(ptr_reg_after_save, top_ptr_reg_after_save, 0); - membar(Assembler::StoreStore); - - bind(not_same); - mov(value_reg, ptr_reg_after_save); - st(lock_reg, lock_ptr_reg, 0); // unlock - - restore(); - } -} - -RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - intptr_t value = *delayed_value_addr; - if (value != 0) - return RegisterOrConstant(value + offset); - - // load indirectly to solve generation ordering problem - AddressLiteral a(delayed_value_addr); - load_ptr_contents(a, tmp); - -#ifdef ASSERT - tst(tmp); - breakpoint_trap(zero, xcc); -#endif - - if (offset != 0) - add(tmp, offset, tmp); - - return RegisterOrConstant(tmp); -} - - -RegisterOrConstant MacroAssembler::regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) { - assert(d.register_or_noreg() != G0, "lost side effect"); - if ((s2.is_constant() && s2.as_constant() == 0) || - (s2.is_register() && s2.as_register() == G0)) { - // Do nothing, just move value. - if (s1.is_register()) { - if (d.is_constant()) d = temp; - mov(s1.as_register(), d.as_register()); - return d; - } else { - return s1; - } - } - - if (s1.is_register()) { - assert_different_registers(s1.as_register(), temp); - if (d.is_constant()) d = temp; - andn(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register()); - return d; - } else { - if (s2.is_register()) { - assert_different_registers(s2.as_register(), temp); - if (d.is_constant()) d = temp; - set(s1.as_constant(), temp); - andn(temp, s2.as_register(), d.as_register()); - return d; - } else { - intptr_t res = s1.as_constant() & ~s2.as_constant(); - return res; - } - } -} - -RegisterOrConstant MacroAssembler::regcon_inc_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) { - assert(d.register_or_noreg() != G0, "lost side effect"); - if ((s2.is_constant() && s2.as_constant() == 0) || - (s2.is_register() && s2.as_register() == G0)) { - // Do nothing, just move value. - if (s1.is_register()) { - if (d.is_constant()) d = temp; - mov(s1.as_register(), d.as_register()); - return d; - } else { - return s1; - } - } - - if (s1.is_register()) { - assert_different_registers(s1.as_register(), temp); - if (d.is_constant()) d = temp; - add(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register()); - return d; - } else { - if (s2.is_register()) { - assert_different_registers(s2.as_register(), temp); - if (d.is_constant()) d = temp; - add(s2.as_register(), ensure_simm13_or_reg(s1, temp), d.as_register()); - return d; - } else { - intptr_t res = s1.as_constant() + s2.as_constant(); - return res; - } - } -} - -RegisterOrConstant MacroAssembler::regcon_sll_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) { - assert(d.register_or_noreg() != G0, "lost side effect"); - if (!is_simm13(s2.constant_or_zero())) - s2 = (s2.as_constant() & 0xFF); - if ((s2.is_constant() && s2.as_constant() == 0) || - (s2.is_register() && s2.as_register() == G0)) { - // Do nothing, just move value. - if (s1.is_register()) { - if (d.is_constant()) d = temp; - mov(s1.as_register(), d.as_register()); - return d; - } else { - return s1; - } - } - - if (s1.is_register()) { - assert_different_registers(s1.as_register(), temp); - if (d.is_constant()) d = temp; - sll_ptr(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register()); - return d; - } else { - if (s2.is_register()) { - assert_different_registers(s2.as_register(), temp); - if (d.is_constant()) d = temp; - set(s1.as_constant(), temp); - sll_ptr(temp, s2.as_register(), d.as_register()); - return d; - } else { - intptr_t res = s1.as_constant() << s2.as_constant(); - return res; - } - } -} - - -// Look up the method for a megamorphic invokeinterface call. -// The target method is determined by . -// The receiver klass is in recv_klass. -// On success, the result will be in method_result, and execution falls through. -// On failure, execution transfers to the given label. -void MacroAssembler::lookup_interface_method(Register recv_klass, - Register intf_klass, - RegisterOrConstant itable_index, - Register method_result, - Register scan_temp, - Register sethi_temp, - Label& L_no_such_interface) { - assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); - assert(itable_index.is_constant() || itable_index.as_register() == method_result, - "caller must use same register for non-constant itable index as for method"); - - Label L_no_such_interface_restore; - bool did_save = false; - if (scan_temp == noreg || sethi_temp == noreg) { - Register recv_2 = recv_klass->is_global() ? recv_klass : L0; - Register intf_2 = intf_klass->is_global() ? intf_klass : L1; - assert(method_result->is_global(), "must be able to return value"); - scan_temp = L2; - sethi_temp = L3; - save_frame_and_mov(0, recv_klass, recv_2, intf_klass, intf_2); - recv_klass = recv_2; - intf_klass = intf_2; - did_save = true; - } - - // Compute start of first itableOffsetEntry (which is at the end of the vtable) - int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; - int scan_step = itableOffsetEntry::size() * wordSize; - int vte_size = vtableEntry::size() * wordSize; - - lduw(recv_klass, InstanceKlass::vtable_length_offset() * wordSize, scan_temp); - // %%% We should store the aligned, prescaled offset in the klassoop. - // Then the next several instructions would fold away. - - int round_to_unit = ((HeapWordsPerLong > 1) ? BytesPerLong : 0); - int itb_offset = vtable_base; - if (round_to_unit != 0) { - // hoist first instruction of round_to(scan_temp, BytesPerLong): - itb_offset += round_to_unit - wordSize; - } - int itb_scale = exact_log2(vtableEntry::size() * wordSize); - sll(scan_temp, itb_scale, scan_temp); - add(scan_temp, itb_offset, scan_temp); - if (round_to_unit != 0) { - // Round up to align_object_offset boundary - // see code for InstanceKlass::start_of_itable! - // Was: round_to(scan_temp, BytesPerLong); - // Hoisted: add(scan_temp, BytesPerLong-1, scan_temp); - and3(scan_temp, -round_to_unit, scan_temp); - } - add(recv_klass, scan_temp, scan_temp); - - // Adjust recv_klass by scaled itable_index, so we can free itable_index. - RegisterOrConstant itable_offset = itable_index; - itable_offset = regcon_sll_ptr(itable_index, exact_log2(itableMethodEntry::size() * wordSize), itable_offset); - itable_offset = regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes(), itable_offset); - add(recv_klass, ensure_simm13_or_reg(itable_offset, sethi_temp), recv_klass); - - // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { - // if (scan->interface() == intf) { - // result = (klass + scan->offset() + itable_index); - // } - // } - Label L_search, L_found_method; - - for (int peel = 1; peel >= 0; peel--) { - // %%%% Could load both offset and interface in one ldx, if they were - // in the opposite order. This would save a load. - ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result); - - // Check that this entry is non-null. A null entry means that - // the receiver class doesn't implement the interface, and wasn't the - // same as when the caller was compiled. - bpr(Assembler::rc_z, false, Assembler::pn, method_result, did_save ? L_no_such_interface_restore : L_no_such_interface); - delayed()->cmp(method_result, intf_klass); - - if (peel) { - brx(Assembler::equal, false, Assembler::pt, L_found_method); - } else { - brx(Assembler::notEqual, false, Assembler::pn, L_search); - // (invert the test to fall through to found_method...) - } - delayed()->add(scan_temp, scan_step, scan_temp); - - if (!peel) break; - - bind(L_search); - } - - bind(L_found_method); - - // Got a hit. - int ito_offset = itableOffsetEntry::offset_offset_in_bytes(); - // scan_temp[-scan_step] points to the vtable offset we need - ito_offset -= scan_step; - lduw(scan_temp, ito_offset, scan_temp); - ld_ptr(recv_klass, scan_temp, method_result); - - if (did_save) { - Label L_done; - ba(L_done); - delayed()->restore(); - - bind(L_no_such_interface_restore); - ba(L_no_such_interface); - delayed()->restore(); - - bind(L_done); - } -} - - -// virtual method calling -void MacroAssembler::lookup_virtual_method(Register recv_klass, - RegisterOrConstant vtable_index, - Register method_result) { - assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg()); - Register sethi_temp = method_result; - const int base = (InstanceKlass::vtable_start_offset() * wordSize + - // method pointer offset within the vtable entry: - vtableEntry::method_offset_in_bytes()); - RegisterOrConstant vtable_offset = vtable_index; - // Each of the following three lines potentially generates an instruction. - // But the total number of address formation instructions will always be - // at most two, and will often be zero. In any case, it will be optimal. - // If vtable_index is a register, we will have (sll_ptr N,x; inc_ptr B,x; ld_ptr k,x). - // If vtable_index is a constant, we will have at most (set B+X<is_global()) sub_2 = L0; - if (!sup_2->is_global()) sup_2 = L1; - bool did_save = false; - if (temp_reg == noreg || temp2_reg == noreg) { - temp_reg = L2; - temp2_reg = L3; - save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2); - sub_klass = sub_2; - super_klass = sup_2; - did_save = true; - } - Label L_failure, L_pop_to_failure, L_pop_to_success; - check_klass_subtype_fast_path(sub_klass, super_klass, - temp_reg, temp2_reg, - (did_save ? &L_pop_to_success : &L_success), - (did_save ? &L_pop_to_failure : &L_failure), NULL); - - if (!did_save) - save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2); - check_klass_subtype_slow_path(sub_2, sup_2, - L2, L3, L4, L5, - NULL, &L_pop_to_failure); - - // on success: - bind(L_pop_to_success); - restore(); - ba_short(L_success); - - // on failure: - bind(L_pop_to_failure); - restore(); - bind(L_failure); -} - - -void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Label* L_success, - Label* L_failure, - Label* L_slow_path, - RegisterOrConstant super_check_offset) { - int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); - int sco_offset = in_bytes(Klass::super_check_offset_offset()); - - bool must_load_sco = (super_check_offset.constant_or_zero() == -1); - bool need_slow_path = (must_load_sco || - super_check_offset.constant_or_zero() == sco_offset); - - assert_different_registers(sub_klass, super_klass, temp_reg); - if (super_check_offset.is_register()) { - assert_different_registers(sub_klass, super_klass, temp_reg, - super_check_offset.as_register()); - } else if (must_load_sco) { - assert(temp2_reg != noreg, "supply either a temp or a register offset"); - } - - Label L_fallthrough; - int label_nulls = 0; - if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } - if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } - if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } - assert(label_nulls <= 1 || - (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), - "at most one NULL in the batch, usually"); - - // If the pointers are equal, we are done (e.g., String[] elements). - // This self-check enables sharing of secondary supertype arrays among - // non-primary types such as array-of-interface. Otherwise, each such - // type would need its own customized SSA. - // We move this check to the front of the fast path because many - // type checks are in fact trivially successful in this manner, - // so we get a nicely predicted branch right at the start of the check. - cmp(super_klass, sub_klass); - brx(Assembler::equal, false, Assembler::pn, *L_success); - delayed()->nop(); - - // Check the supertype display: - if (must_load_sco) { - // The super check offset is always positive... - lduw(super_klass, sco_offset, temp2_reg); - super_check_offset = RegisterOrConstant(temp2_reg); - // super_check_offset is register. - assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register()); - } - ld_ptr(sub_klass, super_check_offset, temp_reg); - cmp(super_klass, temp_reg); - - // This check has worked decisively for primary supers. - // Secondary supers are sought in the super_cache ('super_cache_addr'). - // (Secondary supers are interfaces and very deeply nested subtypes.) - // This works in the same check above because of a tricky aliasing - // between the super_cache and the primary super display elements. - // (The 'super_check_addr' can address either, as the case requires.) - // Note that the cache is updated below if it does not help us find - // what we need immediately. - // So if it was a primary super, we can just fail immediately. - // Otherwise, it's the slow path for us (no success at this point). - - // Hacked ba(), which may only be used just before L_fallthrough. -#define FINAL_JUMP(label) \ - if (&(label) != &L_fallthrough) { \ - ba(label); delayed()->nop(); \ - } - - if (super_check_offset.is_register()) { - brx(Assembler::equal, false, Assembler::pn, *L_success); - delayed()->cmp(super_check_offset.as_register(), sc_offset); - - if (L_failure == &L_fallthrough) { - brx(Assembler::equal, false, Assembler::pt, *L_slow_path); - delayed()->nop(); - } else { - brx(Assembler::notEqual, false, Assembler::pn, *L_failure); - delayed()->nop(); - FINAL_JUMP(*L_slow_path); - } - } else if (super_check_offset.as_constant() == sc_offset) { - // Need a slow path; fast failure is impossible. - if (L_slow_path == &L_fallthrough) { - brx(Assembler::equal, false, Assembler::pt, *L_success); - delayed()->nop(); - } else { - brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path); - delayed()->nop(); - FINAL_JUMP(*L_success); - } - } else { - // No slow path; it's a fast decision. - if (L_failure == &L_fallthrough) { - brx(Assembler::equal, false, Assembler::pt, *L_success); - delayed()->nop(); - } else { - brx(Assembler::notEqual, false, Assembler::pn, *L_failure); - delayed()->nop(); - FINAL_JUMP(*L_success); - } - } - - bind(L_fallthrough); - -#undef FINAL_JUMP -} - - -void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, - Register super_klass, - Register count_temp, - Register scan_temp, - Register scratch_reg, - Register coop_reg, - Label* L_success, - Label* L_failure) { - assert_different_registers(sub_klass, super_klass, - count_temp, scan_temp, scratch_reg, coop_reg); - - Label L_fallthrough, L_loop; - int label_nulls = 0; - if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } - if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } - assert(label_nulls <= 1, "at most one NULL in the batch"); - - // a couple of useful fields in sub_klass: - int ss_offset = in_bytes(Klass::secondary_supers_offset()); - int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); - - // Do a linear scan of the secondary super-klass chain. - // This code is rarely used, so simplicity is a virtue here. - -#ifndef PRODUCT - int* pst_counter = &SharedRuntime::_partial_subtype_ctr; - inc_counter((address) pst_counter, count_temp, scan_temp); -#endif - - // We will consult the secondary-super array. - ld_ptr(sub_klass, ss_offset, scan_temp); - - Register search_key = super_klass; - - // Load the array length. (Positive movl does right thing on LP64.) - lduw(scan_temp, Array::length_offset_in_bytes(), count_temp); - - // Check for empty secondary super list - tst(count_temp); - - // In the array of super classes elements are pointer sized. - int element_size = wordSize; - - // Top of search loop - bind(L_loop); - br(Assembler::equal, false, Assembler::pn, *L_failure); - delayed()->add(scan_temp, element_size, scan_temp); - - // Skip the array header in all array accesses. - int elem_offset = Array::base_offset_in_bytes(); - elem_offset -= element_size; // the scan pointer was pre-incremented also - - // Load next super to check - ld_ptr( scan_temp, elem_offset, scratch_reg ); - - // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list - cmp(scratch_reg, search_key); - - // A miss means we are NOT a subtype and need to keep looping - brx(Assembler::notEqual, false, Assembler::pn, L_loop); - delayed()->deccc(count_temp); // decrement trip counter in delay slot - - // Success. Cache the super we found and proceed in triumph. - st_ptr(super_klass, sub_klass, sc_offset); - - if (L_success != &L_fallthrough) { - ba(*L_success); - delayed()->nop(); - } - - bind(L_fallthrough); -} - - -RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot, - Register temp_reg, - int extra_slot_offset) { - // cf. TemplateTable::prepare_invoke(), if (load_receiver). - int stackElementSize = Interpreter::stackElementSize; - int offset = extra_slot_offset * stackElementSize; - if (arg_slot.is_constant()) { - offset += arg_slot.as_constant() * stackElementSize; - return offset; - } else { - assert(temp_reg != noreg, "must specify"); - sll_ptr(arg_slot.as_register(), exact_log2(stackElementSize), temp_reg); - if (offset != 0) - add(temp_reg, offset, temp_reg); - return temp_reg; - } -} - - -Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, - Register temp_reg, - int extra_slot_offset) { - return Address(Gargs, argument_offset(arg_slot, temp_reg, extra_slot_offset)); -} - - -void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, - Register temp_reg, - Label& done, Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - - if (PrintBiasedLockingStatistics) { - assert_different_registers(obj_reg, mark_reg, temp_reg, O7); - if (counters == NULL) - counters = BiasedLocking::counters(); - } - - Label cas_label; - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); - cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label); - - load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); - or3(G2_thread, temp_reg, temp_reg); - xor3(mark_reg, temp_reg, temp_reg); - andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg); - if (counters != NULL) { - cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg); - // Reload mark_reg as we may need it later - ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg); - } - brx(Assembler::equal, true, Assembler::pt, done); - delayed()->nop(); - - Label try_revoke_bias; - Label try_rebias; - Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes()); - assert(mark_addr.disp() == 0, "cas must take a zero displacement"); - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - btst(markOopDesc::biased_lock_mask_in_place, temp_reg); - brx(Assembler::notZero, false, Assembler::pn, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - delayed()->btst(markOopDesc::epoch_mask_in_place, temp_reg); - brx(Assembler::notZero, false, Assembler::pn, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - delayed()->and3(mark_reg, - markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place, - mark_reg); - or3(G2_thread, mark_reg, temp_reg); - casn(mark_addr.base(), mark_reg, temp_reg); - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - cmp(mark_reg, temp_reg); - if (counters != NULL) { - cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg); - } - if (slow_case != NULL) { - brx(Assembler::notEqual, true, Assembler::pn, *slow_case); - delayed()->nop(); - } - ba_short(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); - or3(G2_thread, temp_reg, temp_reg); - casn(mark_addr.base(), mark_reg, temp_reg); - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - cmp(mark_reg, temp_reg); - if (counters != NULL) { - cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg); - } - if (slow_case != NULL) { - brx(Assembler::notEqual, true, Assembler::pn, *slow_case); - delayed()->nop(); - } - ba_short(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - load_klass(obj_reg, temp_reg); - ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); - casn(mark_addr.base(), mark_reg, temp_reg); - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (counters != NULL) { - cmp(mark_reg, temp_reg); - cond_inc(Assembler::zero, (address) counters->revoked_lock_entry_count_addr(), mark_reg, temp_reg); - } - - bind(cas_label); -} - -void MacroAssembler::biased_locking_exit (Address mark_addr, Register temp_reg, Label& done, - bool allow_delay_slot_filling) { - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. - ld_ptr(mark_addr, temp_reg); - and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); - cmp(temp_reg, markOopDesc::biased_lock_pattern); - brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done); - delayed(); - if (!allow_delay_slot_filling) { - nop(); - } -} - - -// CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by -// Solaris/SPARC's "as". Another apt name would be cas_ptr() - -void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) { - casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); -} - - - -// compiler_lock_object() and compiler_unlock_object() are direct transliterations -// of i486.ad fast_lock() and fast_unlock(). See those methods for detailed comments. -// The code could be tightened up considerably. -// -// box->dhw disposition - post-conditions at DONE_LABEL. -// - Successful inflated lock: box->dhw != 0. -// Any non-zero value suffices. -// Consider G2_thread, rsp, boxReg, or unused_mark() -// - Successful Stack-lock: box->dhw == mark. -// box->dhw must contain the displaced mark word value -// - Failure -- icc.ZFlag == 0 and box->dhw is undefined. -// The slow-path fast_enter() and slow_enter() operators -// are responsible for setting box->dhw = NonZero (typically ::unused_mark). -// - Biased: box->dhw is undefined -// -// SPARC refworkload performance - specifically jetstream and scimark - are -// extremely sensitive to the size of the code emitted by compiler_lock_object -// and compiler_unlock_object. Critically, the key factor is code size, not path -// length. (Simply experiments to pad CLO with unexecuted NOPs demonstrte the -// effect). - - -void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, - Register Rbox, Register Rscratch, - BiasedLockingCounters* counters, - bool try_bias) { - Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); - - verify_oop(Roop); - Label done ; - - if (counters != NULL) { - inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch); - } - - if (EmitSync & 1) { - mov(3, Rscratch); - st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); - cmp(SP, G0); - return ; - } - - if (EmitSync & 2) { - - // Fetch object's markword - ld_ptr(mark_addr, Rmark); - - if (try_bias) { - biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); - } - - // Save Rbox in Rscratch to be used for the cas operation - mov(Rbox, Rscratch); - - // set Rmark to markOop | markOopDesc::unlocked_value - or3(Rmark, markOopDesc::unlocked_value, Rmark); - - // Initialize the box. (Must happen before we update the object mark!) - st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); - - // compare object markOop with Rmark and if equal exchange Rscratch with object markOop - assert(mark_addr.disp() == 0, "cas must take a zero displacement"); - casx_under_lock(mark_addr.base(), Rmark, Rscratch, - (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - - // if compare/exchange succeeded we found an unlocked object and we now have locked it - // hence we are done - cmp(Rmark, Rscratch); -#ifdef _LP64 - sub(Rscratch, STACK_BIAS, Rscratch); -#endif - brx(Assembler::equal, false, Assembler::pt, done); - delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot - - // we did not find an unlocked object so see if this is a recursive case - // sub(Rscratch, SP, Rscratch); - assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); - andcc(Rscratch, 0xfffff003, Rscratch); - st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); - bind (done); - return ; - } - - Label Egress ; - - if (EmitSync & 256) { - Label IsInflated ; - - ld_ptr(mark_addr, Rmark); // fetch obj->mark - // Triage: biased, stack-locked, neutral, inflated - if (try_bias) { - biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); - // Invariant: if control reaches this point in the emitted stream - // then Rmark has not been modified. - } - - // Store mark into displaced mark field in the on-stack basic-lock "box" - // Critically, this must happen before the CAS - // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. - st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); - andcc(Rmark, 2, G0); - brx(Assembler::notZero, false, Assembler::pn, IsInflated); - delayed()-> - - // Try stack-lock acquisition. - // Beware: the 1st instruction is in a delay slot - mov(Rbox, Rscratch); - or3(Rmark, markOopDesc::unlocked_value, Rmark); - assert(mark_addr.disp() == 0, "cas must take a zero displacement"); - casn(mark_addr.base(), Rmark, Rscratch); - cmp(Rmark, Rscratch); - brx(Assembler::equal, false, Assembler::pt, done); - delayed()->sub(Rscratch, SP, Rscratch); - - // Stack-lock attempt failed - check for recursive stack-lock. - // See the comments below about how we might remove this case. -#ifdef _LP64 - sub(Rscratch, STACK_BIAS, Rscratch); -#endif - assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); - andcc(Rscratch, 0xfffff003, Rscratch); - br(Assembler::always, false, Assembler::pt, done); - delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); - - bind(IsInflated); - if (EmitSync & 64) { - // If m->owner != null goto IsLocked - // Pessimistic form: Test-and-CAS vs CAS - // The optimistic form avoids RTS->RTO cache line upgrades. - ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); - andcc(Rscratch, Rscratch, G0); - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()->nop(); - // m->owner == null : it's unlocked. - } - - // Try to CAS m->owner from null to Self - // Invariant: if we acquire the lock then _recursions should be 0. - add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); - mov(G2_thread, Rscratch); - casn(Rmark, G0, Rscratch); - cmp(Rscratch, G0); - // Intentional fall-through into done - } else { - // Aggressively avoid the Store-before-CAS penalty - // Defer the store into box->dhw until after the CAS - Label IsInflated, Recursive ; - -// Anticipate CAS -- Avoid RTS->RTO upgrade -// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); - - ld_ptr(mark_addr, Rmark); // fetch obj->mark - // Triage: biased, stack-locked, neutral, inflated - - if (try_bias) { - biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); - // Invariant: if control reaches this point in the emitted stream - // then Rmark has not been modified. - } - andcc(Rmark, 2, G0); - brx(Assembler::notZero, false, Assembler::pn, IsInflated); - delayed()-> // Beware - dangling delay-slot - - // Try stack-lock acquisition. - // Transiently install BUSY (0) encoding in the mark word. - // if the CAS of 0 into the mark was successful then we execute: - // ST box->dhw = mark -- save fetched mark in on-stack basiclock box - // ST obj->mark = box -- overwrite transient 0 value - // This presumes TSO, of course. - - mov(0, Rscratch); - or3(Rmark, markOopDesc::unlocked_value, Rmark); - assert(mark_addr.disp() == 0, "cas must take a zero displacement"); - casn(mark_addr.base(), Rmark, Rscratch); -// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); - cmp(Rscratch, Rmark); - brx(Assembler::notZero, false, Assembler::pn, Recursive); - delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); - if (counters != NULL) { - cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); - } - ba(done); - delayed()->st_ptr(Rbox, mark_addr); - - bind(Recursive); - // Stack-lock attempt failed - check for recursive stack-lock. - // Tests show that we can remove the recursive case with no impact - // on refworkload 0.83. If we need to reduce the size of the code - // emitted by compiler_lock_object() the recursive case is perfect - // candidate. - // - // A more extreme idea is to always inflate on stack-lock recursion. - // This lets us eliminate the recursive checks in compiler_lock_object - // and compiler_unlock_object and the (box->dhw == 0) encoding. - // A brief experiment - requiring changes to synchronizer.cpp, interpreter, - // and showed a performance *increase*. In the same experiment I eliminated - // the fast-path stack-lock code from the interpreter and always passed - // control to the "slow" operators in synchronizer.cpp. - - // RScratch contains the fetched obj->mark value from the failed CASN. -#ifdef _LP64 - sub(Rscratch, STACK_BIAS, Rscratch); -#endif - sub(Rscratch, SP, Rscratch); - assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); - andcc(Rscratch, 0xfffff003, Rscratch); - if (counters != NULL) { - // Accounting needs the Rscratch register - st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); - cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); - ba_short(done); - } else { - ba(done); - delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); - } - - bind (IsInflated); - if (EmitSync & 64) { - // If m->owner != null goto IsLocked - // Test-and-CAS vs CAS - // Pessimistic form avoids futile (doomed) CAS attempts - // The optimistic form avoids RTS->RTO cache line upgrades. - ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); - andcc(Rscratch, Rscratch, G0); - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()->nop(); - // m->owner == null : it's unlocked. - } - - // Try to CAS m->owner from null to Self - // Invariant: if we acquire the lock then _recursions should be 0. - add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); - mov(G2_thread, Rscratch); - casn(Rmark, G0, Rscratch); - cmp(Rscratch, G0); - // ST box->displaced_header = NonZero. - // Any non-zero value suffices: - // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. - st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); - // Intentional fall-through into done - } - - bind (done); -} - -void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, - Register Rbox, Register Rscratch, - bool try_bias) { - Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); - - Label done ; - - if (EmitSync & 4) { - cmp(SP, G0); - return ; - } - - if (EmitSync & 8) { - if (try_bias) { - biased_locking_exit(mark_addr, Rscratch, done); - } - - // Test first if it is a fast recursive unlock - ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); - br_null_short(Rmark, Assembler::pt, done); - - // Check if it is still a light weight lock, this is is true if we see - // the stack address of the basicLock in the markOop of the object - assert(mark_addr.disp() == 0, "cas must take a zero displacement"); - casx_under_lock(mark_addr.base(), Rbox, Rmark, - (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - ba(done); - delayed()->cmp(Rbox, Rmark); - bind(done); - return ; - } - - // Beware ... If the aggregate size of the code emitted by CLO and CUO is - // is too large performance rolls abruptly off a cliff. - // This could be related to inlining policies, code cache management, or - // I$ effects. - Label LStacked ; - - if (try_bias) { - // TODO: eliminate redundant LDs of obj->mark - biased_locking_exit(mark_addr, Rscratch, done); - } - - ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark); - ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); - andcc(Rscratch, Rscratch, G0); - brx(Assembler::zero, false, Assembler::pn, done); - delayed()->nop(); // consider: relocate fetch of mark, above, into this DS - andcc(Rmark, 2, G0); - brx(Assembler::zero, false, Assembler::pt, LStacked); - delayed()->nop(); - - // It's inflated - // Conceptually we need a #loadstore|#storestore "release" MEMBAR before - // the ST of 0 into _owner which releases the lock. This prevents loads - // and stores within the critical section from reordering (floating) - // past the store that releases the lock. But TSO is a strong memory model - // and that particular flavor of barrier is a noop, so we can safely elide it. - // Note that we use 1-0 locking by default for the inflated case. We - // close the resultant (and rare) race by having contented threads in - // monitorenter periodically poll _owner. - ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); - ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); - xor3(Rscratch, G2_thread, Rscratch); - orcc(Rbox, Rscratch, Rbox); - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()-> - ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); - ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); - orcc(Rbox, Rscratch, G0); - if (EmitSync & 65536) { - Label LSucc ; - brx(Assembler::notZero, false, Assembler::pn, LSucc); - delayed()->nop(); - ba(done); - delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); - - bind(LSucc); - st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); - if (os::is_MP()) { membar (StoreLoad); } - ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); - andcc(Rscratch, Rscratch, G0); - brx(Assembler::notZero, false, Assembler::pt, done); - delayed()->andcc(G0, G0, G0); - add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); - mov(G2_thread, Rscratch); - casn(Rmark, G0, Rscratch); - // invert icc.zf and goto done - br_notnull(Rscratch, false, Assembler::pt, done); - delayed()->cmp(G0, G0); - ba(done); - delayed()->cmp(G0, 1); - } else { - brx(Assembler::notZero, false, Assembler::pn, done); - delayed()->nop(); - ba(done); - delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); - } - - bind (LStacked); - // Consider: we could replace the expensive CAS in the exit - // path with a simple ST of the displaced mark value fetched from - // the on-stack basiclock box. That admits a race where a thread T2 - // in the slow lock path -- inflating with monitor M -- could race a - // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. - // More precisely T1 in the stack-lock unlock path could "stomp" the - // inflated mark value M installed by T2, resulting in an orphan - // object monitor M and T2 becoming stranded. We can remedy that situation - // by having T2 periodically poll the object's mark word using timed wait - // operations. If T2 discovers that a stomp has occurred it vacates - // the monitor M and wakes any other threads stranded on the now-orphan M. - // In addition the monitor scavenger, which performs deflation, - // would also need to check for orpan monitors and stranded threads. - // - // Finally, inflation is also used when T2 needs to assign a hashCode - // to O and O is stack-locked by T1. The "stomp" race could cause - // an assigned hashCode value to be lost. We can avoid that condition - // and provide the necessary hashCode stability invariants by ensuring - // that hashCode generation is idempotent between copying GCs. - // For example we could compute the hashCode of an object O as - // O's heap address XOR some high quality RNG value that is refreshed - // at GC-time. The monitor scavenger would install the hashCode - // found in any orphan monitors. Again, the mechanism admits a - // lost-update "stomp" WAW race but detects and recovers as needed. - // - // A prototype implementation showed excellent results, although - // the scavenger and timeout code was rather involved. - - casn(mark_addr.base(), Rbox, Rscratch); - cmp(Rbox, Rscratch); - // Intentional fall through into done ... - - bind(done); -} - - - -void MacroAssembler::print_CPU_state() { - // %%%%% need to implement this -} - -void MacroAssembler::verify_FPU(int stack_depth, const char* s) { - // %%%%% need to implement this -} - -void MacroAssembler::push_IU_state() { - // %%%%% need to implement this -} - - -void MacroAssembler::pop_IU_state() { - // %%%%% need to implement this -} - - -void MacroAssembler::push_FPU_state() { - // %%%%% need to implement this -} - - -void MacroAssembler::pop_FPU_state() { - // %%%%% need to implement this -} - - -void MacroAssembler::push_CPU_state() { - // %%%%% need to implement this -} - - -void MacroAssembler::pop_CPU_state() { - // %%%%% need to implement this -} - - - -void MacroAssembler::verify_tlab() { -#ifdef ASSERT - if (UseTLAB && VerifyOops) { - Label next, next2, ok; - Register t1 = L0; - Register t2 = L1; - Register t3 = L2; - - save_frame(0); - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); - or3(t1, t2, t3); - cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next); - STOP("assert(top >= start)"); - should_not_reach_here(); - - bind(next); - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2); - or3(t3, t2, t3); - cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2); - STOP("assert(top <= end)"); - should_not_reach_here(); - - bind(next2); - and3(t3, MinObjAlignmentInBytesMask, t3); - cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok); - STOP("assert(aligned)"); - should_not_reach_here(); - - bind(ok); - restore(); - } -#endif -} - - -void MacroAssembler::eden_allocate( - Register obj, // result: pointer to object after successful allocation - Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise - int con_size_in_bytes, // object size in bytes if known at compile time - Register t1, // temp register - Register t2, // temp register - Label& slow_case // continuation point if fast allocation fails -){ - // make sure arguments make sense - assert_different_registers(obj, var_size_in_bytes, t1, t2); - assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); - assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); - - if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { - // No allocation in the shared eden. - ba_short(slow_case); - } else { - // get eden boundaries - // note: we need both top & top_addr! - const Register top_addr = t1; - const Register end = t2; - - CollectedHeap* ch = Universe::heap(); - set((intx)ch->top_addr(), top_addr); - intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); - ld_ptr(top_addr, delta, end); - ld_ptr(top_addr, 0, obj); - - // try to allocate - Label retry; - bind(retry); -#ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - btst(MinObjAlignmentInBytesMask, obj); - br(Assembler::zero, false, Assembler::pt, L); - delayed()->nop(); - STOP("eden top is not properly aligned"); - bind(L); - } -#endif // ASSERT - const Register free = end; - sub(end, obj, free); // compute amount of free space - if (var_size_in_bytes->is_valid()) { - // size is unknown at compile time - cmp(free, var_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, var_size_in_bytes, end); - } else { - // size is known at compile time - cmp(free, con_size_in_bytes); - br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case - delayed()->add(obj, con_size_in_bytes, end); - } - // Compare obj with the value at top_addr; if still equal, swap the value of - // end with the value at top_addr. If not equal, read the value at top_addr - // into end. - casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); - // if someone beat us on the allocation, try again, otherwise continue - cmp(obj, end); - brx(Assembler::notEqual, false, Assembler::pn, retry); - delayed()->mov(end, obj); // nop if successfull since obj == end - -#ifdef ASSERT - // make sure eden top is properly aligned - { - Label L; - const Register top_addr = t1; - - set((intx)ch->top_addr(), top_addr); - ld_ptr(top_addr, 0, top_addr); - btst(MinObjAlignmentInBytesMask, top_addr); - br(Assembler::zero, false, Assembler::pt, L); - delayed()->nop(); - STOP("eden top is not properly aligned"); - bind(L); - } -#endif // ASSERT - } -} - - -void MacroAssembler::tlab_allocate( - Register obj, // result: pointer to object after successful allocation - Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise - int con_size_in_bytes, // object size in bytes if known at compile time - Register t1, // temp register - Label& slow_case // continuation point if fast allocation fails -){ - // make sure arguments make sense - assert_different_registers(obj, var_size_in_bytes, t1); - assert(0 <= con_size_in_bytes && is_simm13(con_size_in_bytes), "illegal object size"); - assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); - - const Register free = t1; - - verify_tlab(); - - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), obj); - - // calculate amount of free space - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), free); - sub(free, obj, free); - - Label done; - if (var_size_in_bytes == noreg) { - cmp(free, con_size_in_bytes); - } else { - cmp(free, var_size_in_bytes); - } - br(Assembler::less, false, Assembler::pn, slow_case); - // calculate the new top pointer - if (var_size_in_bytes == noreg) { - delayed()->add(obj, con_size_in_bytes, free); - } else { - delayed()->add(obj, var_size_in_bytes, free); - } - - bind(done); - -#ifdef ASSERT - // make sure new free pointer is properly aligned - { - Label L; - btst(MinObjAlignmentInBytesMask, free); - br(Assembler::zero, false, Assembler::pt, L); - delayed()->nop(); - STOP("updated TLAB free is not properly aligned"); - bind(L); - } -#endif // ASSERT - - // update the tlab top pointer - st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset())); - verify_tlab(); -} - - -void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { - Register top = O0; - Register t1 = G1; - Register t2 = G3; - Register t3 = O1; - assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */); - Label do_refill, discard_tlab; - - if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { - // No allocation in the shared eden. - ba_short(slow_case); - } - - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top); - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1); - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2); - - // calculate amount of free space - sub(t1, top, t1); - srl_ptr(t1, LogHeapWordSize, t1); - - // Retain tlab and allocate object in shared space if - // the amount free in the tlab is too large to discard. - cmp(t1, t2); - brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab); - - // increment waste limit to prevent getting stuck on this slow path - delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2); - st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); - if (TLABStats) { - // increment number of slow_allocations - ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2); - add(t2, 1, t2); - stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset())); - } - ba_short(try_eden); - - bind(discard_tlab); - if (TLABStats) { - // increment number of refills - ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2); - add(t2, 1, t2); - stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset())); - // accumulate wastage - ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2); - add(t2, t1, t2); - stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); - } - - // if tlab is currently allocated (top or end != null) then - // fill [top, end + alignment_reserve) with array object - br_null_short(top, Assembler::pn, do_refill); - - set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); - st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word - // set klass to intArrayKlass - sub(t1, typeArrayOopDesc::header_size(T_INT), t1); - add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1); - sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1); - st(t1, top, arrayOopDesc::length_offset_in_bytes()); - set((intptr_t)Universe::intArrayKlassObj_addr(), t2); - ld_ptr(t2, 0, t2); - // store klass last. concurrent gcs assumes klass length is valid if - // klass field is not null. - store_klass(t2, top); - verify_oop(top); - - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1); - sub(top, t1, t1); // size of tlab's allocated portion - incr_allocated_bytes(t1, t2, t3); - - // refill the tlab with an eden allocation - bind(do_refill); - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1); - sll_ptr(t1, LogHeapWordSize, t1); - // allocate new tlab, address returned in top - eden_allocate(top, t1, 0, t2, t3, slow_case); - - st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset())); - st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset())); -#ifdef ASSERT - // check that tlab_size (t1) is still valid - { - Label ok; - ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2); - sll_ptr(t2, LogHeapWordSize, t2); - cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok); - STOP("assert(t1 == tlab_size)"); - should_not_reach_here(); - - bind(ok); - } -#endif // ASSERT - add(top, t1, top); // t1 is tlab_size - sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); - st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); - verify_tlab(); - ba_short(retry); -} - -void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, - Register t1, Register t2) { - // Bump total bytes allocated by this thread - assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch - assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2); - // v8 support has gone the way of the dodo - ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1); - add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1); - stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset())); -} - -Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { - switch (cond) { - // Note some conditions are synonyms for others - case Assembler::never: return Assembler::always; - case Assembler::zero: return Assembler::notZero; - case Assembler::lessEqual: return Assembler::greater; - case Assembler::less: return Assembler::greaterEqual; - case Assembler::lessEqualUnsigned: return Assembler::greaterUnsigned; - case Assembler::lessUnsigned: return Assembler::greaterEqualUnsigned; - case Assembler::negative: return Assembler::positive; - case Assembler::overflowSet: return Assembler::overflowClear; - case Assembler::always: return Assembler::never; - case Assembler::notZero: return Assembler::zero; - case Assembler::greater: return Assembler::lessEqual; - case Assembler::greaterEqual: return Assembler::less; - case Assembler::greaterUnsigned: return Assembler::lessEqualUnsigned; - case Assembler::greaterEqualUnsigned: return Assembler::lessUnsigned; - case Assembler::positive: return Assembler::negative; - case Assembler::overflowClear: return Assembler::overflowSet; - } - - ShouldNotReachHere(); return Assembler::overflowClear; -} - -void MacroAssembler::cond_inc(Assembler::Condition cond, address counter_ptr, - Register Rtmp1, Register Rtmp2 /*, Register Rtmp3, Register Rtmp4 */) { - Condition negated_cond = negate_condition(cond); - Label L; - brx(negated_cond, false, Assembler::pt, L); - delayed()->nop(); - inc_counter(counter_ptr, Rtmp1, Rtmp2); - bind(L); -} - -void MacroAssembler::inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2) { - AddressLiteral addrlit(counter_addr); - sethi(addrlit, Rtmp1); // Move hi22 bits into temporary register. - Address addr(Rtmp1, addrlit.low10()); // Build an address with low10 bits. - ld(addr, Rtmp2); - inc(Rtmp2); - st(Rtmp2, addr); -} - -void MacroAssembler::inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2) { - inc_counter((address) counter_addr, Rtmp1, Rtmp2); -} - -SkipIfEqual::SkipIfEqual( - MacroAssembler* masm, Register temp, const bool* flag_addr, - Assembler::Condition condition) { - _masm = masm; - AddressLiteral flag(flag_addr); - _masm->sethi(flag, temp); - _masm->ldub(temp, flag.low10(), temp); - _masm->tst(temp); - _masm->br(condition, false, Assembler::pt, _label); - _masm->delayed()->nop(); -} - -SkipIfEqual::~SkipIfEqual() { - _masm->bind(_label); -} - - -// Writes to stack successive pages until offset reached to check for -// stack overflow + shadow pages. This clobbers tsp and scratch. -void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp, - Register Rscratch) { - // Use stack pointer in temp stack pointer - mov(SP, Rtsp); - - // Bang stack for total size given plus stack shadow page size. - // Bang one page at a time because a large size can overflow yellow and - // red zones (the bang will fail but stack overflow handling can't tell that - // it was a stack overflow bang vs a regular segv). - int offset = os::vm_page_size(); - Register Roffset = Rscratch; - - Label loop; - bind(loop); - set((-offset)+STACK_BIAS, Rscratch); - st(G0, Rtsp, Rscratch); - set(offset, Roffset); - sub(Rsize, Roffset, Rsize); - cmp(Rsize, G0); - br(Assembler::greater, false, Assembler::pn, loop); - delayed()->sub(Rtsp, Roffset, Rtsp); - - // Bang down shadow pages too. - // The -1 because we already subtracted 1 page. - for (int i = 0; i< StackShadowPages-1; i++) { - set((-i*offset)+STACK_BIAS, Rscratch); - st(G0, Rtsp, Rscratch); - } -} - -/////////////////////////////////////////////////////////////////////////////////// -#ifndef SERIALGC - -static address satb_log_enqueue_with_frame = NULL; -static u_char* satb_log_enqueue_with_frame_end = NULL; - -static address satb_log_enqueue_frameless = NULL; -static u_char* satb_log_enqueue_frameless_end = NULL; - -static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? - -static void generate_satb_log_enqueue(bool with_frame) { - BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); - CodeBuffer buf(bb); - MacroAssembler masm(&buf); - -#define __ masm. - - address start = __ pc(); - Register pre_val; - - Label refill, restart; - if (with_frame) { - __ save_frame(0); - pre_val = I0; // Was O0 before the save. - } else { - pre_val = O0; - } - - int satb_q_index_byte_offset = - in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_index()); - - int satb_q_buf_byte_offset = - in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_buf()); - - assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && - in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), - "check sizes in assembly below"); - - __ bind(restart); - - // Load the index into the SATB buffer. PtrQueue::_index is a size_t - // so ld_ptr is appropriate. - __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); - - // index == 0? - __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); - - __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); - __ sub(L0, oopSize, L0); - - __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 - if (!with_frame) { - // Use return-from-leaf - __ retl(); - __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); - } else { - // Not delayed. - __ st_ptr(L0, G2_thread, satb_q_index_byte_offset); - } - if (with_frame) { - __ ret(); - __ delayed()->restore(); - } - __ bind(refill); - - address handle_zero = - CAST_FROM_FN_PTR(address, - &SATBMarkQueueSet::handle_zero_index_for_thread); - // This should be rare enough that we can afford to save all the - // scratch registers that the calling context might be using. - __ mov(G1_scratch, L0); - __ mov(G3_scratch, L1); - __ mov(G4, L2); - // We need the value of O0 above (for the write into the buffer), so we - // save and restore it. - __ mov(O0, L3); - // Since the call will overwrite O7, we save and restore that, as well. - __ mov(O7, L4); - __ call_VM_leaf(L5, handle_zero, G2_thread); - __ mov(L0, G1_scratch); - __ mov(L1, G3_scratch); - __ mov(L2, G4); - __ mov(L3, O0); - __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); - __ delayed()->mov(L4, O7); - - if (with_frame) { - satb_log_enqueue_with_frame = start; - satb_log_enqueue_with_frame_end = __ pc(); - } else { - satb_log_enqueue_frameless = start; - satb_log_enqueue_frameless_end = __ pc(); - } - -#undef __ -} - -static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { - if (with_frame) { - if (satb_log_enqueue_with_frame == 0) { - generate_satb_log_enqueue(with_frame); - assert(satb_log_enqueue_with_frame != 0, "postcondition."); - if (G1SATBPrintStubs) { - tty->print_cr("Generated with-frame satb enqueue:"); - Disassembler::decode((u_char*)satb_log_enqueue_with_frame, - satb_log_enqueue_with_frame_end, - tty); - } - } - } else { - if (satb_log_enqueue_frameless == 0) { - generate_satb_log_enqueue(with_frame); - assert(satb_log_enqueue_frameless != 0, "postcondition."); - if (G1SATBPrintStubs) { - tty->print_cr("Generated frameless satb enqueue:"); - Disassembler::decode((u_char*)satb_log_enqueue_frameless, - satb_log_enqueue_frameless_end, - tty); - } - } - } -} - -void MacroAssembler::g1_write_barrier_pre(Register obj, - Register index, - int offset, - Register pre_val, - Register tmp, - bool preserve_o_regs) { - Label filtered; - - if (obj == noreg) { - // We are not loading the previous value so make - // sure that we don't trash the value in pre_val - // with the code below. - assert_different_registers(pre_val, tmp); - } else { - // We will be loading the previous value - // in this code so... - assert(offset == 0 || index == noreg, "choose one"); - assert(pre_val == noreg, "check this code"); - } - - // Is marking active? - if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { - ld(G2, - in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_active()), - tmp); - } else { - guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, - "Assumption"); - ldsb(G2, - in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_active()), - tmp); - } - - // Is marking active? - cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); - - // Do we need to load the previous value? - if (obj != noreg) { - // Load the previous value... - if (index == noreg) { - if (Assembler::is_simm13(offset)) { - load_heap_oop(obj, offset, tmp); - } else { - set(offset, tmp); - load_heap_oop(obj, tmp, tmp); - } - } else { - load_heap_oop(obj, index, tmp); - } - // Previous value has been loaded into tmp - pre_val = tmp; - } - - assert(pre_val != noreg, "must have a real register"); - - // Is the previous value null? - cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered); - - // OK, it's not filtered, so we'll need to call enqueue. In the normal - // case, pre_val will be a scratch G-reg, but there are some cases in - // which it's an O-reg. In the first case, do a normal call. In the - // latter, do a save here and call the frameless version. - - guarantee(pre_val->is_global() || pre_val->is_out(), - "Or we need to think harder."); - - if (pre_val->is_global() && !preserve_o_regs) { - generate_satb_log_enqueue_if_necessary(true); // with frame - - call(satb_log_enqueue_with_frame); - delayed()->mov(pre_val, O0); - } else { - generate_satb_log_enqueue_if_necessary(false); // frameless - - save_frame(0); - call(satb_log_enqueue_frameless); - delayed()->mov(pre_val->after_save(), O0); - restore(); - } - - bind(filtered); -} - -static address dirty_card_log_enqueue = 0; -static u_char* dirty_card_log_enqueue_end = 0; - -// This gets to assume that o0 contains the object address. -static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { - BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); - CodeBuffer buf(bb); - MacroAssembler masm(&buf); -#define __ masm. - address start = __ pc(); - - Label not_already_dirty, restart, refill; - -#ifdef _LP64 - __ srlx(O0, CardTableModRefBS::card_shift, O0); -#else - __ srl(O0, CardTableModRefBS::card_shift, O0); -#endif - AddressLiteral addrlit(byte_map_base); - __ set(addrlit, O1); // O1 := - __ ldub(O0, O1, O2); // O2 := [O0 + O1] - - assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); - __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); - - // We didn't take the branch, so we're already dirty: return. - // Use return-from-leaf - __ retl(); - __ delayed()->nop(); - - // Not dirty. - __ bind(not_already_dirty); - - // Get O0 + O1 into a reg by itself - __ add(O0, O1, O3); - - // First, dirty it. - __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). - - int dirty_card_q_index_byte_offset = - in_bytes(JavaThread::dirty_card_queue_offset() + - PtrQueue::byte_offset_of_index()); - int dirty_card_q_buf_byte_offset = - in_bytes(JavaThread::dirty_card_queue_offset() + - PtrQueue::byte_offset_of_buf()); - __ bind(restart); - - // Load the index into the update buffer. PtrQueue::_index is - // a size_t so ld_ptr is appropriate here. - __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); - - // index == 0? - __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); - - __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); - __ sub(L0, oopSize, L0); - - __ st_ptr(O3, L1, L0); // [_buf + index] := I0 - // Use return-from-leaf - __ retl(); - __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); - - __ bind(refill); - address handle_zero = - CAST_FROM_FN_PTR(address, - &DirtyCardQueueSet::handle_zero_index_for_thread); - // This should be rare enough that we can afford to save all the - // scratch registers that the calling context might be using. - __ mov(G1_scratch, L3); - __ mov(G3_scratch, L5); - // We need the value of O3 above (for the write into the buffer), so we - // save and restore it. - __ mov(O3, L6); - // Since the call will overwrite O7, we save and restore that, as well. - __ mov(O7, L4); - - __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); - __ mov(L3, G1_scratch); - __ mov(L5, G3_scratch); - __ mov(L6, O3); - __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); - __ delayed()->mov(L4, O7); - - dirty_card_log_enqueue = start; - dirty_card_log_enqueue_end = __ pc(); - // XXX Should have a guarantee here about not going off the end! - // Does it already do so? Do an experiment... - -#undef __ - -} - -static inline void -generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { - if (dirty_card_log_enqueue == 0) { - generate_dirty_card_log_enqueue(byte_map_base); - assert(dirty_card_log_enqueue != 0, "postcondition."); - if (G1SATBPrintStubs) { - tty->print_cr("Generated dirty_card enqueue:"); - Disassembler::decode((u_char*)dirty_card_log_enqueue, - dirty_card_log_enqueue_end, - tty); - } - } -} - - -void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) { - - Label filtered; - MacroAssembler* post_filter_masm = this; - - if (new_val == G0) return; - - G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::G1SATBCT || - bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); - - if (G1RSBarrierRegionFilter) { - xor3(store_addr, new_val, tmp); -#ifdef _LP64 - srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); -#else - srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); -#endif - - // XXX Should I predict this taken or not? Does it matter? - cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); - } - - // If the "store_addr" register is an "in" or "local" register, move it to - // a scratch reg so we can pass it as an argument. - bool use_scr = !(store_addr->is_global() || store_addr->is_out()); - // Pick a scratch register different from "tmp". - Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); - // Make sure we use up the delay slot! - if (use_scr) { - post_filter_masm->mov(store_addr, scr); - } else { - post_filter_masm->nop(); - } - generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); - save_frame(0); - call(dirty_card_log_enqueue); - if (use_scr) { - delayed()->mov(scr, O0); - } else { - delayed()->mov(store_addr->after_save(), O0); - } - restore(); - - bind(filtered); -} - -#endif // SERIALGC -/////////////////////////////////////////////////////////////////////////////////// - -void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { - // If we're writing constant NULL, we can skip the write barrier. - if (new_val == G0) return; - CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef || - bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); - card_table_write(bs->byte_map_base, tmp, store_addr); -} - -void MacroAssembler::load_klass(Register src_oop, Register klass) { - // The number of bytes in this code is used by - // MachCallDynamicJavaNode::ret_addr_offset() - // if this changes, change that. - if (UseCompressedKlassPointers) { - lduw(src_oop, oopDesc::klass_offset_in_bytes(), klass); - decode_klass_not_null(klass); - } else { - ld_ptr(src_oop, oopDesc::klass_offset_in_bytes(), klass); - } -} - -void MacroAssembler::store_klass(Register klass, Register dst_oop) { - if (UseCompressedKlassPointers) { - assert(dst_oop != klass, "not enough registers"); - encode_klass_not_null(klass); - st(klass, dst_oop, oopDesc::klass_offset_in_bytes()); - } else { - st_ptr(klass, dst_oop, oopDesc::klass_offset_in_bytes()); - } -} - -void MacroAssembler::store_klass_gap(Register s, Register d) { - if (UseCompressedKlassPointers) { - assert(s != d, "not enough registers"); - st(s, d, oopDesc::klass_gap_offset_in_bytes()); - } -} - -void MacroAssembler::load_heap_oop(const Address& s, Register d) { - if (UseCompressedOops) { - lduw(s, d); - decode_heap_oop(d); - } else { - ld_ptr(s, d); - } -} - -void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) { - if (UseCompressedOops) { - lduw(s1, s2, d); - decode_heap_oop(d, d); - } else { - ld_ptr(s1, s2, d); - } -} - -void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) { - if (UseCompressedOops) { - lduw(s1, simm13a, d); - decode_heap_oop(d, d); - } else { - ld_ptr(s1, simm13a, d); - } -} - -void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d) { - if (s2.is_constant()) load_heap_oop(s1, s2.as_constant(), d); - else load_heap_oop(s1, s2.as_register(), d); -} - -void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) { - if (UseCompressedOops) { - assert(s1 != d && s2 != d, "not enough registers"); - encode_heap_oop(d); - st(d, s1, s2); - } else { - st_ptr(d, s1, s2); - } -} - -void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) { - if (UseCompressedOops) { - assert(s1 != d, "not enough registers"); - encode_heap_oop(d); - st(d, s1, simm13a); - } else { - st_ptr(d, s1, simm13a); - } -} - -void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) { - if (UseCompressedOops) { - assert(a.base() != d, "not enough registers"); - encode_heap_oop(d); - st(d, a, offset); - } else { - st_ptr(d, a, offset); - } -} - - -void MacroAssembler::encode_heap_oop(Register src, Register dst) { - assert (UseCompressedOops, "must be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - verify_oop(src); - if (Universe::narrow_oop_base() == NULL) { - srlx(src, LogMinObjAlignmentInBytes, dst); - return; - } - Label done; - if (src == dst) { - // optimize for frequent case src == dst - bpr(rc_nz, true, Assembler::pt, src, done); - delayed() -> sub(src, G6_heapbase, dst); // annuled if not taken - bind(done); - srlx(src, LogMinObjAlignmentInBytes, dst); - } else { - bpr(rc_z, false, Assembler::pn, src, done); - delayed() -> mov(G0, dst); - // could be moved before branch, and annulate delay, - // but may add some unneeded work decoding null - sub(src, G6_heapbase, dst); - srlx(dst, LogMinObjAlignmentInBytes, dst); - bind(done); - } -} - - -void MacroAssembler::encode_heap_oop_not_null(Register r) { - assert (UseCompressedOops, "must be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - verify_oop(r); - if (Universe::narrow_oop_base() != NULL) - sub(r, G6_heapbase, r); - srlx(r, LogMinObjAlignmentInBytes, r); -} - -void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) { - assert (UseCompressedOops, "must be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - verify_oop(src); - if (Universe::narrow_oop_base() == NULL) { - srlx(src, LogMinObjAlignmentInBytes, dst); - } else { - sub(src, G6_heapbase, dst); - srlx(dst, LogMinObjAlignmentInBytes, dst); - } -} - -// Same algorithm as oops.inline.hpp decode_heap_oop. -void MacroAssembler::decode_heap_oop(Register src, Register dst) { - assert (UseCompressedOops, "must be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - sllx(src, LogMinObjAlignmentInBytes, dst); - if (Universe::narrow_oop_base() != NULL) { - Label done; - bpr(rc_nz, true, Assembler::pt, dst, done); - delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken - bind(done); - } - verify_oop(dst); -} - -void MacroAssembler::decode_heap_oop_not_null(Register r) { - // Do not add assert code to this unless you change vtableStubs_sparc.cpp - // pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. - assert (UseCompressedOops, "must be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - sllx(r, LogMinObjAlignmentInBytes, r); - if (Universe::narrow_oop_base() != NULL) - add(r, G6_heapbase, r); -} - -void MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) { - // Do not add assert code to this unless you change vtableStubs_sparc.cpp - // pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. - assert (UseCompressedOops, "must be compressed"); - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - sllx(src, LogMinObjAlignmentInBytes, dst); - if (Universe::narrow_oop_base() != NULL) - add(dst, G6_heapbase, dst); -} - -void MacroAssembler::encode_klass_not_null(Register r) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); - assert (UseCompressedKlassPointers, "must be compressed"); - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - if (Universe::narrow_klass_base() != NULL) - sub(r, G6_heapbase, r); - srlx(r, LogKlassAlignmentInBytes, r); -} - -void MacroAssembler::encode_klass_not_null(Register src, Register dst) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); - assert (UseCompressedKlassPointers, "must be compressed"); - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - if (Universe::narrow_klass_base() == NULL) { - srlx(src, LogKlassAlignmentInBytes, dst); - } else { - sub(src, G6_heapbase, dst); - srlx(dst, LogKlassAlignmentInBytes, dst); - } -} - -void MacroAssembler::decode_klass_not_null(Register r) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); - // Do not add assert code to this unless you change vtableStubs_sparc.cpp - // pd_code_size_limit. - assert (UseCompressedKlassPointers, "must be compressed"); - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - sllx(r, LogKlassAlignmentInBytes, r); - if (Universe::narrow_klass_base() != NULL) - add(r, G6_heapbase, r); -} - -void MacroAssembler::decode_klass_not_null(Register src, Register dst) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); - // Do not add assert code to this unless you change vtableStubs_sparc.cpp - // pd_code_size_limit. - assert (UseCompressedKlassPointers, "must be compressed"); - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - sllx(src, LogKlassAlignmentInBytes, dst); - if (Universe::narrow_klass_base() != NULL) - add(dst, G6_heapbase, dst); -} - -void MacroAssembler::reinit_heapbase() { - if (UseCompressedOops || UseCompressedKlassPointers) { - AddressLiteral base(Universe::narrow_ptrs_base_addr()); - load_ptr_contents(base, G6_heapbase); - } -} - -// Compare char[] arrays aligned to 4 bytes. -void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, - Register limit, Register result, - Register chr1, Register chr2, Label& Ldone) { - Label Lvector, Lloop; - assert(chr1 == result, "should be the same"); - - // Note: limit contains number of bytes (2*char_elements) != 0. - andcc(limit, 0x2, chr1); // trailing character ? - br(Assembler::zero, false, Assembler::pt, Lvector); - delayed()->nop(); - - // compare the trailing char - sub(limit, sizeof(jchar), limit); - lduh(ary1, limit, chr1); - lduh(ary2, limit, chr2); - cmp(chr1, chr2); - br(Assembler::notEqual, true, Assembler::pt, Ldone); - delayed()->mov(G0, result); // not equal - - // only one char ? - cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); - delayed()->add(G0, 1, result); // zero-length arrays are equal - - // word by word compare, dont't need alignment check - bind(Lvector); - // Shift ary1 and ary2 to the end of the arrays, negate limit - add(ary1, limit, ary1); - add(ary2, limit, ary2); - neg(limit, limit); - - lduw(ary1, limit, chr1); - bind(Lloop); - lduw(ary2, limit, chr2); - cmp(chr1, chr2); - br(Assembler::notEqual, true, Assembler::pt, Ldone); - delayed()->mov(G0, result); // not equal - inccc(limit, 2*sizeof(jchar)); - // annul LDUW if branch is not taken to prevent access past end of array - br(Assembler::notZero, true, Assembler::pt, Lloop); - delayed()->lduw(ary1, limit, chr1); // hoisted - - // Caller should set it: - // add(G0, 1, result); // equals -} - -// Use BIS for zeroing (count is in bytes). -void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) { - assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing"); - Register end = count; - int cache_line_size = VM_Version::prefetch_data_size(); - // Minimum count when BIS zeroing can be used since - // it needs membar which is expensive. - int block_zero_size = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit); - - Label small_loop; - // Check if count is negative (dead code) or zero. - // Note, count uses 64bit in 64 bit VM. - cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone); - - // Use BIS zeroing only for big arrays since it requires membar. - if (Assembler::is_simm13(block_zero_size)) { // < 4096 - cmp(count, block_zero_size); - } else { - set(block_zero_size, temp); - cmp(count, temp); - } - br(Assembler::lessUnsigned, false, Assembler::pt, small_loop); - delayed()->add(to, count, end); - - // Note: size is >= three (32 bytes) cache lines. - - // Clean the beginning of space up to next cache line. - for (int offs = 0; offs < cache_line_size; offs += 8) { - stx(G0, to, offs); - } - - // align to next cache line - add(to, cache_line_size, to); - and3(to, -cache_line_size, to); - - // Note: size left >= two (32 bytes) cache lines. - - // BIS should not be used to zero tail (64 bytes) - // to avoid zeroing a header of the following object. - sub(end, (cache_line_size*2)-8, end); - - Label bis_loop; - bind(bis_loop); - stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY); - add(to, cache_line_size, to); - cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop); - - // BIS needs membar. - membar(Assembler::StoreLoad); - - add(end, (cache_line_size*2)-8, end); // restore end - cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone); - - // Clean the tail. - bind(small_loop); - stx(G0, to, 0); - add(to, 8, to); - cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop); - nop(); // Separate short branches -} diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp index f9b893258be..6e4812abcd4 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp @@ -25,554 +25,13 @@ #ifndef CPU_SPARC_VM_ASSEMBLER_SPARC_HPP #define CPU_SPARC_VM_ASSEMBLER_SPARC_HPP -class BiasedLockingCounters; - -// promises that the system will not use traps 16-31 -#define ST_RESERVED_FOR_USER_0 0x10 - -/* Written: David Ungar 4/19/97 */ - -// Contains all the definitions needed for sparc assembly code generation. - -// Register aliases for parts of the system: - -// 64 bit values can be kept in g1-g5, o1-o5 and o7 and all 64 bits are safe -// across context switches in V8+ ABI. Of course, there are no 64 bit regs -// in V8 ABI. All 64 bits are preserved in V9 ABI for all registers. - -// g2-g4 are scratch registers called "application globals". Their -// meaning is reserved to the "compilation system"--which means us! -// They are are not supposed to be touched by ordinary C code, although -// highly-optimized C code might steal them for temps. They are safe -// across thread switches, and the ABI requires that they be safe -// across function calls. -// -// g1 and g3 are touched by more modules. V8 allows g1 to be clobbered -// across func calls, and V8+ also allows g5 to be clobbered across -// func calls. Also, g1 and g5 can get touched while doing shared -// library loading. -// -// We must not touch g7 (it is the thread-self register) and g6 is -// reserved for certain tools. g0, of course, is always zero. -// -// (Sources: SunSoft Compilers Group, thread library engineers.) - -// %%%% The interpreter should be revisited to reduce global scratch regs. - -// This global always holds the current JavaThread pointer: - -REGISTER_DECLARATION(Register, G2_thread , G2); -REGISTER_DECLARATION(Register, G6_heapbase , G6); - -// The following globals are part of the Java calling convention: - -REGISTER_DECLARATION(Register, G5_method , G5); -REGISTER_DECLARATION(Register, G5_megamorphic_method , G5_method); -REGISTER_DECLARATION(Register, G5_inline_cache_reg , G5_method); - -// The following globals are used for the new C1 & interpreter calling convention: -REGISTER_DECLARATION(Register, Gargs , G4); // pointing to the last argument - -// This local is used to preserve G2_thread in the interpreter and in stubs: -REGISTER_DECLARATION(Register, L7_thread_cache , L7); - -// These globals are used as scratch registers in the interpreter: - -REGISTER_DECLARATION(Register, Gframe_size , G1); // SAME REG as G1_scratch -REGISTER_DECLARATION(Register, G1_scratch , G1); // also SAME -REGISTER_DECLARATION(Register, G3_scratch , G3); -REGISTER_DECLARATION(Register, G4_scratch , G4); - -// These globals are used as short-lived scratch registers in the compiler: - -REGISTER_DECLARATION(Register, Gtemp , G5); - -// JSR 292 fixed register usages: -REGISTER_DECLARATION(Register, G5_method_type , G5); -REGISTER_DECLARATION(Register, G3_method_handle , G3); -REGISTER_DECLARATION(Register, L7_mh_SP_save , L7); - -// The compiler requires that G5_megamorphic_method is G5_inline_cache_klass, -// because a single patchable "set" instruction (NativeMovConstReg, -// or NativeMovConstPatching for compiler1) instruction -// serves to set up either quantity, depending on whether the compiled -// call site is an inline cache or is megamorphic. See the function -// CompiledIC::set_to_megamorphic. -// -// If a inline cache targets an interpreted method, then the -// G5 register will be used twice during the call. First, -// the call site will be patched to load a compiledICHolder -// into G5. (This is an ordered pair of ic_klass, method.) -// The c2i adapter will first check the ic_klass, then load -// G5_method with the method part of the pair just before -// jumping into the interpreter. -// -// Note that G5_method is only the method-self for the interpreter, -// and is logically unrelated to G5_megamorphic_method. -// -// Invariants on G2_thread (the JavaThread pointer): -// - it should not be used for any other purpose anywhere -// - it must be re-initialized by StubRoutines::call_stub() -// - it must be preserved around every use of call_VM - -// We can consider using g2/g3/g4 to cache more values than the -// JavaThread, such as the card-marking base or perhaps pointers into -// Eden. It's something of a waste to use them as scratch temporaries, -// since they are not supposed to be volatile. (Of course, if we find -// that Java doesn't benefit from application globals, then we can just -// use them as ordinary temporaries.) -// -// Since g1 and g5 (and/or g6) are the volatile (caller-save) registers, -// it makes sense to use them routinely for procedure linkage, -// whenever the On registers are not applicable. Examples: G5_method, -// G5_inline_cache_klass, and a double handful of miscellaneous compiler -// stubs. This means that compiler stubs, etc., should be kept to a -// maximum of two or three G-register arguments. - - -// stub frames - -REGISTER_DECLARATION(Register, Lentry_args , L0); // pointer to args passed to callee (interpreter) not stub itself - -// Interpreter frames - -#ifdef CC_INTERP -REGISTER_DECLARATION(Register, Lstate , L0); // interpreter state object pointer -REGISTER_DECLARATION(Register, L1_scratch , L1); // scratch -REGISTER_DECLARATION(Register, Lmirror , L1); // mirror (for native methods only) -REGISTER_DECLARATION(Register, L2_scratch , L2); -REGISTER_DECLARATION(Register, L3_scratch , L3); -REGISTER_DECLARATION(Register, L4_scratch , L4); -REGISTER_DECLARATION(Register, Lscratch , L5); // C1 uses -REGISTER_DECLARATION(Register, Lscratch2 , L6); // C1 uses -REGISTER_DECLARATION(Register, L7_scratch , L7); // constant pool cache -REGISTER_DECLARATION(Register, O5_savedSP , O5); -REGISTER_DECLARATION(Register, I5_savedSP , I5); // Saved SP before bumping for locals. This is simply - // a copy SP, so in 64-bit it's a biased value. The bias - // is added and removed as needed in the frame code. -// Interface to signature handler -REGISTER_DECLARATION(Register, Llocals , L7); // pointer to locals for signature handler -REGISTER_DECLARATION(Register, Lmethod , L6); // Method* when calling signature handler - -#else -REGISTER_DECLARATION(Register, Lesp , L0); // expression stack pointer -REGISTER_DECLARATION(Register, Lbcp , L1); // pointer to next bytecode -REGISTER_DECLARATION(Register, Lmethod , L2); -REGISTER_DECLARATION(Register, Llocals , L3); -REGISTER_DECLARATION(Register, Largs , L3); // pointer to locals for signature handler - // must match Llocals in asm interpreter -REGISTER_DECLARATION(Register, Lmonitors , L4); -REGISTER_DECLARATION(Register, Lbyte_code , L5); -// When calling out from the interpreter we record SP so that we can remove any extra stack -// space allocated during adapter transitions. This register is only live from the point -// of the call until we return. -REGISTER_DECLARATION(Register, Llast_SP , L5); -REGISTER_DECLARATION(Register, Lscratch , L5); -REGISTER_DECLARATION(Register, Lscratch2 , L6); -REGISTER_DECLARATION(Register, LcpoolCache , L6); // constant pool cache - -REGISTER_DECLARATION(Register, O5_savedSP , O5); -REGISTER_DECLARATION(Register, I5_savedSP , I5); // Saved SP before bumping for locals. This is simply - // a copy SP, so in 64-bit it's a biased value. The bias - // is added and removed as needed in the frame code. -REGISTER_DECLARATION(Register, IdispatchTables , I4); // Base address of the bytecode dispatch tables -REGISTER_DECLARATION(Register, IdispatchAddress , I3); // Register which saves the dispatch address for each bytecode -REGISTER_DECLARATION(Register, ImethodDataPtr , I2); // Pointer to the current method data -#endif /* CC_INTERP */ - -// NOTE: Lscratch2 and LcpoolCache point to the same registers in -// the interpreter code. If Lscratch2 needs to be used for some -// purpose than LcpoolCache should be restore after that for -// the interpreter to work right -// (These assignments must be compatible with L7_thread_cache; see above.) - -// Since Lbcp points into the middle of the method object, -// it is temporarily converted into a "bcx" during GC. - -// Exception processing -// These registers are passed into exception handlers. -// All exception handlers require the exception object being thrown. -// In addition, an nmethod's exception handler must be passed -// the address of the call site within the nmethod, to allow -// proper selection of the applicable catch block. -// (Interpreter frames use their own bcp() for this purpose.) -// -// The Oissuing_pc value is not always needed. When jumping to a -// handler that is known to be interpreted, the Oissuing_pc value can be -// omitted. An actual catch block in compiled code receives (from its -// nmethod's exception handler) the thrown exception in the Oexception, -// but it doesn't need the Oissuing_pc. -// -// If an exception handler (either interpreted or compiled) -// discovers there is no applicable catch block, it updates -// the Oissuing_pc to the continuation PC of its own caller, -// pops back to that caller's stack frame, and executes that -// caller's exception handler. Obviously, this process will -// iterate until the control stack is popped back to a method -// containing an applicable catch block. A key invariant is -// that the Oissuing_pc value is always a value local to -// the method whose exception handler is currently executing. -// -// Note: The issuing PC value is __not__ a raw return address (I7 value). -// It is a "return pc", the address __following__ the call. -// Raw return addresses are converted to issuing PCs by frame::pc(), -// or by stubs. Issuing PCs can be used directly with PC range tables. -// -REGISTER_DECLARATION(Register, Oexception , O0); // exception being thrown -REGISTER_DECLARATION(Register, Oissuing_pc , O1); // where the exception is coming from - - -// These must occur after the declarations above -#ifndef DONT_USE_REGISTER_DEFINES - -#define Gthread AS_REGISTER(Register, Gthread) -#define Gmethod AS_REGISTER(Register, Gmethod) -#define Gmegamorphic_method AS_REGISTER(Register, Gmegamorphic_method) -#define Ginline_cache_reg AS_REGISTER(Register, Ginline_cache_reg) -#define Gargs AS_REGISTER(Register, Gargs) -#define Lthread_cache AS_REGISTER(Register, Lthread_cache) -#define Gframe_size AS_REGISTER(Register, Gframe_size) -#define Gtemp AS_REGISTER(Register, Gtemp) - -#ifdef CC_INTERP -#define Lstate AS_REGISTER(Register, Lstate) -#define Lesp AS_REGISTER(Register, Lesp) -#define L1_scratch AS_REGISTER(Register, L1_scratch) -#define Lmirror AS_REGISTER(Register, Lmirror) -#define L2_scratch AS_REGISTER(Register, L2_scratch) -#define L3_scratch AS_REGISTER(Register, L3_scratch) -#define L4_scratch AS_REGISTER(Register, L4_scratch) -#define Lscratch AS_REGISTER(Register, Lscratch) -#define Lscratch2 AS_REGISTER(Register, Lscratch2) -#define L7_scratch AS_REGISTER(Register, L7_scratch) -#define Ostate AS_REGISTER(Register, Ostate) -#else -#define Lesp AS_REGISTER(Register, Lesp) -#define Lbcp AS_REGISTER(Register, Lbcp) -#define Lmethod AS_REGISTER(Register, Lmethod) -#define Llocals AS_REGISTER(Register, Llocals) -#define Lmonitors AS_REGISTER(Register, Lmonitors) -#define Lbyte_code AS_REGISTER(Register, Lbyte_code) -#define Lscratch AS_REGISTER(Register, Lscratch) -#define Lscratch2 AS_REGISTER(Register, Lscratch2) -#define LcpoolCache AS_REGISTER(Register, LcpoolCache) -#endif /* ! CC_INTERP */ - -#define Lentry_args AS_REGISTER(Register, Lentry_args) -#define I5_savedSP AS_REGISTER(Register, I5_savedSP) -#define O5_savedSP AS_REGISTER(Register, O5_savedSP) -#define IdispatchAddress AS_REGISTER(Register, IdispatchAddress) -#define ImethodDataPtr AS_REGISTER(Register, ImethodDataPtr) -#define IdispatchTables AS_REGISTER(Register, IdispatchTables) - -#define Oexception AS_REGISTER(Register, Oexception) -#define Oissuing_pc AS_REGISTER(Register, Oissuing_pc) - - -#endif - -// Address is an abstraction used to represent a memory location. -// -// Note: A register location is represented via a Register, not -// via an address for efficiency & simplicity reasons. - -class Address VALUE_OBJ_CLASS_SPEC { - private: - Register _base; // Base register. - RegisterOrConstant _index_or_disp; // Index register or constant displacement. - RelocationHolder _rspec; - - public: - Address() : _base(noreg), _index_or_disp(noreg) {} - - Address(Register base, RegisterOrConstant index_or_disp) - : _base(base), - _index_or_disp(index_or_disp) { - } - - Address(Register base, Register index) - : _base(base), - _index_or_disp(index) { - } - - Address(Register base, int disp) - : _base(base), - _index_or_disp(disp) { - } - -#ifdef ASSERT - // ByteSize is only a class when ASSERT is defined, otherwise it's an int. - Address(Register base, ByteSize disp) - : _base(base), - _index_or_disp(in_bytes(disp)) { - } -#endif - - // accessors - Register base() const { return _base; } - Register index() const { return _index_or_disp.as_register(); } - int disp() const { return _index_or_disp.as_constant(); } - - bool has_index() const { return _index_or_disp.is_register(); } - bool has_disp() const { return _index_or_disp.is_constant(); } - - bool uses(Register reg) const { return base() == reg || (has_index() && index() == reg); } - - const relocInfo::relocType rtype() { return _rspec.type(); } - const RelocationHolder& rspec() { return _rspec; } - - RelocationHolder rspec(int offset) const { - return offset == 0 ? _rspec : _rspec.plus(offset); - } - - inline bool is_simm13(int offset = 0); // check disp+offset for overflow - - Address plus_disp(int plusdisp) const { // bump disp by a small amount - assert(_index_or_disp.is_constant(), "must have a displacement"); - Address a(base(), disp() + plusdisp); - return a; - } - bool is_same_address(Address a) const { - // disregard _rspec - return base() == a.base() && (has_index() ? index() == a.index() : disp() == a.disp()); - } - - Address after_save() const { - Address a = (*this); - a._base = a._base->after_save(); - return a; - } - - Address after_restore() const { - Address a = (*this); - a._base = a._base->after_restore(); - return a; - } - - // Convert the raw encoding form into the form expected by the - // constructor for Address. - static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); - - friend class Assembler; -}; - - -class AddressLiteral VALUE_OBJ_CLASS_SPEC { - private: - address _address; - RelocationHolder _rspec; - - RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { - switch (rtype) { - case relocInfo::external_word_type: - return external_word_Relocation::spec(addr); - case relocInfo::internal_word_type: - return internal_word_Relocation::spec(addr); -#ifdef _LP64 - case relocInfo::opt_virtual_call_type: - return opt_virtual_call_Relocation::spec(); - case relocInfo::static_call_type: - return static_call_Relocation::spec(); - case relocInfo::runtime_call_type: - return runtime_call_Relocation::spec(); -#endif - case relocInfo::none: - return RelocationHolder(); - default: - ShouldNotReachHere(); - return RelocationHolder(); - } - } - - protected: - // creation - AddressLiteral() : _address(NULL), _rspec(NULL) {} - - public: - AddressLiteral(address addr, RelocationHolder const& rspec) - : _address(addr), - _rspec(rspec) {} - - // Some constructors to avoid casting at the call site. - AddressLiteral(jobject obj, RelocationHolder const& rspec) - : _address((address) obj), - _rspec(rspec) {} - - AddressLiteral(intptr_t value, RelocationHolder const& rspec) - : _address((address) value), - _rspec(rspec) {} - - AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - // Some constructors to avoid casting at the call site. - AddressLiteral(address* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - AddressLiteral(bool* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - AddressLiteral(const bool* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - AddressLiteral(signed char* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - AddressLiteral(int* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - AddressLiteral(intptr_t addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - -#ifdef _LP64 - // 32-bit complains about a multiple declaration for int*. - AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} -#endif - - AddressLiteral(Metadata* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - AddressLiteral(Metadata** addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - AddressLiteral(float* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - AddressLiteral(double* addr, relocInfo::relocType rtype = relocInfo::none) - : _address((address) addr), - _rspec(rspec_from_rtype(rtype, (address) addr)) {} - - intptr_t value() const { return (intptr_t) _address; } - int low10() const; - - const relocInfo::relocType rtype() const { return _rspec.type(); } - const RelocationHolder& rspec() const { return _rspec; } - - RelocationHolder rspec(int offset) const { - return offset == 0 ? _rspec : _rspec.plus(offset); - } -}; - -// Convenience classes -class ExternalAddress: public AddressLiteral { - private: - static relocInfo::relocType reloc_for_target(address target) { - // Sometimes ExternalAddress is used for values which aren't - // exactly addresses, like the card table base. - // external_word_type can't be used for values in the first page - // so just skip the reloc in that case. - return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none; - } - - public: - ExternalAddress(address target) : AddressLiteral(target, reloc_for_target( target)) {} - ExternalAddress(Metadata** target) : AddressLiteral(target, reloc_for_target((address) target)) {} -}; - -inline Address RegisterImpl::address_in_saved_window() const { - return (Address(SP, (sp_offset_in_saved_window() * wordSize) + STACK_BIAS)); -} - - - -// Argument is an abstraction used to represent an outgoing -// actual argument or an incoming formal parameter, whether -// it resides in memory or in a register, in a manner consistent -// with the SPARC Application Binary Interface, or ABI. This is -// often referred to as the native or C calling convention. - -class Argument VALUE_OBJ_CLASS_SPEC { - private: - int _number; - bool _is_in; - - public: -#ifdef _LP64 - enum { - n_register_parameters = 6, // only 6 registers may contain integer parameters - n_float_register_parameters = 16 // Can have up to 16 floating registers - }; -#else - enum { - n_register_parameters = 6 // only 6 registers may contain integer parameters - }; -#endif - - // creation - Argument(int number, bool is_in) : _number(number), _is_in(is_in) {} - - int number() const { return _number; } - bool is_in() const { return _is_in; } - bool is_out() const { return !is_in(); } - - Argument successor() const { return Argument(number() + 1, is_in()); } - Argument as_in() const { return Argument(number(), true ); } - Argument as_out() const { return Argument(number(), false); } - - // locating register-based arguments: - bool is_register() const { return _number < n_register_parameters; } - -#ifdef _LP64 - // locating Floating Point register-based arguments: - bool is_float_register() const { return _number < n_float_register_parameters; } - - FloatRegister as_float_register() const { - assert(is_float_register(), "must be a register argument"); - return as_FloatRegister(( number() *2 ) + 1); - } - FloatRegister as_double_register() const { - assert(is_float_register(), "must be a register argument"); - return as_FloatRegister(( number() *2 )); - } -#endif - - Register as_register() const { - assert(is_register(), "must be a register argument"); - return is_in() ? as_iRegister(number()) : as_oRegister(number()); - } - - // locating memory-based arguments - Address as_address() const { - assert(!is_register(), "must be a memory argument"); - return address_in_frame(); - } - - // When applied to a register-based argument, give the corresponding address - // into the 6-word area "into which callee may store register arguments" - // (This is a different place than the corresponding register-save area location.) - Address address_in_frame() const; - - // debugging - const char* name() const; - - friend class Assembler; -}; - +#include "asm/register.hpp" // The SPARC Assembler: Pure assembler doing NO optimizations on the instruction // level; i.e., what you write // is what you get. The Assembler is generating code into a CodeBuffer. class Assembler : public AbstractAssembler { - protected: - - static void print_instruction(int inst); - static int patched_branch(int dest_pos, int inst, int inst_pos); - static int branch_destination(int inst, int pos); - - friend class AbstractAssembler; friend class AddressLiteral; @@ -1230,10 +689,7 @@ public: // pp 135 (addc was addx in v8) inline void add(Register s1, Register s2, Register d ); - inline void add(Register s1, int simm13a, Register d, relocInfo::relocType rtype = relocInfo::none); - inline void add(Register s1, int simm13a, Register d, RelocationHolder const& rspec); - inline void add(Register s1, RegisterOrConstant s2, Register d, int offset = 0); - inline void add(const Address& a, Register d, int offset = 0); + inline void add(Register s1, int simm13a, Register d ); void addcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); } void addcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } @@ -1395,12 +851,9 @@ public: // 171 - inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d); inline void ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d); inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec = RelocationHolder()); - inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0); - inline void ldfsr( Register s1, Register s2 ); inline void ldfsr( Register s1, int simm13a); @@ -1438,36 +891,9 @@ public: inline void lduw( Register s1, int simm13a, Register d); inline void ldx( Register s1, Register s2, Register d ); inline void ldx( Register s1, int simm13a, Register d); - inline void ld( Register s1, Register s2, Register d ); - inline void ld( Register s1, int simm13a, Register d); inline void ldd( Register s1, Register s2, Register d ); inline void ldd( Register s1, int simm13a, Register d); -#ifdef ASSERT - // ByteSize is only a class when ASSERT is defined, otherwise it's an int. - inline void ld( Register s1, ByteSize simm13a, Register d); -#endif - - inline void ldsb(const Address& a, Register d, int offset = 0); - inline void ldsh(const Address& a, Register d, int offset = 0); - inline void ldsw(const Address& a, Register d, int offset = 0); - inline void ldub(const Address& a, Register d, int offset = 0); - inline void lduh(const Address& a, Register d, int offset = 0); - inline void lduw(const Address& a, Register d, int offset = 0); - inline void ldx( const Address& a, Register d, int offset = 0); - inline void ld( const Address& a, Register d, int offset = 0); - inline void ldd( const Address& a, Register d, int offset = 0); - - inline void ldub( Register s1, RegisterOrConstant s2, Register d ); - inline void ldsb( Register s1, RegisterOrConstant s2, Register d ); - inline void lduh( Register s1, RegisterOrConstant s2, Register d ); - inline void ldsh( Register s1, RegisterOrConstant s2, Register d ); - inline void lduw( Register s1, RegisterOrConstant s2, Register d ); - inline void ldsw( Register s1, RegisterOrConstant s2, Register d ); - inline void ldx( Register s1, RegisterOrConstant s2, Register d ); - inline void ld( Register s1, RegisterOrConstant s2, Register d ); - inline void ldd( Register s1, RegisterOrConstant s2, Register d ); - // pp 177 void ldsba( Register s1, Register s2, int ia, Register d ) { emit_long( op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); } @@ -1505,7 +931,6 @@ public: void andcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(and_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } void andn( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 ) | rs1(s1) | rs2(s2) ); } void andn( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } - void andn( Register s1, RegisterOrConstant s2, Register d); void andncc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); } void andncc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(andn_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } void or3( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(or_op3 ) | rs1(s1) | rs2(s2) ); } @@ -1584,13 +1009,12 @@ public: // pp 203 - void prefetch( Register s1, Register s2, PrefetchFcn f); - void prefetch( Register s1, int simm13a, PrefetchFcn f); + void prefetch( Register s1, Register s2, PrefetchFcn f) { v9_only(); emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | rs2(s2) ); } + void prefetch( Register s1, int simm13a, PrefetchFcn f) { v9_only(); emit_data( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } + void prefetcha( Register s1, Register s2, int ia, PrefetchFcn f ) { v9_only(); emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); } void prefetcha( Register s1, int simm13a, PrefetchFcn f ) { v9_only(); emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3 | alt_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } - inline void prefetch(const Address& a, PrefetchFcn F, int offset = 0); - // pp 208 // not implementing read privileged register @@ -1653,10 +1077,8 @@ public: // pp 222 - inline void stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2); inline void stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2); inline void stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a); - inline void stf( FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset = 0); inline void stfsr( Register s1, Register s2 ); inline void stfsr( Register s1, int simm13a); @@ -1676,32 +1098,11 @@ public: inline void sth( Register d, Register s1, int simm13a); inline void stw( Register d, Register s1, Register s2 ); inline void stw( Register d, Register s1, int simm13a); - inline void st( Register d, Register s1, Register s2 ); - inline void st( Register d, Register s1, int simm13a); inline void stx( Register d, Register s1, Register s2 ); inline void stx( Register d, Register s1, int simm13a); inline void std( Register d, Register s1, Register s2 ); inline void std( Register d, Register s1, int simm13a); -#ifdef ASSERT - // ByteSize is only a class when ASSERT is defined, otherwise it's an int. - inline void st( Register d, Register s1, ByteSize simm13a); -#endif - - inline void stb( Register d, const Address& a, int offset = 0 ); - inline void sth( Register d, const Address& a, int offset = 0 ); - inline void stw( Register d, const Address& a, int offset = 0 ); - inline void stx( Register d, const Address& a, int offset = 0 ); - inline void st( Register d, const Address& a, int offset = 0 ); - inline void std( Register d, const Address& a, int offset = 0 ); - - inline void stb( Register d, Register s1, RegisterOrConstant s2 ); - inline void sth( Register d, Register s1, RegisterOrConstant s2 ); - inline void stw( Register d, Register s1, RegisterOrConstant s2 ); - inline void stx( Register d, Register s1, RegisterOrConstant s2 ); - inline void std( Register d, Register s1, RegisterOrConstant s2 ); - inline void st( Register d, Register s1, RegisterOrConstant s2 ); - // pp 177 void stba( Register d, Register s1, Register s2, int ia ) { emit_long( op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2) ); } @@ -1731,9 +1132,6 @@ public: void sub( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 ) | rs1(s1) | rs2(s2) ); } void sub( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } - // Note: offset is added to s2. - inline void sub(Register s1, RegisterOrConstant s2, Register d, int offset = 0); - void subcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | rs2(s2) ); } void subcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(sub_op3 | cc_bit_op3 ) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } void subc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(subc_op3 ) | rs1(s1) | rs2(s2) ); } @@ -1745,7 +1143,6 @@ public: inline void swap( Register s1, Register s2, Register d ); inline void swap( Register s1, int simm13a, Register d); - inline void swap( Address& a, Register d, int offset = 0 ); // pp 232 @@ -1799,879 +1196,12 @@ public: void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_long( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); } void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_long( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); } - - - - // For a given register condition, return the appropriate condition code - // Condition (the one you would use to get the same effect after "tst" on - // the target register.) - Assembler::Condition reg_cond_to_cc_cond(RCondition in); - - // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { #ifdef CHECK_DELAY delay_state = no_delay; #endif } - - // Testing -#ifndef PRODUCT - void test_v9(); - void test_v8_onlys(); -#endif }; - -class RegistersForDebugging : public StackObj { - public: - intptr_t i[8], l[8], o[8], g[8]; - float f[32]; - double d[32]; - - void print(outputStream* s); - - static int i_offset(int j) { return offset_of(RegistersForDebugging, i[j]); } - static int l_offset(int j) { return offset_of(RegistersForDebugging, l[j]); } - static int o_offset(int j) { return offset_of(RegistersForDebugging, o[j]); } - static int g_offset(int j) { return offset_of(RegistersForDebugging, g[j]); } - static int f_offset(int j) { return offset_of(RegistersForDebugging, f[j]); } - static int d_offset(int j) { return offset_of(RegistersForDebugging, d[j / 2]); } - - // gen asm code to save regs - static void save_registers(MacroAssembler* a); - - // restore global registers in case C code disturbed them - static void restore_registers(MacroAssembler* a, Register r); - - -}; - - -// MacroAssembler extends Assembler by a few frequently used macros. -// -// Most of the standard SPARC synthetic ops are defined here. -// Instructions for which a 'better' code sequence exists depending -// on arguments should also go in here. - -#define JMP2(r1, r2) jmp(r1, r2, __FILE__, __LINE__) -#define JMP(r1, off) jmp(r1, off, __FILE__, __LINE__) -#define JUMP(a, temp, off) jump(a, temp, off, __FILE__, __LINE__) -#define JUMPL(a, temp, d, off) jumpl(a, temp, d, off, __FILE__, __LINE__) - - -class MacroAssembler: public Assembler { - protected: - // Support for VM calls - // This is the base routine called by the different versions of call_VM_leaf. The interpreter - // may customize this version by overriding it for its purposes (e.g., to save/restore - // additional registers when doing a VM call). -#ifdef CC_INTERP - #define VIRTUAL -#else - #define VIRTUAL virtual -#endif - - VIRTUAL void call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments); - - // - // It is imperative that all calls into the VM are handled via the call_VM macros. - // They make sure that the stack linkage is setup correctly. call_VM's correspond - // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. - // - // This is the base routine called by the different versions of call_VM. The interpreter - // may customize this version by overriding it for its purposes (e.g., to save/restore - // additional registers when doing a VM call). - // - // A non-volatile java_thread_cache register should be specified so - // that the G2_thread value can be preserved across the call. - // (If java_thread_cache is noreg, then a slow get_thread call - // will re-initialize the G2_thread.) call_VM_base returns the register that contains the - // thread. - // - // If no last_java_sp is specified (noreg) than SP will be used instead. - - virtual void call_VM_base( - Register oop_result, // where an oop-result ends up if any; use noreg otherwise - Register java_thread_cache, // the thread if computed before ; use noreg otherwise - Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise - address entry_point, // the entry point - int number_of_arguments, // the number of arguments (w/o thread) to pop after call - bool check_exception=true // flag which indicates if exception should be checked - ); - - // This routine should emit JVMTI PopFrame and ForceEarlyReturn handling code. - // The implementation is only non-empty for the InterpreterMacroAssembler, - // as only the interpreter handles and ForceEarlyReturn PopFrame requests. - virtual void check_and_handle_popframe(Register scratch_reg); - virtual void check_and_handle_earlyret(Register scratch_reg); - - public: - MacroAssembler(CodeBuffer* code) : Assembler(code) {} - - // Support for NULL-checks - // - // Generates code that causes a NULL OS exception if the content of reg is NULL. - // If the accessed location is M[reg + offset] and the offset is known, provide the - // offset. No explicit code generation is needed if the offset is within a certain - // range (0 <= offset <= page_size). - // - // %%%%%% Currently not done for SPARC - - void null_check(Register reg, int offset = -1); - static bool needs_explicit_null_check(intptr_t offset); - - // support for delayed instructions - MacroAssembler* delayed() { Assembler::delayed(); return this; } - - // branches that use right instruction for v8 vs. v9 - inline void br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); - inline void br( Condition c, bool a, Predict p, Label& L ); - - inline void fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); - inline void fb( Condition c, bool a, Predict p, Label& L ); - - // compares register with zero (32 bit) and branches (V9 and V8 instructions) - void cmp_zero_and_br( Condition c, Register s1, Label& L, bool a = false, Predict p = pn ); - // Compares a pointer register with zero and branches on (not)null. - // Does a test & branch on 32-bit systems and a register-branch on 64-bit. - void br_null ( Register s1, bool a, Predict p, Label& L ); - void br_notnull( Register s1, bool a, Predict p, Label& L ); - - // - // Compare registers and branch with nop in delay slot or cbcond without delay slot. - // - // ATTENTION: use these instructions with caution because cbcond instruction - // has very short distance: 512 instructions (2Kbyte). - - // Compare integer (32 bit) values (icc only). - void cmp_and_br_short(Register s1, Register s2, Condition c, Predict p, Label& L); - void cmp_and_br_short(Register s1, int simm13a, Condition c, Predict p, Label& L); - // Platform depending version for pointer compare (icc on !LP64 and xcc on LP64). - void cmp_and_brx_short(Register s1, Register s2, Condition c, Predict p, Label& L); - void cmp_and_brx_short(Register s1, int simm13a, Condition c, Predict p, Label& L); - - // Short branch version for compares a pointer pwith zero. - void br_null_short ( Register s1, Predict p, Label& L ); - void br_notnull_short( Register s1, Predict p, Label& L ); - - // unconditional short branch - void ba_short(Label& L); - - inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); - inline void bp( Condition c, bool a, CC cc, Predict p, Label& L ); - - // Branch that tests xcc in LP64 and icc in !LP64 - inline void brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); - inline void brx( Condition c, bool a, Predict p, Label& L ); - - // unconditional branch - inline void ba( Label& L ); - - // Branch that tests fp condition codes - inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); - inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L ); - - // get PC the best way - inline int get_pc( Register d ); - - // Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual) - inline void cmp( Register s1, Register s2 ) { subcc( s1, s2, G0 ); } - inline void cmp( Register s1, int simm13a ) { subcc( s1, simm13a, G0 ); } - - inline void jmp( Register s1, Register s2 ); - inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() ); - - // Check if the call target is out of wdisp30 range (relative to the code cache) - static inline bool is_far_target(address d); - inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type ); - inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type ); - inline void callr( Register s1, Register s2 ); - inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() ); - - // Emits nothing on V8 - inline void iprefetch( address d, relocInfo::relocType rt = relocInfo::none ); - inline void iprefetch( Label& L); - - inline void tst( Register s ) { orcc( G0, s, G0 ); } - -#ifdef PRODUCT - inline void ret( bool trace = TraceJumps ) { if (trace) { - mov(I7, O7); // traceable register - JMP(O7, 2 * BytesPerInstWord); - } else { - jmpl( I7, 2 * BytesPerInstWord, G0 ); - } - } - - inline void retl( bool trace = TraceJumps ) { if (trace) JMP(O7, 2 * BytesPerInstWord); - else jmpl( O7, 2 * BytesPerInstWord, G0 ); } -#else - void ret( bool trace = TraceJumps ); - void retl( bool trace = TraceJumps ); -#endif /* PRODUCT */ - - // Required platform-specific helpers for Label::patch_instructions. - // They _shadow_ the declarations in AbstractAssembler, which are undefined. - void pd_patch_instruction(address branch, address target); -#ifndef PRODUCT - static void pd_print_patched_instruction(address branch); -#endif - - // sethi Macro handles optimizations and relocations -private: - void internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable); -public: - void sethi(const AddressLiteral& addrlit, Register d); - void patchable_sethi(const AddressLiteral& addrlit, Register d); - - // compute the number of instructions for a sethi/set - static int insts_for_sethi( address a, bool worst_case = false ); - static int worst_case_insts_for_set(); - - // set may be either setsw or setuw (high 32 bits may be zero or sign) -private: - void internal_set(const AddressLiteral& al, Register d, bool ForceRelocatable); - static int insts_for_internal_set(intptr_t value); -public: - void set(const AddressLiteral& addrlit, Register d); - void set(intptr_t value, Register d); - void set(address addr, Register d, RelocationHolder const& rspec); - static int insts_for_set(intptr_t value) { return insts_for_internal_set(value); } - - void patchable_set(const AddressLiteral& addrlit, Register d); - void patchable_set(intptr_t value, Register d); - void set64(jlong value, Register d, Register tmp); - static int insts_for_set64(jlong value); - - // sign-extend 32 to 64 - inline void signx( Register s, Register d ) { sra( s, G0, d); } - inline void signx( Register d ) { sra( d, G0, d); } - - inline void not1( Register s, Register d ) { xnor( s, G0, d ); } - inline void not1( Register d ) { xnor( d, G0, d ); } - - inline void neg( Register s, Register d ) { sub( G0, s, d ); } - inline void neg( Register d ) { sub( G0, d, d ); } - - inline void cas( Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY); } - inline void casx( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY); } - // Functions for isolating 64 bit atomic swaps for LP64 - // cas_ptr will perform cas for 32 bit VM's and casx for 64 bit VM's - inline void cas_ptr( Register s1, Register s2, Register d) { -#ifdef _LP64 - casx( s1, s2, d ); -#else - cas( s1, s2, d ); -#endif - } - - // Functions for isolating 64 bit shifts for LP64 - inline void sll_ptr( Register s1, Register s2, Register d ); - inline void sll_ptr( Register s1, int imm6a, Register d ); - inline void sll_ptr( Register s1, RegisterOrConstant s2, Register d ); - inline void srl_ptr( Register s1, Register s2, Register d ); - inline void srl_ptr( Register s1, int imm6a, Register d ); - - // little-endian - inline void casl( Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY_LITTLE); } - inline void casxl( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY_LITTLE); } - - inline void inc( Register d, int const13 = 1 ) { add( d, const13, d); } - inline void inccc( Register d, int const13 = 1 ) { addcc( d, const13, d); } - - inline void dec( Register d, int const13 = 1 ) { sub( d, const13, d); } - inline void deccc( Register d, int const13 = 1 ) { subcc( d, const13, d); } - - inline void btst( Register s1, Register s2 ) { andcc( s1, s2, G0 ); } - inline void btst( int simm13a, Register s ) { andcc( s, simm13a, G0 ); } - - inline void bset( Register s1, Register s2 ) { or3( s1, s2, s2 ); } - inline void bset( int simm13a, Register s ) { or3( s, simm13a, s ); } - - inline void bclr( Register s1, Register s2 ) { andn( s1, s2, s2 ); } - inline void bclr( int simm13a, Register s ) { andn( s, simm13a, s ); } - - inline void btog( Register s1, Register s2 ) { xor3( s1, s2, s2 ); } - inline void btog( int simm13a, Register s ) { xor3( s, simm13a, s ); } - - inline void clr( Register d ) { or3( G0, G0, d ); } - - inline void clrb( Register s1, Register s2); - inline void clrh( Register s1, Register s2); - inline void clr( Register s1, Register s2); - inline void clrx( Register s1, Register s2); - - inline void clrb( Register s1, int simm13a); - inline void clrh( Register s1, int simm13a); - inline void clr( Register s1, int simm13a); - inline void clrx( Register s1, int simm13a); - - // copy & clear upper word - inline void clruw( Register s, Register d ) { srl( s, G0, d); } - // clear upper word - inline void clruwu( Register d ) { srl( d, G0, d); } - - // membar psuedo instruction. takes into account target memory model. - inline void membar( Assembler::Membar_mask_bits const7a ); - - // returns if membar generates anything. - inline bool membar_has_effect( Assembler::Membar_mask_bits const7a ); - - // mov pseudo instructions - inline void mov( Register s, Register d) { - if ( s != d ) or3( G0, s, d); - else assert_not_delayed(); // Put something useful in the delay slot! - } - - inline void mov_or_nop( Register s, Register d) { - if ( s != d ) or3( G0, s, d); - else nop(); - } - - inline void mov( int simm13a, Register d) { or3( G0, simm13a, d); } - - // address pseudos: make these names unlike instruction names to avoid confusion - inline intptr_t load_pc_address( Register reg, int bytes_to_skip ); - inline void load_contents(const AddressLiteral& addrlit, Register d, int offset = 0); - inline void load_bool_contents(const AddressLiteral& addrlit, Register d, int offset = 0); - inline void load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset = 0); - inline void store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0); - inline void store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0); - inline void jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset = 0); - inline void jump_to(const AddressLiteral& addrlit, Register temp, int offset = 0); - inline void jump_indirect_to(Address& a, Register temp, int ld_offset = 0, int jmp_offset = 0); - - // ring buffer traceable jumps - - void jmp2( Register r1, Register r2, const char* file, int line ); - void jmp ( Register r1, int offset, const char* file, int line ); - - void jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line); - void jump (const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line); - - - // argument pseudos: - - inline void load_argument( Argument& a, Register d ); - inline void store_argument( Register s, Argument& a ); - inline void store_ptr_argument( Register s, Argument& a ); - inline void store_float_argument( FloatRegister s, Argument& a ); - inline void store_double_argument( FloatRegister s, Argument& a ); - inline void store_long_argument( Register s, Argument& a ); - - // handy macros: - - inline void round_to( Register r, int modulus ) { - assert_not_delayed(); - inc( r, modulus - 1 ); - and3( r, -modulus, r ); - } - - // -------------------------------------------------- - - // Functions for isolating 64 bit loads for LP64 - // ld_ptr will perform ld for 32 bit VM's and ldx for 64 bit VM's - // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's - inline void ld_ptr(Register s1, Register s2, Register d); - inline void ld_ptr(Register s1, int simm13a, Register d); - inline void ld_ptr(Register s1, RegisterOrConstant s2, Register d); - inline void ld_ptr(const Address& a, Register d, int offset = 0); - inline void st_ptr(Register d, Register s1, Register s2); - inline void st_ptr(Register d, Register s1, int simm13a); - inline void st_ptr(Register d, Register s1, RegisterOrConstant s2); - inline void st_ptr(Register d, const Address& a, int offset = 0); - -#ifdef ASSERT - // ByteSize is only a class when ASSERT is defined, otherwise it's an int. - inline void ld_ptr(Register s1, ByteSize simm13a, Register d); - inline void st_ptr(Register d, Register s1, ByteSize simm13a); -#endif - - // ld_long will perform ldd for 32 bit VM's and ldx for 64 bit VM's - // st_long will perform std for 32 bit VM's and stx for 64 bit VM's - inline void ld_long(Register s1, Register s2, Register d); - inline void ld_long(Register s1, int simm13a, Register d); - inline void ld_long(Register s1, RegisterOrConstant s2, Register d); - inline void ld_long(const Address& a, Register d, int offset = 0); - inline void st_long(Register d, Register s1, Register s2); - inline void st_long(Register d, Register s1, int simm13a); - inline void st_long(Register d, Register s1, RegisterOrConstant s2); - inline void st_long(Register d, const Address& a, int offset = 0); - - // Helpers for address formation. - // - They emit only a move if s2 is a constant zero. - // - If dest is a constant and either s1 or s2 is a register, the temp argument is required and becomes the result. - // - If dest is a register and either s1 or s2 is a non-simm13 constant, the temp argument is required and used to materialize the constant. - RegisterOrConstant regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); - RegisterOrConstant regcon_inc_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); - RegisterOrConstant regcon_sll_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); - - RegisterOrConstant ensure_simm13_or_reg(RegisterOrConstant src, Register temp) { - if (is_simm13(src.constant_or_zero())) - return src; // register or short constant - guarantee(temp != noreg, "constant offset overflow"); - set(src.as_constant(), temp); - return temp; - } - - // -------------------------------------------------- - - public: - // traps as per trap.h (SPARC ABI?) - - void breakpoint_trap(); - void breakpoint_trap(Condition c, CC cc); - void flush_windows_trap(); - void clean_windows_trap(); - void get_psr_trap(); - void set_psr_trap(); - - // V8/V9 flush_windows - void flush_windows(); - - // Support for serializing memory accesses between threads - void serialize_memory(Register thread, Register tmp1, Register tmp2); - - // Stack frame creation/removal - void enter(); - void leave(); - - // V8/V9 integer multiply - void mult(Register s1, Register s2, Register d); - void mult(Register s1, int simm13a, Register d); - - // V8/V9 read and write of condition codes. - void read_ccr(Register d); - void write_ccr(Register s); - - // Manipulation of C++ bools - // These are idioms to flag the need for care with accessing bools but on - // this platform we assume byte size - - inline void stbool(Register d, const Address& a) { stb(d, a); } - inline void ldbool(const Address& a, Register d) { ldub(a, d); } - inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); } - - // klass oop manipulations if compressed - void load_klass(Register src_oop, Register klass); - void store_klass(Register klass, Register dst_oop); - void store_klass_gap(Register s, Register dst_oop); - - // oop manipulations - void load_heap_oop(const Address& s, Register d); - void load_heap_oop(Register s1, Register s2, Register d); - void load_heap_oop(Register s1, int simm13a, Register d); - void load_heap_oop(Register s1, RegisterOrConstant s2, Register d); - void store_heap_oop(Register d, Register s1, Register s2); - void store_heap_oop(Register d, Register s1, int simm13a); - void store_heap_oop(Register d, const Address& a, int offset = 0); - - void encode_heap_oop(Register src, Register dst); - void encode_heap_oop(Register r) { - encode_heap_oop(r, r); - } - void decode_heap_oop(Register src, Register dst); - void decode_heap_oop(Register r) { - decode_heap_oop(r, r); - } - void encode_heap_oop_not_null(Register r); - void decode_heap_oop_not_null(Register r); - void encode_heap_oop_not_null(Register src, Register dst); - void decode_heap_oop_not_null(Register src, Register dst); - - void encode_klass_not_null(Register r); - void decode_klass_not_null(Register r); - void encode_klass_not_null(Register src, Register dst); - void decode_klass_not_null(Register src, Register dst); - - // Support for managing the JavaThread pointer (i.e.; the reference to - // thread-local information). - void get_thread(); // load G2_thread - void verify_thread(); // verify G2_thread contents - void save_thread (const Register threache); // save to cache - void restore_thread(const Register thread_cache); // restore from cache - - // Support for last Java frame (but use call_VM instead where possible) - void set_last_Java_frame(Register last_java_sp, Register last_Java_pc); - void reset_last_Java_frame(void); - - // Call into the VM. - // Passes the thread pointer (in O0) as a prepended argument. - // Makes sure oop return values are visible to the GC. - void call_VM(Register oop_result, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); - void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); - void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); - void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); - - // these overloadings are not presently used on SPARC: - void call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); - void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); - void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); - void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); - - void call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments = 0); - void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1); - void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2); - void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3); - - void get_vm_result (Register oop_result); - void get_vm_result_2(Register metadata_result); - - // vm result is currently getting hijacked to for oop preservation - void set_vm_result(Register oop_result); - - // Emit the CompiledIC call idiom - void ic_call(address entry, bool emit_delay = true); - - // if call_VM_base was called with check_exceptions=false, then call - // check_and_forward_exception to handle exceptions when it is safe - void check_and_forward_exception(Register scratch_reg); - - private: - // For V8 - void read_ccr_trap(Register ccr_save); - void write_ccr_trap(Register ccr_save1, Register scratch1, Register scratch2); - -#ifdef ASSERT - // For V8 debugging. Uses V8 instruction sequence and checks - // result with V9 insturctions rdccr and wrccr. - // Uses Gscatch and Gscatch2 - void read_ccr_v8_assert(Register ccr_save); - void write_ccr_v8_assert(Register ccr_save); -#endif // ASSERT - - public: - - // Write to card table for - register is destroyed afterwards. - void card_table_write(jbyte* byte_map_base, Register tmp, Register obj); - - void card_write_barrier_post(Register store_addr, Register new_val, Register tmp); - -#ifndef SERIALGC - // General G1 pre-barrier generator. - void g1_write_barrier_pre(Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs); - - // General G1 post-barrier generator - void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp); -#endif // SERIALGC - - // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack - void push_fTOS(); - - // pops double TOS element from CPU stack and pushes on FPU stack - void pop_fTOS(); - - void empty_FPU_stack(); - - void push_IU_state(); - void pop_IU_state(); - - void push_FPU_state(); - void pop_FPU_state(); - - void push_CPU_state(); - void pop_CPU_state(); - - // if heap base register is used - reinit it with the correct value - void reinit_heapbase(); - - // Debugging - void _verify_oop(Register reg, const char * msg, const char * file, int line); - void _verify_oop_addr(Address addr, const char * msg, const char * file, int line); - - // TODO: verify_method and klass metadata (compare against vptr?) - void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} - void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} - -#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__) -#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop addr ", __FILE__, __LINE__) -#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) -#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) - - // only if +VerifyOops - void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); - // only if +VerifyFPU - void stop(const char* msg); // prints msg, dumps registers and stops execution - void warn(const char* msg); // prints msg, but don't stop - void untested(const char* what = ""); - void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, 1024, "unimplemented: %s", what); stop(b); } - void should_not_reach_here() { stop("should not reach here"); } - void print_CPU_state(); - - // oops in code - AddressLiteral allocate_oop_address(jobject obj); // allocate_index - AddressLiteral constant_oop_address(jobject obj); // find_index - inline void set_oop (jobject obj, Register d); // uses allocate_oop_address - inline void set_oop_constant (jobject obj, Register d); // uses constant_oop_address - inline void set_oop (const AddressLiteral& obj_addr, Register d); // same as load_address - - // metadata in code that we have to keep track of - AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index - AddressLiteral constant_metadata_address(Metadata* obj); // find_index - inline void set_metadata (Metadata* obj, Register d); // uses allocate_metadata_address - inline void set_metadata_constant (Metadata* obj, Register d); // uses constant_metadata_address - inline void set_metadata (const AddressLiteral& obj_addr, Register d); // same as load_address - - void set_narrow_oop( jobject obj, Register d ); - void set_narrow_klass( Klass* k, Register d ); - - // nop padding - void align(int modulus); - - // declare a safepoint - void safepoint(); - - // factor out part of stop into subroutine to save space - void stop_subroutine(); - // factor out part of verify_oop into subroutine to save space - void verify_oop_subroutine(); - - // side-door communication with signalHandler in os_solaris.cpp - static address _verify_oop_implicit_branch[3]; - -#ifndef PRODUCT - static void test(); -#endif - - int total_frame_size_in_bytes(int extraWords); - - // used when extraWords known statically - void save_frame(int extraWords = 0); - void save_frame_c1(int size_in_bytes); - // make a frame, and simultaneously pass up one or two register value - // into the new register window - void save_frame_and_mov(int extraWords, Register s1, Register d1, Register s2 = Register(), Register d2 = Register()); - - // give no. (outgoing) params, calc # of words will need on frame - void calc_mem_param_words(Register Rparam_words, Register Rresult); - - // used to calculate frame size dynamically - // result is in bytes and must be negated for save inst - void calc_frame_size(Register extraWords, Register resultReg); - - // calc and also save - void calc_frame_size_and_save(Register extraWords, Register resultReg); - - static void debug(char* msg, RegistersForDebugging* outWindow); - - // implementations of bytecodes used by both interpreter and compiler - - void lcmp( Register Ra_hi, Register Ra_low, - Register Rb_hi, Register Rb_low, - Register Rresult); - - void lneg( Register Rhi, Register Rlow ); - - void lshl( Register Rin_high, Register Rin_low, Register Rcount, - Register Rout_high, Register Rout_low, Register Rtemp ); - - void lshr( Register Rin_high, Register Rin_low, Register Rcount, - Register Rout_high, Register Rout_low, Register Rtemp ); - - void lushr( Register Rin_high, Register Rin_low, Register Rcount, - Register Rout_high, Register Rout_low, Register Rtemp ); - -#ifdef _LP64 - void lcmp( Register Ra, Register Rb, Register Rresult); -#endif - - // Load and store values by size and signed-ness - void load_sized_value( Address src, Register dst, size_t size_in_bytes, bool is_signed); - void store_sized_value(Register src, Address dst, size_t size_in_bytes); - - void float_cmp( bool is_float, int unordered_result, - FloatRegister Fa, FloatRegister Fb, - Register Rresult); - - void fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); - void fneg( FloatRegisterImpl::Width w, FloatRegister sd ) { Assembler::fneg(w, sd); } - void fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); - void fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); - - void save_all_globals_into_locals(); - void restore_globals_from_locals(); - - void casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, - address lock_addr=0, bool use_call_vm=false); - void cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, - address lock_addr=0, bool use_call_vm=false); - void casn (Register addr_reg, Register cmp_reg, Register set_reg) ; - - // These set the icc condition code to equal if the lock succeeded - // and notEqual if it failed and requires a slow case - void compiler_lock_object(Register Roop, Register Rmark, Register Rbox, - Register Rscratch, - BiasedLockingCounters* counters = NULL, - bool try_bias = UseBiasedLocking); - void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, - Register Rscratch, - bool try_bias = UseBiasedLocking); - - // Biased locking support - // Upon entry, lock_reg must point to the lock record on the stack, - // obj_reg must contain the target object, and mark_reg must contain - // the target object's header. - // Destroys mark_reg if an attempt is made to bias an anonymously - // biased lock. In this case a failure will go either to the slow - // case or fall through with the notEqual condition code set with - // the expectation that the slow case in the runtime will be called. - // In the fall-through case where the CAS-based lock is done, - // mark_reg is not destroyed. - void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg, - Label& done, Label* slow_case = NULL, - BiasedLockingCounters* counters = NULL); - // Upon entry, the base register of mark_addr must contain the oop. - // Destroys temp_reg. - - // If allow_delay_slot_filling is set to true, the next instruction - // emitted after this one will go in an annulled delay slot if the - // biased locking exit case failed. - void biased_locking_exit(Address mark_addr, Register temp_reg, Label& done, bool allow_delay_slot_filling = false); - - // allocation - void eden_allocate( - Register obj, // result: pointer to object after successful allocation - Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise - int con_size_in_bytes, // object size in bytes if known at compile time - Register t1, // temp register - Register t2, // temp register - Label& slow_case // continuation point if fast allocation fails - ); - void tlab_allocate( - Register obj, // result: pointer to object after successful allocation - Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise - int con_size_in_bytes, // object size in bytes if known at compile time - Register t1, // temp register - Label& slow_case // continuation point if fast allocation fails - ); - void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); - void incr_allocated_bytes(RegisterOrConstant size_in_bytes, - Register t1, Register t2); - - // interface method calling - void lookup_interface_method(Register recv_klass, - Register intf_klass, - RegisterOrConstant itable_index, - Register method_result, - Register temp_reg, Register temp2_reg, - Label& no_such_interface); - - // virtual method calling - void lookup_virtual_method(Register recv_klass, - RegisterOrConstant vtable_index, - Register method_result); - - // Test sub_klass against super_klass, with fast and slow paths. - - // The fast path produces a tri-state answer: yes / no / maybe-slow. - // One of the three labels can be NULL, meaning take the fall-through. - // If super_check_offset is -1, the value is loaded up from super_klass. - // No registers are killed, except temp_reg and temp2_reg. - // If super_check_offset is not -1, temp2_reg is not used and can be noreg. - void check_klass_subtype_fast_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Label* L_success, - Label* L_failure, - Label* L_slow_path, - RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); - - // The rest of the type check; must be wired to a corresponding fast path. - // It does not repeat the fast path logic, so don't use it standalone. - // The temp_reg can be noreg, if no temps are available. - // It can also be sub_klass or super_klass, meaning it's OK to kill that one. - // Updates the sub's secondary super cache as necessary. - void check_klass_subtype_slow_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Register temp3_reg, - Register temp4_reg, - Label* L_success, - Label* L_failure); - - // Simplified, combined version, good for typical uses. - // Falls through on failure. - void check_klass_subtype(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Label& L_success); - - // method handles (JSR 292) - // offset relative to Gargs of argument at tos[arg_slot]. - // (arg_slot == 0 means the last argument, not the first). - RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, - Register temp_reg, - int extra_slot_offset = 0); - // Address of Gargs and argument_offset. - Address argument_address(RegisterOrConstant arg_slot, - Register temp_reg = noreg, - int extra_slot_offset = 0); - - // Stack overflow checking - - // Note: this clobbers G3_scratch - void bang_stack_with_offset(int offset) { - // stack grows down, caller passes positive offset - assert(offset > 0, "must bang with negative offset"); - set((-offset)+STACK_BIAS, G3_scratch); - st(G0, SP, G3_scratch); - } - - // Writes to stack successive pages until offset reached to check for - // stack overflow + shadow pages. Clobbers tsp and scratch registers. - void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch); - - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset); - - void verify_tlab(); - - Condition negate_condition(Condition cond); - - // Helper functions for statistics gathering. - // Conditionally (non-atomically) increments passed counter address, preserving condition codes. - void cond_inc(Condition cond, address counter_addr, Register Rtemp1, Register Rtemp2); - // Unconditional increment. - void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2); - void inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2); - - // Compare char[] arrays aligned to 4 bytes. - void char_arrays_equals(Register ary1, Register ary2, - Register limit, Register result, - Register chr1, Register chr2, Label& Ldone); - // Use BIS for zeroing - void bis_zeroing(Register to, Register count, Register temp, Label& Ldone); - -#undef VIRTUAL - -}; - -/** - * class SkipIfEqual: - * - * Instantiating this class will result in assembly code being output that will - * jump around any code emitted between the creation of the instance and it's - * automatic destruction at the end of a scope block, depending on the value of - * the flag passed to the constructor, which will be checked at run-time. - */ -class SkipIfEqual : public StackObj { - private: - MacroAssembler* _masm; - Label _label; - - public: - // 'temp' is a temp register that this object can use (and trash) - SkipIfEqual(MacroAssembler*, Register temp, - const bool* flag_addr, Assembler::Condition condition); - ~SkipIfEqual(); -}; - -#ifdef ASSERT -// On RISC, there's no benefit to verifying instruction boundaries. -inline bool AbstractAssembler::pd_check_instruction_mark() { return false; } -#endif - #endif // CPU_SPARC_VM_ASSEMBLER_SPARC_HPP diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp index 63359d511eb..fac53f2cfd7 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp @@ -25,33 +25,8 @@ #ifndef CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP #define CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP -#include "asm/assembler.inline.hpp" -#include "asm/codeBuffer.hpp" -#include "code/codeCache.hpp" -#include "runtime/handles.inline.hpp" +#include "asm/assembler.hpp" -inline void MacroAssembler::pd_patch_instruction(address branch, address target) { - jint& stub_inst = *(jint*) branch; - stub_inst = patched_branch(target - branch, stub_inst, 0); -} - -#ifndef PRODUCT -inline void MacroAssembler::pd_print_patched_instruction(address branch) { - jint stub_inst = *(jint*) branch; - print_instruction(stub_inst); - ::tty->print("%s", " (unresolved)"); -} -#endif // PRODUCT - -inline bool Address::is_simm13(int offset) { return Assembler::is_simm13(disp() + offset); } - - -inline int AddressLiteral::low10() const { - return Assembler::low10(value()); -} - - -// inlines for SPARC assembler -- dmu 5/97 inline void Assembler::check_delay() { # ifdef CHECK_DELAY @@ -76,9 +51,8 @@ inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { } -inline void Assembler::add(Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); } -inline void Assembler::add(Register s1, int simm13a, Register d, relocInfo::relocType rtype ) { emit_data( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rtype ); } -inline void Assembler::add(Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { emit_data( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec ); } +inline void Assembler::add(Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); } +inline void Assembler::add(Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); cti(); emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt); has_delay_slot(); } inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { bpr( c, a, p, s1, target(L)); } @@ -111,16 +85,9 @@ inline void Assembler::flush( Register s1, int simm13a) { emit_data( op(arith_op inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti(); emit_long( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); } inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { cti(); emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); has_delay_slot(); } -inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d) { - if (s2.is_register()) ldf(w, s1, s2.as_register(), d); - else ldf(w, s1, s2.as_constant(), d); -} - inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { emit_long( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2) ); } inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); } -inline void Assembler::ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset) { relocate(a.rspec(offset)); ldf( w, a.base(), a.disp() + offset, d); } - inline void Assembler::ldfsr( Register s1, Register s2) { v9_dep(); emit_long( op(ldst_op) | op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::ldfsr( Register s1, int simm13a) { v9_dep(); emit_data( op(ldst_op) | op3(ldfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } inline void Assembler::ldxfsr( Register s1, Register s2) { v9_only(); emit_long( op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2) ); } @@ -152,98 +119,9 @@ inline void Assembler::ldx( Register s1, int simm13a, Register d) { v9_only(); inline void Assembler::ldd( Register s1, Register s2, Register d) { v9_dep(); assert(d->is_even(), "not even"); emit_long( op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::ldd( Register s1, int simm13a, Register d) { v9_dep(); assert(d->is_even(), "not even"); emit_data( op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } -#ifdef _LP64 -// Make all 32 bit loads signed so 64 bit registers maintain proper sign -inline void Assembler::ld( Register s1, Register s2, Register d) { ldsw( s1, s2, d); } -inline void Assembler::ld( Register s1, int simm13a, Register d) { ldsw( s1, simm13a, d); } -#else -inline void Assembler::ld( Register s1, Register s2, Register d) { lduw( s1, s2, d); } -inline void Assembler::ld( Register s1, int simm13a, Register d) { lduw( s1, simm13a, d); } -#endif - -#ifdef ASSERT - // ByteSize is only a class when ASSERT is defined, otherwise it's an int. -# ifdef _LP64 -inline void Assembler::ld( Register s1, ByteSize simm13a, Register d) { ldsw( s1, in_bytes(simm13a), d); } -# else -inline void Assembler::ld( Register s1, ByteSize simm13a, Register d) { lduw( s1, in_bytes(simm13a), d); } -# endif -#endif - -inline void Assembler::ld( const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); ld( a.base(), a.index(), d); } - else { ld( a.base(), a.disp() + offset, d); } -} -inline void Assembler::ldsb(const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); ldsb(a.base(), a.index(), d); } - else { ldsb(a.base(), a.disp() + offset, d); } -} -inline void Assembler::ldsh(const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); ldsh(a.base(), a.index(), d); } - else { ldsh(a.base(), a.disp() + offset, d); } -} -inline void Assembler::ldsw(const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); ldsw(a.base(), a.index(), d); } - else { ldsw(a.base(), a.disp() + offset, d); } -} -inline void Assembler::ldub(const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); ldub(a.base(), a.index(), d); } - else { ldub(a.base(), a.disp() + offset, d); } -} -inline void Assembler::lduh(const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); lduh(a.base(), a.index(), d); } - else { lduh(a.base(), a.disp() + offset, d); } -} -inline void Assembler::lduw(const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); lduw(a.base(), a.index(), d); } - else { lduw(a.base(), a.disp() + offset, d); } -} -inline void Assembler::ldd( const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); ldd( a.base(), a.index(), d); } - else { ldd( a.base(), a.disp() + offset, d); } -} -inline void Assembler::ldx( const Address& a, Register d, int offset) { - if (a.has_index()) { assert(offset == 0, ""); ldx( a.base(), a.index(), d); } - else { ldx( a.base(), a.disp() + offset, d); } -} - -inline void Assembler::ldub(Register s1, RegisterOrConstant s2, Register d) { ldub(Address(s1, s2), d); } -inline void Assembler::ldsb(Register s1, RegisterOrConstant s2, Register d) { ldsb(Address(s1, s2), d); } -inline void Assembler::lduh(Register s1, RegisterOrConstant s2, Register d) { lduh(Address(s1, s2), d); } -inline void Assembler::ldsh(Register s1, RegisterOrConstant s2, Register d) { ldsh(Address(s1, s2), d); } -inline void Assembler::lduw(Register s1, RegisterOrConstant s2, Register d) { lduw(Address(s1, s2), d); } -inline void Assembler::ldsw(Register s1, RegisterOrConstant s2, Register d) { ldsw(Address(s1, s2), d); } -inline void Assembler::ldx( Register s1, RegisterOrConstant s2, Register d) { ldx( Address(s1, s2), d); } -inline void Assembler::ld( Register s1, RegisterOrConstant s2, Register d) { ld( Address(s1, s2), d); } -inline void Assembler::ldd( Register s1, RegisterOrConstant s2, Register d) { ldd( Address(s1, s2), d); } - -// form effective addresses this way: -inline void Assembler::add(const Address& a, Register d, int offset) { - if (a.has_index()) add(a.base(), a.index(), d); - else { add(a.base(), a.disp() + offset, d, a.rspec(offset)); offset = 0; } - if (offset != 0) add(d, offset, d); -} -inline void Assembler::add(Register s1, RegisterOrConstant s2, Register d, int offset) { - if (s2.is_register()) add(s1, s2.as_register(), d); - else { add(s1, s2.as_constant() + offset, d); offset = 0; } - if (offset != 0) add(d, offset, d); -} - -inline void Assembler::andn(Register s1, RegisterOrConstant s2, Register d) { - if (s2.is_register()) andn(s1, s2.as_register(), d); - else andn(s1, s2.as_constant(), d); -} - inline void Assembler::ldstub( Register s1, Register s2, Register d) { emit_long( op(ldst_op) | rd(d) | op3(ldstub_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::ldstub( Register s1, int simm13a, Register d) { emit_data( op(ldst_op) | rd(d) | op3(ldstub_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } - -inline void Assembler::prefetch(Register s1, Register s2, PrefetchFcn f) { v9_only(); emit_long( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | rs2(s2) ); } -inline void Assembler::prefetch(Register s1, int simm13a, PrefetchFcn f) { v9_only(); emit_data( op(ldst_op) | fcn(f) | op3(prefetch_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } - -inline void Assembler::prefetch(const Address& a, PrefetchFcn f, int offset) { v9_only(); relocate(a.rspec(offset)); prefetch(a.base(), a.disp() + offset, f); } - - inline void Assembler::rett( Register s1, Register s2 ) { cti(); emit_long( op(arith_op) | op3(rett_op3) | rs1(s1) | rs2(s2)); has_delay_slot(); } inline void Assembler::rett( Register s1, int simm13a, relocInfo::relocType rt) { cti(); emit_data( op(arith_op) | op3(rett_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rt); has_delay_slot(); } @@ -251,20 +129,9 @@ inline void Assembler::sethi( int imm22a, Register d, RelocationHolder const& rs // pp 222 -inline void Assembler::stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2) { - if (s2.is_register()) stf(w, d, s1, s2.as_register()); - else stf(w, d, s1, s2.as_constant()); -} - inline void Assembler::stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2) { emit_long( op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | rs2(s2) ); } inline void Assembler::stf( FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13)); } -inline void Assembler::stf( FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset) { - relocate(a.rspec(offset)); - if (a.has_index()) { assert(offset == 0, ""); stf(w, d, a.base(), a.index() ); } - else { stf(w, d, a.base(), a.disp() + offset); } -} - inline void Assembler::stfsr( Register s1, Register s2) { v9_dep(); emit_long( op(ldst_op) | op3(stfsr_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::stfsr( Register s1, int simm13a) { v9_dep(); emit_data( op(ldst_op) | op3(stfsr_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } inline void Assembler::stxfsr( Register s1, Register s2) { v9_only(); emit_long( op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2) ); } @@ -285,46 +152,6 @@ inline void Assembler::stx( Register d, Register s1, int simm13a) { v9_only(); inline void Assembler::std( Register d, Register s1, Register s2) { v9_dep(); assert(d->is_even(), "not even"); emit_long( op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::std( Register d, Register s1, int simm13a) { v9_dep(); assert(d->is_even(), "not even"); emit_data( op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } -inline void Assembler::st( Register d, Register s1, Register s2) { stw(d, s1, s2); } -inline void Assembler::st( Register d, Register s1, int simm13a) { stw(d, s1, simm13a); } - -#ifdef ASSERT -// ByteSize is only a class when ASSERT is defined, otherwise it's an int. -inline void Assembler::st( Register d, Register s1, ByteSize simm13a) { stw(d, s1, in_bytes(simm13a)); } -#endif - -inline void Assembler::stb(Register d, const Address& a, int offset) { - if (a.has_index()) { assert(offset == 0, ""); stb(d, a.base(), a.index() ); } - else { stb(d, a.base(), a.disp() + offset); } -} -inline void Assembler::sth(Register d, const Address& a, int offset) { - if (a.has_index()) { assert(offset == 0, ""); sth(d, a.base(), a.index() ); } - else { sth(d, a.base(), a.disp() + offset); } -} -inline void Assembler::stw(Register d, const Address& a, int offset) { - if (a.has_index()) { assert(offset == 0, ""); stw(d, a.base(), a.index() ); } - else { stw(d, a.base(), a.disp() + offset); } -} -inline void Assembler::st( Register d, const Address& a, int offset) { - if (a.has_index()) { assert(offset == 0, ""); st( d, a.base(), a.index() ); } - else { st( d, a.base(), a.disp() + offset); } -} -inline void Assembler::std(Register d, const Address& a, int offset) { - if (a.has_index()) { assert(offset == 0, ""); std(d, a.base(), a.index() ); } - else { std(d, a.base(), a.disp() + offset); } -} -inline void Assembler::stx(Register d, const Address& a, int offset) { - if (a.has_index()) { assert(offset == 0, ""); stx(d, a.base(), a.index() ); } - else { stx(d, a.base(), a.disp() + offset); } -} - -inline void Assembler::stb(Register d, Register s1, RegisterOrConstant s2) { stb(d, Address(s1, s2)); } -inline void Assembler::sth(Register d, Register s1, RegisterOrConstant s2) { sth(d, Address(s1, s2)); } -inline void Assembler::stw(Register d, Register s1, RegisterOrConstant s2) { stw(d, Address(s1, s2)); } -inline void Assembler::stx(Register d, Register s1, RegisterOrConstant s2) { stx(d, Address(s1, s2)); } -inline void Assembler::std(Register d, Register s1, RegisterOrConstant s2) { std(d, Address(s1, s2)); } -inline void Assembler::st( Register d, Register s1, RegisterOrConstant s2) { st( d, Address(s1, s2)); } - // v8 p 99 inline void Assembler::stc( int crd, Register s1, Register s2) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(stc_op3 ) | rs1(s1) | rs2(s2) ); } @@ -336,561 +163,9 @@ inline void Assembler::stcsr( int crd, Register s1, int simm13a) { v8_only(); inline void Assembler::stdcq( int crd, Register s1, Register s2) { v8_only(); emit_long( op(ldst_op) | fcn(crd) | op3(stdcq_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::stdcq( int crd, Register s1, int simm13a) { v8_only(); emit_data( op(ldst_op) | fcn(crd) | op3(stdcq_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } -inline void Assembler::sub(Register s1, RegisterOrConstant s2, Register d, int offset) { - if (s2.is_register()) sub(s1, s2.as_register(), d); - else { sub(s1, s2.as_constant() + offset, d); offset = 0; } - if (offset != 0) sub(d, offset, d); -} - // pp 231 inline void Assembler::swap( Register s1, Register s2, Register d) { v9_dep(); emit_long( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | rs2(s2) ); } inline void Assembler::swap( Register s1, int simm13a, Register d) { v9_dep(); emit_data( op(ldst_op) | rd(d) | op3(swap_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); } -inline void Assembler::swap( Address& a, Register d, int offset ) { - relocate(a.rspec(offset)); - if (a.has_index()) { assert(offset == 0, ""); swap( a.base(), a.index(), d ); } - else { swap( a.base(), a.disp() + offset, d ); } -} - - -// Use the right loads/stores for the platform -inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) { -#ifdef _LP64 - Assembler::ldx(s1, s2, d); -#else - Assembler::ld( s1, s2, d); -#endif -} - -inline void MacroAssembler::ld_ptr( Register s1, int simm13a, Register d ) { -#ifdef _LP64 - Assembler::ldx(s1, simm13a, d); -#else - Assembler::ld( s1, simm13a, d); -#endif -} - -#ifdef ASSERT -// ByteSize is only a class when ASSERT is defined, otherwise it's an int. -inline void MacroAssembler::ld_ptr( Register s1, ByteSize simm13a, Register d ) { - ld_ptr(s1, in_bytes(simm13a), d); -} -#endif - -inline void MacroAssembler::ld_ptr( Register s1, RegisterOrConstant s2, Register d ) { -#ifdef _LP64 - Assembler::ldx(s1, s2, d); -#else - Assembler::ld( s1, s2, d); -#endif -} - -inline void MacroAssembler::ld_ptr(const Address& a, Register d, int offset) { -#ifdef _LP64 - Assembler::ldx(a, d, offset); -#else - Assembler::ld( a, d, offset); -#endif -} - -inline void MacroAssembler::st_ptr( Register d, Register s1, Register s2 ) { -#ifdef _LP64 - Assembler::stx(d, s1, s2); -#else - Assembler::st( d, s1, s2); -#endif -} - -inline void MacroAssembler::st_ptr( Register d, Register s1, int simm13a ) { -#ifdef _LP64 - Assembler::stx(d, s1, simm13a); -#else - Assembler::st( d, s1, simm13a); -#endif -} - -#ifdef ASSERT -// ByteSize is only a class when ASSERT is defined, otherwise it's an int. -inline void MacroAssembler::st_ptr( Register d, Register s1, ByteSize simm13a ) { - st_ptr(d, s1, in_bytes(simm13a)); -} -#endif - -inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterOrConstant s2 ) { -#ifdef _LP64 - Assembler::stx(d, s1, s2); -#else - Assembler::st( d, s1, s2); -#endif -} - -inline void MacroAssembler::st_ptr(Register d, const Address& a, int offset) { -#ifdef _LP64 - Assembler::stx(d, a, offset); -#else - Assembler::st( d, a, offset); -#endif -} - -// Use the right loads/stores for the platform -inline void MacroAssembler::ld_long( Register s1, Register s2, Register d ) { -#ifdef _LP64 - Assembler::ldx(s1, s2, d); -#else - Assembler::ldd(s1, s2, d); -#endif -} - -inline void MacroAssembler::ld_long( Register s1, int simm13a, Register d ) { -#ifdef _LP64 - Assembler::ldx(s1, simm13a, d); -#else - Assembler::ldd(s1, simm13a, d); -#endif -} - -inline void MacroAssembler::ld_long( Register s1, RegisterOrConstant s2, Register d ) { -#ifdef _LP64 - Assembler::ldx(s1, s2, d); -#else - Assembler::ldd(s1, s2, d); -#endif -} - -inline void MacroAssembler::ld_long(const Address& a, Register d, int offset) { -#ifdef _LP64 - Assembler::ldx(a, d, offset); -#else - Assembler::ldd(a, d, offset); -#endif -} - -inline void MacroAssembler::st_long( Register d, Register s1, Register s2 ) { -#ifdef _LP64 - Assembler::stx(d, s1, s2); -#else - Assembler::std(d, s1, s2); -#endif -} - -inline void MacroAssembler::st_long( Register d, Register s1, int simm13a ) { -#ifdef _LP64 - Assembler::stx(d, s1, simm13a); -#else - Assembler::std(d, s1, simm13a); -#endif -} - -inline void MacroAssembler::st_long( Register d, Register s1, RegisterOrConstant s2 ) { -#ifdef _LP64 - Assembler::stx(d, s1, s2); -#else - Assembler::std(d, s1, s2); -#endif -} - -inline void MacroAssembler::st_long( Register d, const Address& a, int offset ) { -#ifdef _LP64 - Assembler::stx(d, a, offset); -#else - Assembler::std(d, a, offset); -#endif -} - -// Functions for isolating 64 bit shifts for LP64 - -inline void MacroAssembler::sll_ptr( Register s1, Register s2, Register d ) { -#ifdef _LP64 - Assembler::sllx(s1, s2, d); -#else - Assembler::sll( s1, s2, d); -#endif -} - -inline void MacroAssembler::sll_ptr( Register s1, int imm6a, Register d ) { -#ifdef _LP64 - Assembler::sllx(s1, imm6a, d); -#else - Assembler::sll( s1, imm6a, d); -#endif -} - -inline void MacroAssembler::srl_ptr( Register s1, Register s2, Register d ) { -#ifdef _LP64 - Assembler::srlx(s1, s2, d); -#else - Assembler::srl( s1, s2, d); -#endif -} - -inline void MacroAssembler::srl_ptr( Register s1, int imm6a, Register d ) { -#ifdef _LP64 - Assembler::srlx(s1, imm6a, d); -#else - Assembler::srl( s1, imm6a, d); -#endif -} - -inline void MacroAssembler::sll_ptr( Register s1, RegisterOrConstant s2, Register d ) { - if (s2.is_register()) sll_ptr(s1, s2.as_register(), d); - else sll_ptr(s1, s2.as_constant(), d); -} - -// Use the right branch for the platform - -inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { - if (VM_Version::v9_instructions_work()) - Assembler::bp(c, a, icc, p, d, rt); - else - Assembler::br(c, a, d, rt); -} - -inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) { - br(c, a, p, target(L)); -} - - -// Branch that tests either xcc or icc depending on the -// architecture compiled (LP64 or not) -inline void MacroAssembler::brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { -#ifdef _LP64 - Assembler::bp(c, a, xcc, p, d, rt); -#else - MacroAssembler::br(c, a, p, d, rt); -#endif -} - -inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) { - brx(c, a, p, target(L)); -} - -inline void MacroAssembler::ba( Label& L ) { - br(always, false, pt, L); -} - -// Warning: V9 only functions -inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { - Assembler::bp(c, a, cc, p, d, rt); -} - -inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { - Assembler::bp(c, a, cc, p, L); -} - -inline void MacroAssembler::fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { - if (VM_Version::v9_instructions_work()) - fbp(c, a, fcc0, p, d, rt); - else - Assembler::fb(c, a, d, rt); -} - -inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) { - fb(c, a, p, target(L)); -} - -inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { - Assembler::fbp(c, a, cc, p, d, rt); -} - -inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { - Assembler::fbp(c, a, cc, p, L); -} - -inline void MacroAssembler::jmp( Register s1, Register s2 ) { jmpl( s1, s2, G0 ); } -inline void MacroAssembler::jmp( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, G0, rspec); } - -inline bool MacroAssembler::is_far_target(address d) { - if (ForceUnreachable) { - // References outside the code cache should be treated as far - return d < CodeCache::low_bound() || d > CodeCache::high_bound(); - } - return !is_in_wdisp30_range(d, CodeCache::low_bound()) || !is_in_wdisp30_range(d, CodeCache::high_bound()); -} - -// Call with a check to see if we need to deal with the added -// expense of relocation and if we overflow the displacement -// of the quick call instruction. -inline void MacroAssembler::call( address d, relocInfo::relocType rt ) { -#ifdef _LP64 - intptr_t disp; - // NULL is ok because it will be relocated later. - // Must change NULL to a reachable address in order to - // pass asserts here and in wdisp. - if ( d == NULL ) - d = pc(); - - // Is this address within range of the call instruction? - // If not, use the expensive instruction sequence - if (is_far_target(d)) { - relocate(rt); - AddressLiteral dest(d); - jumpl_to(dest, O7, O7); - } else { - Assembler::call(d, rt); - } -#else - Assembler::call( d, rt ); -#endif -} - -inline void MacroAssembler::call( Label& L, relocInfo::relocType rt ) { - MacroAssembler::call( target(L), rt); -} - - - -inline void MacroAssembler::callr( Register s1, Register s2 ) { jmpl( s1, s2, O7 ); } -inline void MacroAssembler::callr( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, O7, rspec); } - -// prefetch instruction -inline void MacroAssembler::iprefetch( address d, relocInfo::relocType rt ) { - if (VM_Version::v9_instructions_work()) - Assembler::bp( never, true, xcc, pt, d, rt ); -} -inline void MacroAssembler::iprefetch( Label& L) { iprefetch( target(L) ); } - - -// clobbers o7 on V8!! -// returns delta from gotten pc to addr after -inline int MacroAssembler::get_pc( Register d ) { - int x = offset(); - if (VM_Version::v9_instructions_work()) - rdpc(d); - else { - Label lbl; - Assembler::call(lbl, relocInfo::none); // No relocation as this is call to pc+0x8 - if (d == O7) delayed()->nop(); - else delayed()->mov(O7, d); - bind(lbl); - } - return offset() - x; -} - - -// Note: All MacroAssembler::set_foo functions are defined out-of-line. - - -// Loads the current PC of the following instruction as an immediate value in -// 2 instructions. All PCs in the CodeCache are within 2 Gig of each other. -inline intptr_t MacroAssembler::load_pc_address( Register reg, int bytes_to_skip ) { - intptr_t thepc = (intptr_t)pc() + 2*BytesPerInstWord + bytes_to_skip; -#ifdef _LP64 - Unimplemented(); -#else - Assembler::sethi( thepc & ~0x3ff, reg, internal_word_Relocation::spec((address)thepc)); - Assembler::add(reg,thepc & 0x3ff, reg, internal_word_Relocation::spec((address)thepc)); -#endif - return thepc; -} - - -inline void MacroAssembler::load_contents(const AddressLiteral& addrlit, Register d, int offset) { - assert_not_delayed(); - if (ForceUnreachable) { - patchable_sethi(addrlit, d); - } else { - sethi(addrlit, d); - } - ld(d, addrlit.low10() + offset, d); -} - - -inline void MacroAssembler::load_bool_contents(const AddressLiteral& addrlit, Register d, int offset) { - assert_not_delayed(); - if (ForceUnreachable) { - patchable_sethi(addrlit, d); - } else { - sethi(addrlit, d); - } - ldub(d, addrlit.low10() + offset, d); -} - - -inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) { - assert_not_delayed(); - if (ForceUnreachable) { - patchable_sethi(addrlit, d); - } else { - sethi(addrlit, d); - } - ld_ptr(d, addrlit.low10() + offset, d); -} - - -inline void MacroAssembler::store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) { - assert_not_delayed(); - if (ForceUnreachable) { - patchable_sethi(addrlit, temp); - } else { - sethi(addrlit, temp); - } - st(s, temp, addrlit.low10() + offset); -} - - -inline void MacroAssembler::store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) { - assert_not_delayed(); - if (ForceUnreachable) { - patchable_sethi(addrlit, temp); - } else { - sethi(addrlit, temp); - } - st_ptr(s, temp, addrlit.low10() + offset); -} - - -// This code sequence is relocatable to any address, even on LP64. -inline void MacroAssembler::jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset) { - assert_not_delayed(); - // Force fixed length sethi because NativeJump and NativeFarCall don't handle - // variable length instruction streams. - patchable_sethi(addrlit, temp); - jmpl(temp, addrlit.low10() + offset, d); -} - - -inline void MacroAssembler::jump_to(const AddressLiteral& addrlit, Register temp, int offset) { - jumpl_to(addrlit, temp, G0, offset); -} - - -inline void MacroAssembler::jump_indirect_to(Address& a, Register temp, - int ld_offset, int jmp_offset) { - assert_not_delayed(); - //sethi(al); // sethi is caller responsibility for this one - ld_ptr(a, temp, ld_offset); - jmp(temp, jmp_offset); -} - - -inline void MacroAssembler::set_metadata(Metadata* obj, Register d) { - set_metadata(allocate_metadata_address(obj), d); -} - -inline void MacroAssembler::set_metadata_constant(Metadata* obj, Register d) { - set_metadata(constant_metadata_address(obj), d); -} - -inline void MacroAssembler::set_metadata(const AddressLiteral& obj_addr, Register d) { - assert(obj_addr.rspec().type() == relocInfo::metadata_type, "must be a metadata reloc"); - set(obj_addr, d); -} - -inline void MacroAssembler::set_oop(jobject obj, Register d) { - set_oop(allocate_oop_address(obj), d); -} - - -inline void MacroAssembler::set_oop_constant(jobject obj, Register d) { - set_oop(constant_oop_address(obj), d); -} - - -inline void MacroAssembler::set_oop(const AddressLiteral& obj_addr, Register d) { - assert(obj_addr.rspec().type() == relocInfo::oop_type, "must be an oop reloc"); - set(obj_addr, d); -} - - -inline void MacroAssembler::load_argument( Argument& a, Register d ) { - if (a.is_register()) - mov(a.as_register(), d); - else - ld (a.as_address(), d); -} - -inline void MacroAssembler::store_argument( Register s, Argument& a ) { - if (a.is_register()) - mov(s, a.as_register()); - else - st_ptr (s, a.as_address()); // ABI says everything is right justified. -} - -inline void MacroAssembler::store_ptr_argument( Register s, Argument& a ) { - if (a.is_register()) - mov(s, a.as_register()); - else - st_ptr (s, a.as_address()); -} - - -#ifdef _LP64 -inline void MacroAssembler::store_float_argument( FloatRegister s, Argument& a ) { - if (a.is_float_register()) -// V9 ABI has F1, F3, F5 are used to pass instead of O0, O1, O2 - fmov(FloatRegisterImpl::S, s, a.as_float_register() ); - else - // Floats are stored in the high half of the stack entry - // The low half is undefined per the ABI. - stf(FloatRegisterImpl::S, s, a.as_address(), sizeof(jfloat)); -} - -inline void MacroAssembler::store_double_argument( FloatRegister s, Argument& a ) { - if (a.is_float_register()) -// V9 ABI has D0, D2, D4 are used to pass instead of O0, O1, O2 - fmov(FloatRegisterImpl::D, s, a.as_double_register() ); - else - stf(FloatRegisterImpl::D, s, a.as_address()); -} - -inline void MacroAssembler::store_long_argument( Register s, Argument& a ) { - if (a.is_register()) - mov(s, a.as_register()); - else - stx(s, a.as_address()); -} -#endif - -inline void MacroAssembler::clrb( Register s1, Register s2) { stb( G0, s1, s2 ); } -inline void MacroAssembler::clrh( Register s1, Register s2) { sth( G0, s1, s2 ); } -inline void MacroAssembler::clr( Register s1, Register s2) { stw( G0, s1, s2 ); } -inline void MacroAssembler::clrx( Register s1, Register s2) { stx( G0, s1, s2 ); } - -inline void MacroAssembler::clrb( Register s1, int simm13a) { stb( G0, s1, simm13a); } -inline void MacroAssembler::clrh( Register s1, int simm13a) { sth( G0, s1, simm13a); } -inline void MacroAssembler::clr( Register s1, int simm13a) { stw( G0, s1, simm13a); } -inline void MacroAssembler::clrx( Register s1, int simm13a) { stx( G0, s1, simm13a); } - -// returns if membar generates anything, obviously this code should mirror -// membar below. -inline bool MacroAssembler::membar_has_effect( Membar_mask_bits const7a ) { - if( !os::is_MP() ) return false; // Not needed on single CPU - if( VM_Version::v9_instructions_work() ) { - const Membar_mask_bits effective_mask = - Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore)); - return (effective_mask != 0); - } else { - return true; - } -} - -inline void MacroAssembler::membar( Membar_mask_bits const7a ) { - // Uniprocessors do not need memory barriers - if (!os::is_MP()) return; - // Weakened for current Sparcs and TSO. See the v9 manual, sections 8.4.3, - // 8.4.4.3, a.31 and a.50. - if( VM_Version::v9_instructions_work() ) { - // Under TSO, setting bit 3, 2, or 0 is redundant, so the only value - // of the mmask subfield of const7a that does anything that isn't done - // implicitly is StoreLoad. - const Membar_mask_bits effective_mask = - Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore)); - if ( effective_mask != 0 ) { - Assembler::membar( effective_mask ); - } - } else { - // stbar is the closest there is on v8. Equivalent to membar(StoreStore). We - // do not issue the stbar because to my knowledge all v8 machines implement TSO, - // which guarantees that all stores behave as if an stbar were issued just after - // each one of them. On these machines, stbar ought to be a nop. There doesn't - // appear to be an equivalent of membar(StoreLoad) on v8: TSO doesn't require it, - // it can't be specified by stbar, nor have I come up with a way to simulate it. - // - // Addendum. Dave says that ldstub guarantees a write buffer flush to coherent - // space. Put one here to be on the safe side. - Assembler::ldstub(SP, 0, G0); - } -} - #endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP diff --git a/hotspot/src/cpu/sparc/vm/codeBuffer_sparc.hpp b/hotspot/src/cpu/sparc/vm/codeBuffer_sparc.hpp index f1a371acf1e..d3c0744d56c 100644 --- a/hotspot/src/cpu/sparc/vm/codeBuffer_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/codeBuffer_sparc.hpp @@ -31,9 +31,4 @@ private: public: void flush_bundle(bool start_new_bundle) {} - // Heuristic for pre-packing the pt/pn bit of a predicted branch. - bool is_backward_branch(Label& L) { - return L.is_bound() && insts_end() <= locator_address(L.loc()); - } - #endif // CPU_SPARC_VM_CODEBUFFER_SPARC_HPP diff --git a/hotspot/src/cpu/sparc/vm/frame_sparc.hpp b/hotspot/src/cpu/sparc/vm/frame_sparc.hpp index af2287d8a93..25bfea2a2c3 100644 --- a/hotspot/src/cpu/sparc/vm/frame_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/frame_sparc.hpp @@ -204,25 +204,6 @@ intptr_t* out_register_addr(Register reg) const { return younger_sp_addr_at(reg->after_save()->sp_offset_in_saved_window()); } - intptr_t* memory_param_addr(int param_ix, bool is_in) const { - int offset = callee_register_argument_save_area_sp_offset + param_ix; - if (is_in) - return fp_addr_at(offset); - else - return sp_addr_at(offset); - } - intptr_t* param_addr(int param_ix, bool is_in) const { - if (param_ix >= callee_register_argument_save_area_words) - return memory_param_addr(param_ix, is_in); - else if (is_in) - return register_addr(Argument(param_ix, true).as_register()); - else { - // the registers are stored in the next younger frame - // %%% is this really necessary? - ShouldNotReachHere(); - return NULL; - } - } // Interpreter frames @@ -269,12 +250,8 @@ #ifndef CC_INTERP // where Lmonitors is saved: - BasicObjectLock** interpreter_frame_monitors_addr() const { - return (BasicObjectLock**) sp_addr_at(Lmonitors->sp_offset_in_saved_window()); - } - intptr_t** interpreter_frame_esp_addr() const { - return (intptr_t**)sp_addr_at(Lesp->sp_offset_in_saved_window()); - } + inline BasicObjectLock** interpreter_frame_monitors_addr() const; + inline intptr_t** interpreter_frame_esp_addr() const; inline void interpreter_frame_set_tos_address(intptr_t* x); diff --git a/hotspot/src/cpu/sparc/vm/frame_sparc.inline.hpp b/hotspot/src/cpu/sparc/vm/frame_sparc.inline.hpp index 33b1b7f9598..71655f6d760 100644 --- a/hotspot/src/cpu/sparc/vm/frame_sparc.inline.hpp +++ b/hotspot/src/cpu/sparc/vm/frame_sparc.inline.hpp @@ -25,6 +25,8 @@ #ifndef CPU_SPARC_VM_FRAME_SPARC_INLINE_HPP #define CPU_SPARC_VM_FRAME_SPARC_INLINE_HPP +#include "asm/macroAssembler.hpp" + // Inline functions for SPARC frames: // Constructors @@ -185,6 +187,13 @@ inline intptr_t* frame::interpreter_frame_tos_address() const { return *interpreter_frame_esp_addr() + 1; } +inline BasicObjectLock** frame::interpreter_frame_monitors_addr() const { + return (BasicObjectLock**) sp_addr_at(Lmonitors->sp_offset_in_saved_window()); +} +inline intptr_t** frame::interpreter_frame_esp_addr() const { + return (intptr_t**)sp_addr_at(Lesp->sp_offset_in_saved_window()); +} + inline void frame::interpreter_frame_set_tos_address( intptr_t* x ) { *interpreter_frame_esp_addr() = x - 1; } diff --git a/hotspot/src/cpu/sparc/vm/icBuffer_sparc.cpp b/hotspot/src/cpu/sparc/vm/icBuffer_sparc.cpp index b72d4c76030..ee2b0380345 100644 --- a/hotspot/src/cpu/sparc/vm/icBuffer_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/icBuffer_sparc.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "code/icBuffer.hpp" #include "gc_interface/collectedHeap.inline.hpp" #include "interpreter/bytecodes.hpp" diff --git a/hotspot/src/cpu/sparc/vm/icache_sparc.cpp b/hotspot/src/cpu/sparc/vm/icache_sparc.cpp index af7a4c09b01..01914de00b4 100644 --- a/hotspot/src/cpu/sparc/vm/icache_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/icache_sparc.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "runtime/icache.hpp" #define __ _masm-> diff --git a/hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp b/hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp index d06cccd8b5a..bf84848aa61 100644 --- a/hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/interp_masm_sparc.hpp @@ -25,7 +25,7 @@ #ifndef CPU_SPARC_VM_INTERP_MASM_SPARC_HPP #define CPU_SPARC_VM_INTERP_MASM_SPARC_HPP -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "interpreter/invocationCounter.hpp" // This file specializes the assember with interpreter-specific macros diff --git a/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp b/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp index 42099468eda..132c5292a06 100644 --- a/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterGenerator.hpp" diff --git a/hotspot/src/cpu/sparc/vm/jniFastGetField_sparc.cpp b/hotspot/src/cpu/sparc/vm/jniFastGetField_sparc.cpp index e31fa87b3aa..6d3f05a2aab 100644 --- a/hotspot/src/cpu/sparc/vm/jniFastGetField_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/jniFastGetField_sparc.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "memory/resourceArea.hpp" #include "prims/jniFastGetField.hpp" #include "prims/jvm_misc.hpp" diff --git a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp new file mode 100644 index 00000000000..2801a35a10d --- /dev/null +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp @@ -0,0 +1,4610 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#ifndef SERIALGC +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +// Convert the raw encoding form into the form expected by the +// constructor for Address. +Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) { + assert(scale == 0, "not supported"); + RelocationHolder rspec; + if (disp_reloc != relocInfo::none) { + rspec = Relocation::spec_simple(disp_reloc); + } + + Register rindex = as_Register(index); + if (rindex != G0) { + Address madr(as_Register(base), rindex); + madr._rspec = rspec; + return madr; + } else { + Address madr(as_Register(base), disp); + madr._rspec = rspec; + return madr; + } +} + +Address Argument::address_in_frame() const { + // Warning: In LP64 mode disp will occupy more than 10 bits, but + // op codes such as ld or ldx, only access disp() to get + // their simm13 argument. + int disp = ((_number - Argument::n_register_parameters + frame::memory_parameter_word_sp_offset) * BytesPerWord) + STACK_BIAS; + if (is_in()) + return Address(FP, disp); // In argument. + else + return Address(SP, disp); // Out argument. +} + +static const char* argumentNames[][2] = { + {"A0","P0"}, {"A1","P1"}, {"A2","P2"}, {"A3","P3"}, {"A4","P4"}, + {"A5","P5"}, {"A6","P6"}, {"A7","P7"}, {"A8","P8"}, {"A9","P9"}, + {"A(n>9)","P(n>9)"} +}; + +const char* Argument::name() const { + int nofArgs = sizeof argumentNames / sizeof argumentNames[0]; + int num = number(); + if (num >= nofArgs) num = nofArgs - 1; + return argumentNames[num][is_in() ? 1 : 0]; +} + +#ifdef ASSERT +// On RISC, there's no benefit to verifying instruction boundaries. +bool AbstractAssembler::pd_check_instruction_mark() { return false; } +#endif + + +void MacroAssembler::print_instruction(int inst) { + const char* s; + switch (inv_op(inst)) { + default: s = "????"; break; + case call_op: s = "call"; break; + case branch_op: + switch (inv_op2(inst)) { + case fb_op2: s = "fb"; break; + case fbp_op2: s = "fbp"; break; + case br_op2: s = "br"; break; + case bp_op2: s = "bp"; break; + case cb_op2: s = "cb"; break; + case bpr_op2: { + if (is_cbcond(inst)) { + s = is_cxb(inst) ? "cxb" : "cwb"; + } else { + s = "bpr"; + } + break; + } + default: s = "????"; break; + } + } + ::tty->print("%s", s); +} + + +// Patch instruction inst at offset inst_pos to refer to dest_pos +// and return the resulting instruction. +// We should have pcs, not offsets, but since all is relative, it will work out +// OK. +int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { + int m; // mask for displacement field + int v; // new value for displacement field + const int word_aligned_ones = -4; + switch (inv_op(inst)) { + default: ShouldNotReachHere(); + case call_op: m = wdisp(word_aligned_ones, 0, 30); v = wdisp(dest_pos, inst_pos, 30); break; + case branch_op: + switch (inv_op2(inst)) { + case fbp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; + case bp_op2: m = wdisp( word_aligned_ones, 0, 19); v = wdisp( dest_pos, inst_pos, 19); break; + case fb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; + case br_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; + case cb_op2: m = wdisp( word_aligned_ones, 0, 22); v = wdisp( dest_pos, inst_pos, 22); break; + case bpr_op2: { + if (is_cbcond(inst)) { + m = wdisp10(word_aligned_ones, 0); + v = wdisp10(dest_pos, inst_pos); + } else { + m = wdisp16(word_aligned_ones, 0); + v = wdisp16(dest_pos, inst_pos); + } + break; + } + default: ShouldNotReachHere(); + } + } + return inst & ~m | v; +} + +// Return the offset of the branch destionation of instruction inst +// at offset pos. +// Should have pcs, but since all is relative, it works out. +int MacroAssembler::branch_destination(int inst, int pos) { + int r; + switch (inv_op(inst)) { + default: ShouldNotReachHere(); + case call_op: r = inv_wdisp(inst, pos, 30); break; + case branch_op: + switch (inv_op2(inst)) { + case fbp_op2: r = inv_wdisp( inst, pos, 19); break; + case bp_op2: r = inv_wdisp( inst, pos, 19); break; + case fb_op2: r = inv_wdisp( inst, pos, 22); break; + case br_op2: r = inv_wdisp( inst, pos, 22); break; + case cb_op2: r = inv_wdisp( inst, pos, 22); break; + case bpr_op2: { + if (is_cbcond(inst)) { + r = inv_wdisp10(inst, pos); + } else { + r = inv_wdisp16(inst, pos); + } + break; + } + default: ShouldNotReachHere(); + } + } + return r; +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check((intptr_t)offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any registers + ld_ptr(reg, 0, G0); + } + else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +// Ring buffer jumps + +#ifndef PRODUCT +void MacroAssembler::ret( bool trace ) { if (trace) { + mov(I7, O7); // traceable register + JMP(O7, 2 * BytesPerInstWord); + } else { + jmpl( I7, 2 * BytesPerInstWord, G0 ); + } + } + +void MacroAssembler::retl( bool trace ) { if (trace) JMP(O7, 2 * BytesPerInstWord); + else jmpl( O7, 2 * BytesPerInstWord, G0 ); } +#endif /* PRODUCT */ + + +void MacroAssembler::jmp2(Register r1, Register r2, const char* file, int line ) { + assert_not_delayed(); + // This can only be traceable if r1 & r2 are visible after a window save + if (TraceJumps) { +#ifndef PRODUCT + save_frame(0); + verify_thread(); + ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0); + add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1); + sll(O0, exact_log2(4*sizeof(intptr_t)), O2); + add(O2, O1, O1); + + add(r1->after_save(), r2->after_save(), O2); + set((intptr_t)file, O3); + set(line, O4); + Label L; + // get nearby pc, store jmp target + call(L, relocInfo::none); // No relocation for call to pc+0x8 + delayed()->st(O2, O1, 0); + bind(L); + + // store nearby pc + st(O7, O1, sizeof(intptr_t)); + // store file + st(O3, O1, 2*sizeof(intptr_t)); + // store line + st(O4, O1, 3*sizeof(intptr_t)); + add(O0, 1, O0); + and3(O0, JavaThread::jump_ring_buffer_size - 1, O0); + st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset())); + restore(); +#endif /* PRODUCT */ + } + jmpl(r1, r2, G0); +} +void MacroAssembler::jmp(Register r1, int offset, const char* file, int line ) { + assert_not_delayed(); + // This can only be traceable if r1 is visible after a window save + if (TraceJumps) { +#ifndef PRODUCT + save_frame(0); + verify_thread(); + ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0); + add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1); + sll(O0, exact_log2(4*sizeof(intptr_t)), O2); + add(O2, O1, O1); + + add(r1->after_save(), offset, O2); + set((intptr_t)file, O3); + set(line, O4); + Label L; + // get nearby pc, store jmp target + call(L, relocInfo::none); // No relocation for call to pc+0x8 + delayed()->st(O2, O1, 0); + bind(L); + + // store nearby pc + st(O7, O1, sizeof(intptr_t)); + // store file + st(O3, O1, 2*sizeof(intptr_t)); + // store line + st(O4, O1, 3*sizeof(intptr_t)); + add(O0, 1, O0); + and3(O0, JavaThread::jump_ring_buffer_size - 1, O0); + st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset())); + restore(); +#endif /* PRODUCT */ + } + jmp(r1, offset); +} + +// This code sequence is relocatable to any address, even on LP64. +void MacroAssembler::jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line) { + assert_not_delayed(); + // Force fixed length sethi because NativeJump and NativeFarCall don't handle + // variable length instruction streams. + patchable_sethi(addrlit, temp); + Address a(temp, addrlit.low10() + offset); // Add the offset to the displacement. + if (TraceJumps) { +#ifndef PRODUCT + // Must do the add here so relocation can find the remainder of the + // value to be relocated. + add(a.base(), a.disp(), a.base(), addrlit.rspec(offset)); + save_frame(0); + verify_thread(); + ld(G2_thread, in_bytes(JavaThread::jmp_ring_index_offset()), O0); + add(G2_thread, in_bytes(JavaThread::jmp_ring_offset()), O1); + sll(O0, exact_log2(4*sizeof(intptr_t)), O2); + add(O2, O1, O1); + + set((intptr_t)file, O3); + set(line, O4); + Label L; + + // get nearby pc, store jmp target + call(L, relocInfo::none); // No relocation for call to pc+0x8 + delayed()->st(a.base()->after_save(), O1, 0); + bind(L); + + // store nearby pc + st(O7, O1, sizeof(intptr_t)); + // store file + st(O3, O1, 2*sizeof(intptr_t)); + // store line + st(O4, O1, 3*sizeof(intptr_t)); + add(O0, 1, O0); + and3(O0, JavaThread::jump_ring_buffer_size - 1, O0); + st(O0, G2_thread, in_bytes(JavaThread::jmp_ring_index_offset())); + restore(); + jmpl(a.base(), G0, d); +#else + jmpl(a.base(), a.disp(), d); +#endif /* PRODUCT */ + } else { + jmpl(a.base(), a.disp(), d); + } +} + +void MacroAssembler::jump(const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line) { + jumpl(addrlit, temp, G0, offset, file, line); +} + + +// Conditional breakpoint (for assertion checks in assembly code) +void MacroAssembler::breakpoint_trap(Condition c, CC cc) { + trap(c, cc, G0, ST_RESERVED_FOR_USER_0); +} + +// We want to use ST_BREAKPOINT here, but the debugger is confused by it. +void MacroAssembler::breakpoint_trap() { + trap(ST_RESERVED_FOR_USER_0); +} + +// flush windows (except current) using flushw instruction if avail. +void MacroAssembler::flush_windows() { + if (VM_Version::v9_instructions_work()) flushw(); + else flush_windows_trap(); +} + +// Write serialization page so VM thread can do a pseudo remote membar +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. +void MacroAssembler::serialize_memory(Register thread, Register tmp1, Register tmp2) { + srl(thread, os::get_serialize_page_shift_count(), tmp2); + if (Assembler::is_simm13(os::vm_page_size())) { + and3(tmp2, (os::vm_page_size() - sizeof(int)), tmp2); + } + else { + set((os::vm_page_size() - sizeof(int)), tmp1); + and3(tmp2, tmp1, tmp2); + } + set(os::get_memory_serialize_page(), tmp1); + st(G0, tmp1, tmp2); +} + + + +void MacroAssembler::enter() { + Unimplemented(); +} + +void MacroAssembler::leave() { + Unimplemented(); +} + +void MacroAssembler::mult(Register s1, Register s2, Register d) { + if(VM_Version::v9_instructions_work()) { + mulx (s1, s2, d); + } else { + smul (s1, s2, d); + } +} + +void MacroAssembler::mult(Register s1, int simm13a, Register d) { + if(VM_Version::v9_instructions_work()) { + mulx (s1, simm13a, d); + } else { + smul (s1, simm13a, d); + } +} + + +#ifdef ASSERT +void MacroAssembler::read_ccr_v8_assert(Register ccr_save) { + const Register s1 = G3_scratch; + const Register s2 = G4_scratch; + Label get_psr_test; + // Get the condition codes the V8 way. + read_ccr_trap(s1); + mov(ccr_save, s2); + // This is a test of V8 which has icc but not xcc + // so mask off the xcc bits + and3(s2, 0xf, s2); + // Compare condition codes from the V8 and V9 ways. + subcc(s2, s1, G0); + br(Assembler::notEqual, true, Assembler::pt, get_psr_test); + delayed()->breakpoint_trap(); + bind(get_psr_test); +} + +void MacroAssembler::write_ccr_v8_assert(Register ccr_save) { + const Register s1 = G3_scratch; + const Register s2 = G4_scratch; + Label set_psr_test; + // Write out the saved condition codes the V8 way + write_ccr_trap(ccr_save, s1, s2); + // Read back the condition codes using the V9 instruction + rdccr(s1); + mov(ccr_save, s2); + // This is a test of V8 which has icc but not xcc + // so mask off the xcc bits + and3(s2, 0xf, s2); + and3(s1, 0xf, s1); + // Compare the V8 way with the V9 way. + subcc(s2, s1, G0); + br(Assembler::notEqual, true, Assembler::pt, set_psr_test); + delayed()->breakpoint_trap(); + bind(set_psr_test); +} +#else +#define read_ccr_v8_assert(x) +#define write_ccr_v8_assert(x) +#endif // ASSERT + +void MacroAssembler::read_ccr(Register ccr_save) { + if (VM_Version::v9_instructions_work()) { + rdccr(ccr_save); + // Test code sequence used on V8. Do not move above rdccr. + read_ccr_v8_assert(ccr_save); + } else { + read_ccr_trap(ccr_save); + } +} + +void MacroAssembler::write_ccr(Register ccr_save) { + if (VM_Version::v9_instructions_work()) { + // Test code sequence used on V8. Do not move below wrccr. + write_ccr_v8_assert(ccr_save); + wrccr(ccr_save); + } else { + const Register temp_reg1 = G3_scratch; + const Register temp_reg2 = G4_scratch; + write_ccr_trap(ccr_save, temp_reg1, temp_reg2); + } +} + + +// Calls to C land + +#ifdef ASSERT +// a hook for debugging +static Thread* reinitialize_thread() { + return ThreadLocalStorage::thread(); +} +#else +#define reinitialize_thread ThreadLocalStorage::thread +#endif + +#ifdef ASSERT +address last_get_thread = NULL; +#endif + +// call this when G2_thread is not known to be valid +void MacroAssembler::get_thread() { + save_frame(0); // to avoid clobbering O0 + mov(G1, L0); // avoid clobbering G1 + mov(G5_method, L1); // avoid clobbering G5 + mov(G3, L2); // avoid clobbering G3 also + mov(G4, L5); // avoid clobbering G4 +#ifdef ASSERT + AddressLiteral last_get_thread_addrlit(&last_get_thread); + set(last_get_thread_addrlit, L3); + inc(L4, get_pc(L4) + 2 * BytesPerInstWord); // skip getpc() code + inc + st_ptr to point L4 at call + st_ptr(L4, L3, 0); +#endif + call(CAST_FROM_FN_PTR(address, reinitialize_thread), relocInfo::runtime_call_type); + delayed()->nop(); + mov(L0, G1); + mov(L1, G5_method); + mov(L2, G3); + mov(L5, G4); + restore(O0, 0, G2_thread); +} + +static Thread* verify_thread_subroutine(Thread* gthread_value) { + Thread* correct_value = ThreadLocalStorage::thread(); + guarantee(gthread_value == correct_value, "G2_thread value must be the thread"); + return correct_value; +} + +void MacroAssembler::verify_thread() { + if (VerifyThread) { + // NOTE: this chops off the heads of the 64-bit O registers. +#ifdef CC_INTERP + save_frame(0); +#else + // make sure G2_thread contains the right value + save_frame_and_mov(0, Lmethod, Lmethod); // to avoid clobbering O0 (and propagate Lmethod for -Xprof) + mov(G1, L1); // avoid clobbering G1 + // G2 saved below + mov(G3, L3); // avoid clobbering G3 + mov(G4, L4); // avoid clobbering G4 + mov(G5_method, L5); // avoid clobbering G5_method +#endif /* CC_INTERP */ +#if defined(COMPILER2) && !defined(_LP64) + // Save & restore possible 64-bit Long arguments in G-regs + srlx(G1,32,L0); + srlx(G4,32,L6); +#endif + call(CAST_FROM_FN_PTR(address,verify_thread_subroutine), relocInfo::runtime_call_type); + delayed()->mov(G2_thread, O0); + + mov(L1, G1); // Restore G1 + // G2 restored below + mov(L3, G3); // restore G3 + mov(L4, G4); // restore G4 + mov(L5, G5_method); // restore G5_method +#if defined(COMPILER2) && !defined(_LP64) + // Save & restore possible 64-bit Long arguments in G-regs + sllx(L0,32,G2); // Move old high G1 bits high in G2 + srl(G1, 0,G1); // Clear current high G1 bits + or3 (G1,G2,G1); // Recover 64-bit G1 + sllx(L6,32,G2); // Move old high G4 bits high in G2 + srl(G4, 0,G4); // Clear current high G4 bits + or3 (G4,G2,G4); // Recover 64-bit G4 +#endif + restore(O0, 0, G2_thread); + } +} + + +void MacroAssembler::save_thread(const Register thread_cache) { + verify_thread(); + if (thread_cache->is_valid()) { + assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile"); + mov(G2_thread, thread_cache); + } + if (VerifyThread) { + // smash G2_thread, as if the VM were about to anyway + set(0x67676767, G2_thread); + } +} + + +void MacroAssembler::restore_thread(const Register thread_cache) { + if (thread_cache->is_valid()) { + assert(thread_cache->is_local() || thread_cache->is_in(), "bad volatile"); + mov(thread_cache, G2_thread); + verify_thread(); + } else { + // do it the slow way + get_thread(); + } +} + + +// %%% maybe get rid of [re]set_last_Java_frame +void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_Java_pc) { + assert_not_delayed(); + Address flags(G2_thread, JavaThread::frame_anchor_offset() + + JavaFrameAnchor::flags_offset()); + Address pc_addr(G2_thread, JavaThread::last_Java_pc_offset()); + + // Always set last_Java_pc and flags first because once last_Java_sp is visible + // has_last_Java_frame is true and users will look at the rest of the fields. + // (Note: flags should always be zero before we get here so doesn't need to be set.) + +#ifdef ASSERT + // Verify that flags was zeroed on return to Java + Label PcOk; + save_frame(0); // to avoid clobbering O0 + ld_ptr(pc_addr, L0); + br_null_short(L0, Assembler::pt, PcOk); + STOP("last_Java_pc not zeroed before leaving Java"); + bind(PcOk); + + // Verify that flags was zeroed on return to Java + Label FlagsOk; + ld(flags, L0); + tst(L0); + br(Assembler::zero, false, Assembler::pt, FlagsOk); + delayed() -> restore(); + STOP("flags not zeroed before leaving Java"); + bind(FlagsOk); +#endif /* ASSERT */ + // + // When returning from calling out from Java mode the frame anchor's last_Java_pc + // will always be set to NULL. It is set here so that if we are doing a call to + // native (not VM) that we capture the known pc and don't have to rely on the + // native call having a standard frame linkage where we can find the pc. + + if (last_Java_pc->is_valid()) { + st_ptr(last_Java_pc, pc_addr); + } + +#ifdef _LP64 +#ifdef ASSERT + // Make sure that we have an odd stack + Label StackOk; + andcc(last_java_sp, 0x01, G0); + br(Assembler::notZero, false, Assembler::pt, StackOk); + delayed()->nop(); + STOP("Stack Not Biased in set_last_Java_frame"); + bind(StackOk); +#endif // ASSERT + assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame"); + add( last_java_sp, STACK_BIAS, G4_scratch ); + st_ptr(G4_scratch, G2_thread, JavaThread::last_Java_sp_offset()); +#else + st_ptr(last_java_sp, G2_thread, JavaThread::last_Java_sp_offset()); +#endif // _LP64 +} + +void MacroAssembler::reset_last_Java_frame(void) { + assert_not_delayed(); + + Address sp_addr(G2_thread, JavaThread::last_Java_sp_offset()); + Address pc_addr(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); + Address flags (G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); + +#ifdef ASSERT + // check that it WAS previously set +#ifdef CC_INTERP + save_frame(0); +#else + save_frame_and_mov(0, Lmethod, Lmethod); // Propagate Lmethod to helper frame for -Xprof +#endif /* CC_INTERP */ + ld_ptr(sp_addr, L0); + tst(L0); + breakpoint_trap(Assembler::zero, Assembler::ptr_cc); + restore(); +#endif // ASSERT + + st_ptr(G0, sp_addr); + // Always return last_Java_pc to zero + st_ptr(G0, pc_addr); + // Always null flags after return to Java + st(G0, flags); +} + + +void MacroAssembler::call_VM_base( + Register oop_result, + Register thread_cache, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) +{ + assert_not_delayed(); + + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = SP; + } + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + + // 64-bit last_java_sp is biased! + set_last_Java_frame(last_java_sp, noreg); + if (VerifyThread) mov(G2_thread, O0); // about to be smashed; pass early + save_thread(thread_cache); + // do the call + call(entry_point, relocInfo::runtime_call_type); + if (!VerifyThread) + delayed()->mov(G2_thread, O0); // pass thread as first argument + else + delayed()->nop(); // (thread already passed) + restore_thread(thread_cache); + reset_last_Java_frame(); + + // check for pending exceptions. use Gtemp as scratch register. + if (check_exceptions) { + check_and_forward_exception(Gtemp); + } + +#ifdef ASSERT + set(badHeapWordVal, G3); + set(badHeapWordVal, G4); + set(badHeapWordVal, G5); +#endif + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + get_vm_result(oop_result); + } +} + +void MacroAssembler::check_and_forward_exception(Register scratch_reg) +{ + Label L; + + check_and_handle_popframe(scratch_reg); + check_and_handle_earlyret(scratch_reg); + + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + ld_ptr(exception_addr, scratch_reg); + br_null_short(scratch_reg, pt, L); + // we use O7 linkage so that forward_exception_entry has the issuing PC + call(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); + delayed()->nop(); + bind(L); +} + + +void MacroAssembler::check_and_handle_popframe(Register scratch_reg) { +} + + +void MacroAssembler::check_and_handle_earlyret(Register scratch_reg) { +} + + +void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + call_VM(oop_result, entry_point, 1, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); + call_VM(oop_result, entry_point, 2, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); + mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument"); + call_VM(oop_result, entry_point, 3, check_exceptions); +} + + + +// Note: The following call_VM overloadings are useful when a "save" +// has already been performed by a stub, and the last Java frame is +// the previous one. In that case, last_java_sp must be passed as FP +// instead of SP. + + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) { + call_VM_base(oop_result, noreg, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + + +void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) { + // O0 is reserved for the thread + mov(arg_1, O1); + mov(arg_2, O2); assert(arg_2 != O1, "smashed argument"); + mov(arg_3, O3); assert(arg_3 != O1 && arg_3 != O2, "smashed argument"); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + + + +void MacroAssembler::call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments) { + assert_not_delayed(); + save_thread(thread_cache); + // do the call + call(entry_point, relocInfo::runtime_call_type); + delayed()->nop(); + restore_thread(thread_cache); +#ifdef ASSERT + set(badHeapWordVal, G3); + set(badHeapWordVal, G4); + set(badHeapWordVal, G5); +#endif +} + + +void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments) { + call_VM_leaf_base(thread_cache, entry_point, number_of_arguments); +} + + +void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1) { + mov(arg_1, O0); + call_VM_leaf(thread_cache, entry_point, 1); +} + + +void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2) { + mov(arg_1, O0); + mov(arg_2, O1); assert(arg_2 != O0, "smashed argument"); + call_VM_leaf(thread_cache, entry_point, 2); +} + + +void MacroAssembler::call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3) { + mov(arg_1, O0); + mov(arg_2, O1); assert(arg_2 != O0, "smashed argument"); + mov(arg_3, O2); assert(arg_3 != O0 && arg_3 != O1, "smashed argument"); + call_VM_leaf(thread_cache, entry_point, 3); +} + + +void MacroAssembler::get_vm_result(Register oop_result) { + verify_thread(); + Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); + ld_ptr( vm_result_addr, oop_result); + st_ptr(G0, vm_result_addr); + verify_oop(oop_result); +} + + +void MacroAssembler::get_vm_result_2(Register metadata_result) { + verify_thread(); + Address vm_result_addr_2(G2_thread, JavaThread::vm_result_2_offset()); + ld_ptr(vm_result_addr_2, metadata_result); + st_ptr(G0, vm_result_addr_2); +} + + +// We require that C code which does not return a value in vm_result will +// leave it undisturbed. +void MacroAssembler::set_vm_result(Register oop_result) { + verify_thread(); + Address vm_result_addr(G2_thread, JavaThread::vm_result_offset()); + verify_oop(oop_result); + +# ifdef ASSERT + // Check that we are not overwriting any other oop. +#ifdef CC_INTERP + save_frame(0); +#else + save_frame_and_mov(0, Lmethod, Lmethod); // Propagate Lmethod for -Xprof +#endif /* CC_INTERP */ + ld_ptr(vm_result_addr, L0); + tst(L0); + restore(); + breakpoint_trap(notZero, Assembler::ptr_cc); + // } +# endif + + st_ptr(oop_result, vm_result_addr); +} + + +void MacroAssembler::ic_call(address entry, bool emit_delay) { + RelocationHolder rspec = virtual_call_Relocation::spec(pc()); + patchable_set((intptr_t)Universe::non_oop_word(), G5_inline_cache_reg); + relocate(rspec); + call(entry, relocInfo::none); + if (emit_delay) { + delayed()->nop(); + } +} + + +void MacroAssembler::card_table_write(jbyte* byte_map_base, + Register tmp, Register obj) { +#ifdef _LP64 + srlx(obj, CardTableModRefBS::card_shift, obj); +#else + srl(obj, CardTableModRefBS::card_shift, obj); +#endif + assert(tmp != obj, "need separate temp reg"); + set((address) byte_map_base, tmp); + stb(G0, tmp, obj); +} + + +void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) { + address save_pc; + int shiftcnt; +#ifdef _LP64 +# ifdef CHECK_DELAY + assert_not_delayed((char*) "cannot put two instructions in delay slot"); +# endif + v9_dep(); + save_pc = pc(); + + int msb32 = (int) (addrlit.value() >> 32); + int lsb32 = (int) (addrlit.value()); + + if (msb32 == 0 && lsb32 >= 0) { + Assembler::sethi(lsb32, d, addrlit.rspec()); + } + else if (msb32 == -1) { + Assembler::sethi(~lsb32, d, addrlit.rspec()); + xor3(d, ~low10(~0), d); + } + else { + Assembler::sethi(msb32, d, addrlit.rspec()); // msb 22-bits + if (msb32 & 0x3ff) // Any bits? + or3(d, msb32 & 0x3ff, d); // msb 32-bits are now in lsb 32 + if (lsb32 & 0xFFFFFC00) { // done? + if ((lsb32 >> 20) & 0xfff) { // Any bits set? + sllx(d, 12, d); // Make room for next 12 bits + or3(d, (lsb32 >> 20) & 0xfff, d); // Or in next 12 + shiftcnt = 0; // We already shifted + } + else + shiftcnt = 12; + if ((lsb32 >> 10) & 0x3ff) { + sllx(d, shiftcnt + 10, d); // Make room for last 10 bits + or3(d, (lsb32 >> 10) & 0x3ff, d); // Or in next 10 + shiftcnt = 0; + } + else + shiftcnt = 10; + sllx(d, shiftcnt + 10, d); // Shift leaving disp field 0'd + } + else + sllx(d, 32, d); + } + // Pad out the instruction sequence so it can be patched later. + if (ForceRelocatable || (addrlit.rtype() != relocInfo::none && + addrlit.rtype() != relocInfo::runtime_call_type)) { + while (pc() < (save_pc + (7 * BytesPerInstWord))) + nop(); + } +#else + Assembler::sethi(addrlit.value(), d, addrlit.rspec()); +#endif +} + + +void MacroAssembler::sethi(const AddressLiteral& addrlit, Register d) { + internal_sethi(addrlit, d, false); +} + + +void MacroAssembler::patchable_sethi(const AddressLiteral& addrlit, Register d) { + internal_sethi(addrlit, d, true); +} + + +int MacroAssembler::insts_for_sethi(address a, bool worst_case) { +#ifdef _LP64 + if (worst_case) return 7; + intptr_t iaddr = (intptr_t) a; + int msb32 = (int) (iaddr >> 32); + int lsb32 = (int) (iaddr); + int count; + if (msb32 == 0 && lsb32 >= 0) + count = 1; + else if (msb32 == -1) + count = 2; + else { + count = 2; + if (msb32 & 0x3ff) + count++; + if (lsb32 & 0xFFFFFC00 ) { + if ((lsb32 >> 20) & 0xfff) count += 2; + if ((lsb32 >> 10) & 0x3ff) count += 2; + } + } + return count; +#else + return 1; +#endif +} + +int MacroAssembler::worst_case_insts_for_set() { + return insts_for_sethi(NULL, true) + 1; +} + + +// Keep in sync with MacroAssembler::insts_for_internal_set +void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) { + intptr_t value = addrlit.value(); + + if (!ForceRelocatable && addrlit.rspec().type() == relocInfo::none) { + // can optimize + if (-4096 <= value && value <= 4095) { + or3(G0, value, d); // setsw (this leaves upper 32 bits sign-extended) + return; + } + if (inv_hi22(hi22(value)) == value) { + sethi(addrlit, d); + return; + } + } + assert_not_delayed((char*) "cannot put two instructions in delay slot"); + internal_sethi(addrlit, d, ForceRelocatable); + if (ForceRelocatable || addrlit.rspec().type() != relocInfo::none || addrlit.low10() != 0) { + add(d, addrlit.low10(), d, addrlit.rspec()); + } +} + +// Keep in sync with MacroAssembler::internal_set +int MacroAssembler::insts_for_internal_set(intptr_t value) { + // can optimize + if (-4096 <= value && value <= 4095) { + return 1; + } + if (inv_hi22(hi22(value)) == value) { + return insts_for_sethi((address) value); + } + int count = insts_for_sethi((address) value); + AddressLiteral al(value); + if (al.low10() != 0) { + count++; + } + return count; +} + +void MacroAssembler::set(const AddressLiteral& al, Register d) { + internal_set(al, d, false); +} + +void MacroAssembler::set(intptr_t value, Register d) { + AddressLiteral al(value); + internal_set(al, d, false); +} + +void MacroAssembler::set(address addr, Register d, RelocationHolder const& rspec) { + AddressLiteral al(addr, rspec); + internal_set(al, d, false); +} + +void MacroAssembler::patchable_set(const AddressLiteral& al, Register d) { + internal_set(al, d, true); +} + +void MacroAssembler::patchable_set(intptr_t value, Register d) { + AddressLiteral al(value); + internal_set(al, d, true); +} + + +void MacroAssembler::set64(jlong value, Register d, Register tmp) { + assert_not_delayed(); + v9_dep(); + + int hi = (int)(value >> 32); + int lo = (int)(value & ~0); + // (Matcher::isSimpleConstant64 knows about the following optimizations.) + if (Assembler::is_simm13(lo) && value == lo) { + or3(G0, lo, d); + } else if (hi == 0) { + Assembler::sethi(lo, d); // hardware version zero-extends to upper 32 + if (low10(lo) != 0) + or3(d, low10(lo), d); + } + else if (hi == -1) { + Assembler::sethi(~lo, d); // hardware version zero-extends to upper 32 + xor3(d, low10(lo) ^ ~low10(~0), d); + } + else if (lo == 0) { + if (Assembler::is_simm13(hi)) { + or3(G0, hi, d); + } else { + Assembler::sethi(hi, d); // hardware version zero-extends to upper 32 + if (low10(hi) != 0) + or3(d, low10(hi), d); + } + sllx(d, 32, d); + } + else { + Assembler::sethi(hi, tmp); + Assembler::sethi(lo, d); // macro assembler version sign-extends + if (low10(hi) != 0) + or3 (tmp, low10(hi), tmp); + if (low10(lo) != 0) + or3 ( d, low10(lo), d); + sllx(tmp, 32, tmp); + or3 (d, tmp, d); + } +} + +int MacroAssembler::insts_for_set64(jlong value) { + v9_dep(); + + int hi = (int) (value >> 32); + int lo = (int) (value & ~0); + int count = 0; + + // (Matcher::isSimpleConstant64 knows about the following optimizations.) + if (Assembler::is_simm13(lo) && value == lo) { + count++; + } else if (hi == 0) { + count++; + if (low10(lo) != 0) + count++; + } + else if (hi == -1) { + count += 2; + } + else if (lo == 0) { + if (Assembler::is_simm13(hi)) { + count++; + } else { + count++; + if (low10(hi) != 0) + count++; + } + count++; + } + else { + count += 2; + if (low10(hi) != 0) + count++; + if (low10(lo) != 0) + count++; + count += 2; + } + return count; +} + +// compute size in bytes of sparc frame, given +// number of extraWords +int MacroAssembler::total_frame_size_in_bytes(int extraWords) { + + int nWords = frame::memory_parameter_word_sp_offset; + + nWords += extraWords; + + if (nWords & 1) ++nWords; // round up to double-word + + return nWords * BytesPerWord; +} + + +// save_frame: given number of "extra" words in frame, +// issue approp. save instruction (p 200, v8 manual) + +void MacroAssembler::save_frame(int extraWords) { + int delta = -total_frame_size_in_bytes(extraWords); + if (is_simm13(delta)) { + save(SP, delta, SP); + } else { + set(delta, G3_scratch); + save(SP, G3_scratch, SP); + } +} + + +void MacroAssembler::save_frame_c1(int size_in_bytes) { + if (is_simm13(-size_in_bytes)) { + save(SP, -size_in_bytes, SP); + } else { + set(-size_in_bytes, G3_scratch); + save(SP, G3_scratch, SP); + } +} + + +void MacroAssembler::save_frame_and_mov(int extraWords, + Register s1, Register d1, + Register s2, Register d2) { + assert_not_delayed(); + + // The trick here is to use precisely the same memory word + // that trap handlers also use to save the register. + // This word cannot be used for any other purpose, but + // it works fine to save the register's value, whether or not + // an interrupt flushes register windows at any given moment! + Address s1_addr; + if (s1->is_valid() && (s1->is_in() || s1->is_local())) { + s1_addr = s1->address_in_saved_window(); + st_ptr(s1, s1_addr); + } + + Address s2_addr; + if (s2->is_valid() && (s2->is_in() || s2->is_local())) { + s2_addr = s2->address_in_saved_window(); + st_ptr(s2, s2_addr); + } + + save_frame(extraWords); + + if (s1_addr.base() == SP) { + ld_ptr(s1_addr.after_save(), d1); + } else if (s1->is_valid()) { + mov(s1->after_save(), d1); + } + + if (s2_addr.base() == SP) { + ld_ptr(s2_addr.after_save(), d2); + } else if (s2->is_valid()) { + mov(s2->after_save(), d2); + } +} + + +AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs a Recorder"); + int index = oop_recorder()->allocate_metadata_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return AddressLiteral((address)obj, rspec); +} + +AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) { + assert(oop_recorder() != NULL, "this assembler needs a Recorder"); + int index = oop_recorder()->find_index(obj); + RelocationHolder rspec = metadata_Relocation::spec(index); + return AddressLiteral((address)obj, rspec); +} + + +AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + assert(Universe::heap()->is_in_reserved(JNIHandles::resolve(obj)), "not an oop"); + int oop_index = oop_recorder()->find_index(obj); + return AddressLiteral(obj, oop_Relocation::spec(oop_index)); +} + +void MacroAssembler::set_narrow_oop(jobject obj, Register d) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + + assert_not_delayed(); + // Relocation with special format (see relocInfo_sparc.hpp). + relocate(rspec, 1); + // Assembler::sethi(0x3fffff, d); + emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(0x3fffff) ); + // Don't add relocation for 'add'. Do patching during 'sethi' processing. + add(d, 0x3ff, d); + +} + +void MacroAssembler::set_narrow_klass(Klass* k, Register d) { + assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + narrowOop encoded_k = oopDesc::encode_klass(k); + + assert_not_delayed(); + // Relocation with special format (see relocInfo_sparc.hpp). + relocate(rspec, 1); + // Assembler::sethi(encoded_k, d); + emit_long( op(branch_op) | rd(d) | op2(sethi_op2) | hi22(encoded_k) ); + // Don't add relocation for 'add'. Do patching during 'sethi' processing. + add(d, low10(encoded_k), d); + +} + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); +} + + +void MacroAssembler::safepoint() { + relocate(breakpoint_Relocation::spec(breakpoint_Relocation::safepoint)); +} + + +void RegistersForDebugging::print(outputStream* s) { + FlagSetting fs(Debugging, true); + int j; + for (j = 0; j < 8; ++j) { + if (j != 6) { s->print("i%d = ", j); os::print_location(s, i[j]); } + else { s->print( "fp = " ); os::print_location(s, i[j]); } + } + s->cr(); + + for (j = 0; j < 8; ++j) { + s->print("l%d = ", j); os::print_location(s, l[j]); + } + s->cr(); + + for (j = 0; j < 8; ++j) { + if (j != 6) { s->print("o%d = ", j); os::print_location(s, o[j]); } + else { s->print( "sp = " ); os::print_location(s, o[j]); } + } + s->cr(); + + for (j = 0; j < 8; ++j) { + s->print("g%d = ", j); os::print_location(s, g[j]); + } + s->cr(); + + // print out floats with compression + for (j = 0; j < 32; ) { + jfloat val = f[j]; + int last = j; + for ( ; last+1 < 32; ++last ) { + char b1[1024], b2[1024]; + sprintf(b1, "%f", val); + sprintf(b2, "%f", f[last+1]); + if (strcmp(b1, b2)) + break; + } + s->print("f%d", j); + if ( j != last ) s->print(" - f%d", last); + s->print(" = %f", val); + s->fill_to(25); + s->print_cr(" (0x%x)", val); + j = last + 1; + } + s->cr(); + + // and doubles (evens only) + for (j = 0; j < 32; ) { + jdouble val = d[j]; + int last = j; + for ( ; last+1 < 32; ++last ) { + char b1[1024], b2[1024]; + sprintf(b1, "%f", val); + sprintf(b2, "%f", d[last+1]); + if (strcmp(b1, b2)) + break; + } + s->print("d%d", 2 * j); + if ( j != last ) s->print(" - d%d", last); + s->print(" = %f", val); + s->fill_to(30); + s->print("(0x%x)", *(int*)&val); + s->fill_to(42); + s->print_cr("(0x%x)", *(1 + (int*)&val)); + j = last + 1; + } + s->cr(); +} + +void RegistersForDebugging::save_registers(MacroAssembler* a) { + a->sub(FP, round_to(sizeof(RegistersForDebugging), sizeof(jdouble)) - STACK_BIAS, O0); + a->flush_windows(); + int i; + for (i = 0; i < 8; ++i) { + a->ld_ptr(as_iRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, i_offset(i)); + a->ld_ptr(as_lRegister(i)->address_in_saved_window().after_save(), L1); a->st_ptr( L1, O0, l_offset(i)); + a->st_ptr(as_oRegister(i)->after_save(), O0, o_offset(i)); + a->st_ptr(as_gRegister(i)->after_save(), O0, g_offset(i)); + } + for (i = 0; i < 32; ++i) { + a->stf(FloatRegisterImpl::S, as_FloatRegister(i), O0, f_offset(i)); + } + for (i = 0; i < (VM_Version::v9_instructions_work() ? 64 : 32); i += 2) { + a->stf(FloatRegisterImpl::D, as_FloatRegister(i), O0, d_offset(i)); + } +} + +void RegistersForDebugging::restore_registers(MacroAssembler* a, Register r) { + for (int i = 1; i < 8; ++i) { + a->ld_ptr(r, g_offset(i), as_gRegister(i)); + } + for (int j = 0; j < 32; ++j) { + a->ldf(FloatRegisterImpl::S, O0, f_offset(j), as_FloatRegister(j)); + } + for (int k = 0; k < (VM_Version::v9_instructions_work() ? 64 : 32); k += 2) { + a->ldf(FloatRegisterImpl::D, O0, d_offset(k), as_FloatRegister(k)); + } +} + + +// pushes double TOS element of FPU stack on CPU stack; pops from FPU stack +void MacroAssembler::push_fTOS() { + // %%%%%% need to implement this +} + +// pops double TOS element from CPU stack and pushes on FPU stack +void MacroAssembler::pop_fTOS() { + // %%%%%% need to implement this +} + +void MacroAssembler::empty_FPU_stack() { + // %%%%%% need to implement this +} + +void MacroAssembler::_verify_oop(Register reg, const char* msg, const char * file, int line) { + // plausibility check for oops + if (!VerifyOops) return; + + if (reg == G0) return; // always NULL, which is always an oop + + BLOCK_COMMENT("verify_oop {"); + char buffer[64]; +#ifdef COMPILER1 + if (CommentedAssembly) { + snprintf(buffer, sizeof(buffer), "verify_oop at %d", offset()); + block_comment(buffer); + } +#endif + + int len = strlen(file) + strlen(msg) + 1 + 4; + sprintf(buffer, "%d", line); + len += strlen(buffer); + sprintf(buffer, " at offset %d ", offset()); + len += strlen(buffer); + char * real_msg = new char[len]; + sprintf(real_msg, "%s%s(%s:%d)", msg, buffer, file, line); + + // Call indirectly to solve generation ordering problem + AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address()); + + // Make some space on stack above the current register window. + // Enough to hold 8 64-bit registers. + add(SP,-8*8,SP); + + // Save some 64-bit registers; a normal 'save' chops the heads off + // of 64-bit longs in the 32-bit build. + stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8); + stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8); + mov(reg,O0); // Move arg into O0; arg might be in O7 which is about to be crushed + stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8); + + // Size of set() should stay the same + patchable_set((intptr_t)real_msg, O1); + // Load address to call to into O7 + load_ptr_contents(a, O7); + // Register call to verify_oop_subroutine + callr(O7, G0); + delayed()->nop(); + // recover frame size + add(SP, 8*8,SP); + BLOCK_COMMENT("} verify_oop"); +} + +void MacroAssembler::_verify_oop_addr(Address addr, const char* msg, const char * file, int line) { + // plausibility check for oops + if (!VerifyOops) return; + + char buffer[64]; + sprintf(buffer, "%d", line); + int len = strlen(file) + strlen(msg) + 1 + 4 + strlen(buffer); + sprintf(buffer, " at SP+%d ", addr.disp()); + len += strlen(buffer); + char * real_msg = new char[len]; + sprintf(real_msg, "%s at SP+%d (%s:%d)", msg, addr.disp(), file, line); + + // Call indirectly to solve generation ordering problem + AddressLiteral a(StubRoutines::verify_oop_subroutine_entry_address()); + + // Make some space on stack above the current register window. + // Enough to hold 8 64-bit registers. + add(SP,-8*8,SP); + + // Save some 64-bit registers; a normal 'save' chops the heads off + // of 64-bit longs in the 32-bit build. + stx(O0,SP,frame::register_save_words*wordSize+STACK_BIAS+0*8); + stx(O1,SP,frame::register_save_words*wordSize+STACK_BIAS+1*8); + ld_ptr(addr.base(), addr.disp() + 8*8, O0); // Load arg into O0; arg might be in O7 which is about to be crushed + stx(O7,SP,frame::register_save_words*wordSize+STACK_BIAS+7*8); + + // Size of set() should stay the same + patchable_set((intptr_t)real_msg, O1); + // Load address to call to into O7 + load_ptr_contents(a, O7); + // Register call to verify_oop_subroutine + callr(O7, G0); + delayed()->nop(); + // recover frame size + add(SP, 8*8,SP); +} + +// side-door communication with signalHandler in os_solaris.cpp +address MacroAssembler::_verify_oop_implicit_branch[3] = { NULL }; + +// This macro is expanded just once; it creates shared code. Contract: +// receives an oop in O0. Must restore O0 & O7 from TLS. Must not smash ANY +// registers, including flags. May not use a register 'save', as this blows +// the high bits of the O-regs if they contain Long values. Acts as a 'leaf' +// call. +void MacroAssembler::verify_oop_subroutine() { + assert( VM_Version::v9_instructions_work(), "VerifyOops not supported for V8" ); + + // Leaf call; no frame. + Label succeed, fail, null_or_fail; + + // O0 and O7 were saved already (O0 in O0's TLS home, O7 in O5's TLS home). + // O0 is now the oop to be checked. O7 is the return address. + Register O0_obj = O0; + + // Save some more registers for temps. + stx(O2,SP,frame::register_save_words*wordSize+STACK_BIAS+2*8); + stx(O3,SP,frame::register_save_words*wordSize+STACK_BIAS+3*8); + stx(O4,SP,frame::register_save_words*wordSize+STACK_BIAS+4*8); + stx(O5,SP,frame::register_save_words*wordSize+STACK_BIAS+5*8); + + // Save flags + Register O5_save_flags = O5; + rdccr( O5_save_flags ); + + { // count number of verifies + Register O2_adr = O2; + Register O3_accum = O3; + inc_counter(StubRoutines::verify_oop_count_addr(), O2_adr, O3_accum); + } + + Register O2_mask = O2; + Register O3_bits = O3; + Register O4_temp = O4; + + // mark lower end of faulting range + assert(_verify_oop_implicit_branch[0] == NULL, "set once"); + _verify_oop_implicit_branch[0] = pc(); + + // We can't check the mark oop because it could be in the process of + // locking or unlocking while this is running. + set(Universe::verify_oop_mask (), O2_mask); + set(Universe::verify_oop_bits (), O3_bits); + + // assert((obj & oop_mask) == oop_bits); + and3(O0_obj, O2_mask, O4_temp); + cmp_and_brx_short(O4_temp, O3_bits, notEqual, pn, null_or_fail); + + if ((NULL_WORD & Universe::verify_oop_mask()) == Universe::verify_oop_bits()) { + // the null_or_fail case is useless; must test for null separately + br_null_short(O0_obj, pn, succeed); + } + + // Check the Klass* of this object for being in the right area of memory. + // Cannot do the load in the delay above slot in case O0 is null + load_klass(O0_obj, O0_obj); + // assert((klass != NULL) + br_null_short(O0_obj, pn, fail); + // TODO: Future assert that klass is lower 4g memory for UseCompressedKlassPointers + + wrccr( O5_save_flags ); // Restore CCR's + + // mark upper end of faulting range + _verify_oop_implicit_branch[1] = pc(); + + //----------------------- + // all tests pass + bind(succeed); + + // Restore prior 64-bit registers + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+0*8,O0); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+1*8,O1); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+2*8,O2); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+3*8,O3); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+4*8,O4); + ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+5*8,O5); + + retl(); // Leaf return; restore prior O7 in delay slot + delayed()->ldx(SP,frame::register_save_words*wordSize+STACK_BIAS+7*8,O7); + + //----------------------- + bind(null_or_fail); // nulls are less common but OK + br_null(O0_obj, false, pt, succeed); + delayed()->wrccr( O5_save_flags ); // Restore CCR's + + //----------------------- + // report failure: + bind(fail); + _verify_oop_implicit_branch[2] = pc(); + + wrccr( O5_save_flags ); // Restore CCR's + + save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2)); + + // stop_subroutine expects message pointer in I1. + mov(I1, O1); + + // Restore prior 64-bit registers + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+0*8,I0); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+1*8,I1); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+2*8,I2); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+3*8,I3); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+4*8,I4); + ldx(FP,frame::register_save_words*wordSize+STACK_BIAS+5*8,I5); + + // factor long stop-sequence into subroutine to save space + assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet"); + + // call indirectly to solve generation ordering problem + AddressLiteral al(StubRoutines::Sparc::stop_subroutine_entry_address()); + load_ptr_contents(al, O5); + jmpl(O5, 0, O7); + delayed()->nop(); +} + + +void MacroAssembler::stop(const char* msg) { + // save frame first to get O7 for return address + // add one word to size in case struct is odd number of words long + // It must be doubleword-aligned for storing doubles into it. + + save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2)); + + // stop_subroutine expects message pointer in I1. + // Size of set() should stay the same + patchable_set((intptr_t)msg, O1); + + // factor long stop-sequence into subroutine to save space + assert(StubRoutines::Sparc::stop_subroutine_entry_address(), "hasn't been generated yet"); + + // call indirectly to solve generation ordering problem + AddressLiteral a(StubRoutines::Sparc::stop_subroutine_entry_address()); + load_ptr_contents(a, O5); + jmpl(O5, 0, O7); + delayed()->nop(); + + breakpoint_trap(); // make stop actually stop rather than writing + // unnoticeable results in the output files. + + // restore(); done in callee to save space! +} + + +void MacroAssembler::warn(const char* msg) { + save_frame(::round_to(sizeof(RegistersForDebugging) / BytesPerWord, 2)); + RegistersForDebugging::save_registers(this); + mov(O0, L0); + // Size of set() should stay the same + patchable_set((intptr_t)msg, O0); + call( CAST_FROM_FN_PTR(address, warning) ); + delayed()->nop(); +// ret(); +// delayed()->restore(); + RegistersForDebugging::restore_registers(this, L0); + restore(); +} + + +void MacroAssembler::untested(const char* what) { + // We must be able to turn interactive prompting off + // in order to run automated test scripts on the VM + // Use the flag ShowMessageBoxOnError + + char* b = new char[1024]; + sprintf(b, "untested: %s", what); + + if (ShowMessageBoxOnError) { STOP(b); } + else { warn(b); } +} + + +void MacroAssembler::stop_subroutine() { + RegistersForDebugging::save_registers(this); + + // for the sake of the debugger, stick a PC on the current frame + // (this assumes that the caller has performed an extra "save") + mov(I7, L7); + add(O7, -7 * BytesPerInt, I7); + + save_frame(); // one more save to free up another O7 register + mov(I0, O1); // addr of reg save area + + // We expect pointer to message in I1. Caller must set it up in O1 + mov(I1, O0); // get msg + call (CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); + delayed()->nop(); + + restore(); + + RegistersForDebugging::restore_registers(this, O0); + + save_frame(0); + call(CAST_FROM_FN_PTR(address,breakpoint)); + delayed()->nop(); + restore(); + + mov(L7, I7); + retl(); + delayed()->restore(); // see stop above +} + + +void MacroAssembler::debug(char* msg, RegistersForDebugging* regs) { + if ( ShowMessageBoxOnError ) { + JavaThread* thread = JavaThread::current(); + JavaThreadState saved_state = thread->thread_state(); + thread->set_thread_state(_thread_in_vm); + { + // In order to get locks work, we need to fake a in_VM state + ttyLocker ttyl; + ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + BytecodeCounter::print(); + } + if (os::message_box(msg, "Execution stopped, print registers?")) + regs->print(::tty); + } + BREAKPOINT; + ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); + } + else { + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); + } + assert(false, err_msg("DEBUG MESSAGE: %s", msg)); +} + + +void MacroAssembler::calc_mem_param_words(Register Rparam_words, Register Rresult) { + subcc( Rparam_words, Argument::n_register_parameters, Rresult); // how many mem words? + Label no_extras; + br( negative, true, pt, no_extras ); // if neg, clear reg + delayed()->set(0, Rresult); // annuled, so only if taken + bind( no_extras ); +} + + +void MacroAssembler::calc_frame_size(Register Rextra_words, Register Rresult) { +#ifdef _LP64 + add(Rextra_words, frame::memory_parameter_word_sp_offset, Rresult); +#else + add(Rextra_words, frame::memory_parameter_word_sp_offset + 1, Rresult); +#endif + bclr(1, Rresult); + sll(Rresult, LogBytesPerWord, Rresult); // Rresult has total frame bytes +} + + +void MacroAssembler::calc_frame_size_and_save(Register Rextra_words, Register Rresult) { + calc_frame_size(Rextra_words, Rresult); + neg(Rresult); + save(SP, Rresult, SP); +} + + +// --------------------------------------------------------- +Assembler::RCondition cond2rcond(Assembler::Condition c) { + switch (c) { + /*case zero: */ + case Assembler::equal: return Assembler::rc_z; + case Assembler::lessEqual: return Assembler::rc_lez; + case Assembler::less: return Assembler::rc_lz; + /*case notZero:*/ + case Assembler::notEqual: return Assembler::rc_nz; + case Assembler::greater: return Assembler::rc_gz; + case Assembler::greaterEqual: return Assembler::rc_gez; + } + ShouldNotReachHere(); + return Assembler::rc_z; +} + +// compares (32 bit) register with zero and branches. NOT FOR USE WITH 64-bit POINTERS +void MacroAssembler::cmp_zero_and_br(Condition c, Register s1, Label& L, bool a, Predict p) { + tst(s1); + br (c, a, p, L); +} + +// Compares a pointer register with zero and branches on null. +// Does a test & branch on 32-bit systems and a register-branch on 64-bit. +void MacroAssembler::br_null( Register s1, bool a, Predict p, Label& L ) { + assert_not_delayed(); +#ifdef _LP64 + bpr( rc_z, a, p, s1, L ); +#else + tst(s1); + br ( zero, a, p, L ); +#endif +} + +void MacroAssembler::br_notnull( Register s1, bool a, Predict p, Label& L ) { + assert_not_delayed(); +#ifdef _LP64 + bpr( rc_nz, a, p, s1, L ); +#else + tst(s1); + br ( notZero, a, p, L ); +#endif +} + +// Compare registers and branch with nop in delay slot or cbcond without delay slot. + +// Compare integer (32 bit) values (icc only). +void MacroAssembler::cmp_and_br_short(Register s1, Register s2, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(c, icc, s1, s2, L); + } else { + cmp(s1, s2); + br(c, false, p, L); + delayed()->nop(); + } +} + +// Compare integer (32 bit) values (icc only). +void MacroAssembler::cmp_and_br_short(Register s1, int simm13a, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (is_simm(simm13a,5) && use_cbcond(L)) { + Assembler::cbcond(c, icc, s1, simm13a, L); + } else { + cmp(s1, simm13a); + br(c, false, p, L); + delayed()->nop(); + } +} + +// Branch that tests xcc in LP64 and icc in !LP64 +void MacroAssembler::cmp_and_brx_short(Register s1, Register s2, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(c, ptr_cc, s1, s2, L); + } else { + cmp(s1, s2); + brx(c, false, p, L); + delayed()->nop(); + } +} + +// Branch that tests xcc in LP64 and icc in !LP64 +void MacroAssembler::cmp_and_brx_short(Register s1, int simm13a, Condition c, + Predict p, Label& L) { + assert_not_delayed(); + if (is_simm(simm13a,5) && use_cbcond(L)) { + Assembler::cbcond(c, ptr_cc, s1, simm13a, L); + } else { + cmp(s1, simm13a); + brx(c, false, p, L); + delayed()->nop(); + } +} + +// Short branch version for compares a pointer with zero. + +void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(zero, ptr_cc, s1, 0, L); + return; + } + br_null(s1, false, p, L); + delayed()->nop(); +} + +void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) { + assert_not_delayed(); + if (use_cbcond(L)) { + Assembler::cbcond(notZero, ptr_cc, s1, 0, L); + return; + } + br_notnull(s1, false, p, L); + delayed()->nop(); +} + +// Unconditional short branch +void MacroAssembler::ba_short(Label& L) { + if (use_cbcond(L)) { + Assembler::cbcond(equal, icc, G0, G0, L); + return; + } + br(always, false, pt, L); + delayed()->nop(); +} + +// instruction sequences factored across compiler & interpreter + + +void MacroAssembler::lcmp( Register Ra_hi, Register Ra_low, + Register Rb_hi, Register Rb_low, + Register Rresult) { + + Label check_low_parts, done; + + cmp(Ra_hi, Rb_hi ); // compare hi parts + br(equal, true, pt, check_low_parts); + delayed()->cmp(Ra_low, Rb_low); // test low parts + + // And, with an unsigned comparison, it does not matter if the numbers + // are negative or not. + // E.g., -2 cmp -1: the low parts are 0xfffffffe and 0xffffffff. + // The second one is bigger (unsignedly). + + // Other notes: The first move in each triplet can be unconditional + // (and therefore probably prefetchable). + // And the equals case for the high part does not need testing, + // since that triplet is reached only after finding the high halves differ. + + if (VM_Version::v9_instructions_work()) { + mov(-1, Rresult); + ba(done); delayed()-> movcc(greater, false, icc, 1, Rresult); + } else { + br(less, true, pt, done); delayed()-> set(-1, Rresult); + br(greater, true, pt, done); delayed()-> set( 1, Rresult); + } + + bind( check_low_parts ); + + if (VM_Version::v9_instructions_work()) { + mov( -1, Rresult); + movcc(equal, false, icc, 0, Rresult); + movcc(greaterUnsigned, false, icc, 1, Rresult); + } else { + set(-1, Rresult); + br(equal, true, pt, done); delayed()->set( 0, Rresult); + br(greaterUnsigned, true, pt, done); delayed()->set( 1, Rresult); + } + bind( done ); +} + +void MacroAssembler::lneg( Register Rhi, Register Rlow ) { + subcc( G0, Rlow, Rlow ); + subc( G0, Rhi, Rhi ); +} + +void MacroAssembler::lshl( Register Rin_high, Register Rin_low, + Register Rcount, + Register Rout_high, Register Rout_low, + Register Rtemp ) { + + + Register Ralt_count = Rtemp; + Register Rxfer_bits = Rtemp; + + assert( Ralt_count != Rin_high + && Ralt_count != Rin_low + && Ralt_count != Rcount + && Rxfer_bits != Rin_low + && Rxfer_bits != Rin_high + && Rxfer_bits != Rcount + && Rxfer_bits != Rout_low + && Rout_low != Rin_high, + "register alias checks"); + + Label big_shift, done; + + // This code can be optimized to use the 64 bit shifts in V9. + // Here we use the 32 bit shifts. + + and3( Rcount, 0x3f, Rcount); // take least significant 6 bits + subcc(Rcount, 31, Ralt_count); + br(greater, true, pn, big_shift); + delayed()->dec(Ralt_count); + + // shift < 32 bits, Ralt_count = Rcount-31 + + // We get the transfer bits by shifting right by 32-count the low + // register. This is done by shifting right by 31-count and then by one + // more to take care of the special (rare) case where count is zero + // (shifting by 32 would not work). + + neg(Ralt_count); + + // The order of the next two instructions is critical in the case where + // Rin and Rout are the same and should not be reversed. + + srl(Rin_low, Ralt_count, Rxfer_bits); // shift right by 31-count + if (Rcount != Rout_low) { + sll(Rin_low, Rcount, Rout_low); // low half + } + sll(Rin_high, Rcount, Rout_high); + if (Rcount == Rout_low) { + sll(Rin_low, Rcount, Rout_low); // low half + } + srl(Rxfer_bits, 1, Rxfer_bits ); // shift right by one more + ba(done); + delayed()->or3(Rout_high, Rxfer_bits, Rout_high); // new hi value: or in shifted old hi part and xfer from low + + // shift >= 32 bits, Ralt_count = Rcount-32 + bind(big_shift); + sll(Rin_low, Ralt_count, Rout_high ); + clr(Rout_low); + + bind(done); +} + + +void MacroAssembler::lshr( Register Rin_high, Register Rin_low, + Register Rcount, + Register Rout_high, Register Rout_low, + Register Rtemp ) { + + Register Ralt_count = Rtemp; + Register Rxfer_bits = Rtemp; + + assert( Ralt_count != Rin_high + && Ralt_count != Rin_low + && Ralt_count != Rcount + && Rxfer_bits != Rin_low + && Rxfer_bits != Rin_high + && Rxfer_bits != Rcount + && Rxfer_bits != Rout_high + && Rout_high != Rin_low, + "register alias checks"); + + Label big_shift, done; + + // This code can be optimized to use the 64 bit shifts in V9. + // Here we use the 32 bit shifts. + + and3( Rcount, 0x3f, Rcount); // take least significant 6 bits + subcc(Rcount, 31, Ralt_count); + br(greater, true, pn, big_shift); + delayed()->dec(Ralt_count); + + // shift < 32 bits, Ralt_count = Rcount-31 + + // We get the transfer bits by shifting left by 32-count the high + // register. This is done by shifting left by 31-count and then by one + // more to take care of the special (rare) case where count is zero + // (shifting by 32 would not work). + + neg(Ralt_count); + if (Rcount != Rout_low) { + srl(Rin_low, Rcount, Rout_low); + } + + // The order of the next two instructions is critical in the case where + // Rin and Rout are the same and should not be reversed. + + sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count + sra(Rin_high, Rcount, Rout_high ); // high half + sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more + if (Rcount == Rout_low) { + srl(Rin_low, Rcount, Rout_low); + } + ba(done); + delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high + + // shift >= 32 bits, Ralt_count = Rcount-32 + bind(big_shift); + + sra(Rin_high, Ralt_count, Rout_low); + sra(Rin_high, 31, Rout_high); // sign into hi + + bind( done ); +} + + + +void MacroAssembler::lushr( Register Rin_high, Register Rin_low, + Register Rcount, + Register Rout_high, Register Rout_low, + Register Rtemp ) { + + Register Ralt_count = Rtemp; + Register Rxfer_bits = Rtemp; + + assert( Ralt_count != Rin_high + && Ralt_count != Rin_low + && Ralt_count != Rcount + && Rxfer_bits != Rin_low + && Rxfer_bits != Rin_high + && Rxfer_bits != Rcount + && Rxfer_bits != Rout_high + && Rout_high != Rin_low, + "register alias checks"); + + Label big_shift, done; + + // This code can be optimized to use the 64 bit shifts in V9. + // Here we use the 32 bit shifts. + + and3( Rcount, 0x3f, Rcount); // take least significant 6 bits + subcc(Rcount, 31, Ralt_count); + br(greater, true, pn, big_shift); + delayed()->dec(Ralt_count); + + // shift < 32 bits, Ralt_count = Rcount-31 + + // We get the transfer bits by shifting left by 32-count the high + // register. This is done by shifting left by 31-count and then by one + // more to take care of the special (rare) case where count is zero + // (shifting by 32 would not work). + + neg(Ralt_count); + if (Rcount != Rout_low) { + srl(Rin_low, Rcount, Rout_low); + } + + // The order of the next two instructions is critical in the case where + // Rin and Rout are the same and should not be reversed. + + sll(Rin_high, Ralt_count, Rxfer_bits); // shift left by 31-count + srl(Rin_high, Rcount, Rout_high ); // high half + sll(Rxfer_bits, 1, Rxfer_bits); // shift left by one more + if (Rcount == Rout_low) { + srl(Rin_low, Rcount, Rout_low); + } + ba(done); + delayed()->or3(Rout_low, Rxfer_bits, Rout_low); // new low value: or shifted old low part and xfer from high + + // shift >= 32 bits, Ralt_count = Rcount-32 + bind(big_shift); + + srl(Rin_high, Ralt_count, Rout_low); + clr(Rout_high); + + bind( done ); +} + +#ifdef _LP64 +void MacroAssembler::lcmp( Register Ra, Register Rb, Register Rresult) { + cmp(Ra, Rb); + mov(-1, Rresult); + movcc(equal, false, xcc, 0, Rresult); + movcc(greater, false, xcc, 1, Rresult); +} +#endif + + +void MacroAssembler::load_sized_value(Address src, Register dst, size_t size_in_bytes, bool is_signed) { + switch (size_in_bytes) { + case 8: ld_long(src, dst); break; + case 4: ld( src, dst); break; + case 2: is_signed ? ldsh(src, dst) : lduh(src, dst); break; + case 1: is_signed ? ldsb(src, dst) : ldub(src, dst); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes) { + switch (size_in_bytes) { + case 8: st_long(src, dst); break; + case 4: st( src, dst); break; + case 2: sth( src, dst); break; + case 1: stb( src, dst); break; + default: ShouldNotReachHere(); + } +} + + +void MacroAssembler::float_cmp( bool is_float, int unordered_result, + FloatRegister Fa, FloatRegister Fb, + Register Rresult) { + + fcmp(is_float ? FloatRegisterImpl::S : FloatRegisterImpl::D, fcc0, Fa, Fb); + + Condition lt = unordered_result == -1 ? f_unorderedOrLess : f_less; + Condition eq = f_equal; + Condition gt = unordered_result == 1 ? f_unorderedOrGreater : f_greater; + + if (VM_Version::v9_instructions_work()) { + + mov(-1, Rresult); + movcc(eq, true, fcc0, 0, Rresult); + movcc(gt, true, fcc0, 1, Rresult); + + } else { + Label done; + + set( -1, Rresult ); + //fb(lt, true, pn, done); delayed()->set( -1, Rresult ); + fb( eq, true, pn, done); delayed()->set( 0, Rresult ); + fb( gt, true, pn, done); delayed()->set( 1, Rresult ); + + bind (done); + } +} + + +void MacroAssembler::fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) +{ + if (VM_Version::v9_instructions_work()) { + Assembler::fneg(w, s, d); + } else { + if (w == FloatRegisterImpl::S) { + Assembler::fneg(w, s, d); + } else if (w == FloatRegisterImpl::D) { + // number() does a sanity check on the alignment. + assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && + ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); + + Assembler::fneg(FloatRegisterImpl::S, s, d); + Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); + } else { + assert(w == FloatRegisterImpl::Q, "Invalid float register width"); + + // number() does a sanity check on the alignment. + assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && + ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); + + Assembler::fneg(FloatRegisterImpl::S, s, d); + Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); + Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); + Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); + } + } +} + +void MacroAssembler::fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) +{ + if (VM_Version::v9_instructions_work()) { + Assembler::fmov(w, s, d); + } else { + if (w == FloatRegisterImpl::S) { + Assembler::fmov(w, s, d); + } else if (w == FloatRegisterImpl::D) { + // number() does a sanity check on the alignment. + assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && + ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); + + Assembler::fmov(FloatRegisterImpl::S, s, d); + Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); + } else { + assert(w == FloatRegisterImpl::Q, "Invalid float register width"); + + // number() does a sanity check on the alignment. + assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && + ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); + + Assembler::fmov(FloatRegisterImpl::S, s, d); + Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); + Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); + Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); + } + } +} + +void MacroAssembler::fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d) +{ + if (VM_Version::v9_instructions_work()) { + Assembler::fabs(w, s, d); + } else { + if (w == FloatRegisterImpl::S) { + Assembler::fabs(w, s, d); + } else if (w == FloatRegisterImpl::D) { + // number() does a sanity check on the alignment. + assert(((s->encoding(FloatRegisterImpl::D) & 1) == 0) && + ((d->encoding(FloatRegisterImpl::D) & 1) == 0), "float register alignment check"); + + Assembler::fabs(FloatRegisterImpl::S, s, d); + Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); + } else { + assert(w == FloatRegisterImpl::Q, "Invalid float register width"); + + // number() does a sanity check on the alignment. + assert(((s->encoding(FloatRegisterImpl::D) & 3) == 0) && + ((d->encoding(FloatRegisterImpl::D) & 3) == 0), "float register alignment check"); + + Assembler::fabs(FloatRegisterImpl::S, s, d); + Assembler::fmov(FloatRegisterImpl::S, s->successor(), d->successor()); + Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor(), d->successor()->successor()); + Assembler::fmov(FloatRegisterImpl::S, s->successor()->successor()->successor(), d->successor()->successor()->successor()); + } + } +} + +void MacroAssembler::save_all_globals_into_locals() { + mov(G1,L1); + mov(G2,L2); + mov(G3,L3); + mov(G4,L4); + mov(G5,L5); + mov(G6,L6); + mov(G7,L7); +} + +void MacroAssembler::restore_globals_from_locals() { + mov(L1,G1); + mov(L2,G2); + mov(L3,G3); + mov(L4,G4); + mov(L5,G5); + mov(L6,G6); + mov(L7,G7); +} + +// Use for 64 bit operation. +void MacroAssembler::casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm) +{ + // store ptr_reg as the new top value +#ifdef _LP64 + casx(top_ptr_reg, top_reg, ptr_reg); +#else + cas_under_lock(top_ptr_reg, top_reg, ptr_reg, lock_addr, use_call_vm); +#endif // _LP64 +} + +// [RGV] This routine does not handle 64 bit operations. +// use casx_under_lock() or casx directly!!! +void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, address lock_addr, bool use_call_vm) +{ + // store ptr_reg as the new top value + if (VM_Version::v9_instructions_work()) { + cas(top_ptr_reg, top_reg, ptr_reg); + } else { + + // If the register is not an out nor global, it is not visible + // after the save. Allocate a register for it, save its + // value in the register save area (the save may not flush + // registers to the save area). + + Register top_ptr_reg_after_save; + Register top_reg_after_save; + Register ptr_reg_after_save; + + if (top_ptr_reg->is_out() || top_ptr_reg->is_global()) { + top_ptr_reg_after_save = top_ptr_reg->after_save(); + } else { + Address reg_save_addr = top_ptr_reg->address_in_saved_window(); + top_ptr_reg_after_save = L0; + st(top_ptr_reg, reg_save_addr); + } + + if (top_reg->is_out() || top_reg->is_global()) { + top_reg_after_save = top_reg->after_save(); + } else { + Address reg_save_addr = top_reg->address_in_saved_window(); + top_reg_after_save = L1; + st(top_reg, reg_save_addr); + } + + if (ptr_reg->is_out() || ptr_reg->is_global()) { + ptr_reg_after_save = ptr_reg->after_save(); + } else { + Address reg_save_addr = ptr_reg->address_in_saved_window(); + ptr_reg_after_save = L2; + st(ptr_reg, reg_save_addr); + } + + const Register& lock_reg = L3; + const Register& lock_ptr_reg = L4; + const Register& value_reg = L5; + const Register& yield_reg = L6; + const Register& yieldall_reg = L7; + + save_frame(); + + if (top_ptr_reg_after_save == L0) { + ld(top_ptr_reg->address_in_saved_window().after_save(), top_ptr_reg_after_save); + } + + if (top_reg_after_save == L1) { + ld(top_reg->address_in_saved_window().after_save(), top_reg_after_save); + } + + if (ptr_reg_after_save == L2) { + ld(ptr_reg->address_in_saved_window().after_save(), ptr_reg_after_save); + } + + Label(retry_get_lock); + Label(not_same); + Label(dont_yield); + + assert(lock_addr, "lock_address should be non null for v8"); + set((intptr_t)lock_addr, lock_ptr_reg); + // Initialize yield counter + mov(G0,yield_reg); + mov(G0, yieldall_reg); + set(StubRoutines::Sparc::locked, lock_reg); + + bind(retry_get_lock); + cmp_and_br_short(yield_reg, V8AtomicOperationUnderLockSpinCount, Assembler::less, Assembler::pt, dont_yield); + + if(use_call_vm) { + Untested("Need to verify global reg consistancy"); + call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::yield_all), yieldall_reg); + } else { + // Save the regs and make space for a C call + save(SP, -96, SP); + save_all_globals_into_locals(); + call(CAST_FROM_FN_PTR(address,os::yield_all)); + delayed()->mov(yieldall_reg, O0); + restore_globals_from_locals(); + restore(); + } + + // reset the counter + mov(G0,yield_reg); + add(yieldall_reg, 1, yieldall_reg); + + bind(dont_yield); + // try to get lock + Assembler::swap(lock_ptr_reg, 0, lock_reg); + + // did we get the lock? + cmp(lock_reg, StubRoutines::Sparc::unlocked); + br(Assembler::notEqual, true, Assembler::pn, retry_get_lock); + delayed()->add(yield_reg,1,yield_reg); + + // yes, got lock. do we have the same top? + ld(top_ptr_reg_after_save, 0, value_reg); + cmp_and_br_short(value_reg, top_reg_after_save, Assembler::notEqual, Assembler::pn, not_same); + + // yes, same top. + st(ptr_reg_after_save, top_ptr_reg_after_save, 0); + membar(Assembler::StoreStore); + + bind(not_same); + mov(value_reg, ptr_reg_after_save); + st(lock_reg, lock_ptr_reg, 0); // unlock + + restore(); + } +} + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + offset); + + // load indirectly to solve generation ordering problem + AddressLiteral a(delayed_value_addr); + load_ptr_contents(a, tmp); + +#ifdef ASSERT + tst(tmp); + breakpoint_trap(zero, xcc); +#endif + + if (offset != 0) + add(tmp, offset, tmp); + + return RegisterOrConstant(tmp); +} + + +RegisterOrConstant MacroAssembler::regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) { + assert(d.register_or_noreg() != G0, "lost side effect"); + if ((s2.is_constant() && s2.as_constant() == 0) || + (s2.is_register() && s2.as_register() == G0)) { + // Do nothing, just move value. + if (s1.is_register()) { + if (d.is_constant()) d = temp; + mov(s1.as_register(), d.as_register()); + return d; + } else { + return s1; + } + } + + if (s1.is_register()) { + assert_different_registers(s1.as_register(), temp); + if (d.is_constant()) d = temp; + andn(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register()); + return d; + } else { + if (s2.is_register()) { + assert_different_registers(s2.as_register(), temp); + if (d.is_constant()) d = temp; + set(s1.as_constant(), temp); + andn(temp, s2.as_register(), d.as_register()); + return d; + } else { + intptr_t res = s1.as_constant() & ~s2.as_constant(); + return res; + } + } +} + +RegisterOrConstant MacroAssembler::regcon_inc_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) { + assert(d.register_or_noreg() != G0, "lost side effect"); + if ((s2.is_constant() && s2.as_constant() == 0) || + (s2.is_register() && s2.as_register() == G0)) { + // Do nothing, just move value. + if (s1.is_register()) { + if (d.is_constant()) d = temp; + mov(s1.as_register(), d.as_register()); + return d; + } else { + return s1; + } + } + + if (s1.is_register()) { + assert_different_registers(s1.as_register(), temp); + if (d.is_constant()) d = temp; + add(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register()); + return d; + } else { + if (s2.is_register()) { + assert_different_registers(s2.as_register(), temp); + if (d.is_constant()) d = temp; + add(s2.as_register(), ensure_simm13_or_reg(s1, temp), d.as_register()); + return d; + } else { + intptr_t res = s1.as_constant() + s2.as_constant(); + return res; + } + } +} + +RegisterOrConstant MacroAssembler::regcon_sll_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp) { + assert(d.register_or_noreg() != G0, "lost side effect"); + if (!is_simm13(s2.constant_or_zero())) + s2 = (s2.as_constant() & 0xFF); + if ((s2.is_constant() && s2.as_constant() == 0) || + (s2.is_register() && s2.as_register() == G0)) { + // Do nothing, just move value. + if (s1.is_register()) { + if (d.is_constant()) d = temp; + mov(s1.as_register(), d.as_register()); + return d; + } else { + return s1; + } + } + + if (s1.is_register()) { + assert_different_registers(s1.as_register(), temp); + if (d.is_constant()) d = temp; + sll_ptr(s1.as_register(), ensure_simm13_or_reg(s2, temp), d.as_register()); + return d; + } else { + if (s2.is_register()) { + assert_different_registers(s2.as_register(), temp); + if (d.is_constant()) d = temp; + set(s1.as_constant(), temp); + sll_ptr(temp, s2.as_register(), d.as_register()); + return d; + } else { + intptr_t res = s1.as_constant() << s2.as_constant(); + return res; + } + } +} + + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. +void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Register sethi_temp, + Label& L_no_such_interface) { + assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + Label L_no_such_interface_restore; + bool did_save = false; + if (scan_temp == noreg || sethi_temp == noreg) { + Register recv_2 = recv_klass->is_global() ? recv_klass : L0; + Register intf_2 = intf_klass->is_global() ? intf_klass : L1; + assert(method_result->is_global(), "must be able to return value"); + scan_temp = L2; + sethi_temp = L3; + save_frame_and_mov(0, recv_klass, recv_2, intf_klass, intf_2); + recv_klass = recv_2; + intf_klass = intf_2; + did_save = true; + } + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size() * wordSize; + + lduw(recv_klass, InstanceKlass::vtable_length_offset() * wordSize, scan_temp); + // %%% We should store the aligned, prescaled offset in the klassoop. + // Then the next several instructions would fold away. + + int round_to_unit = ((HeapWordsPerLong > 1) ? BytesPerLong : 0); + int itb_offset = vtable_base; + if (round_to_unit != 0) { + // hoist first instruction of round_to(scan_temp, BytesPerLong): + itb_offset += round_to_unit - wordSize; + } + int itb_scale = exact_log2(vtableEntry::size() * wordSize); + sll(scan_temp, itb_scale, scan_temp); + add(scan_temp, itb_offset, scan_temp); + if (round_to_unit != 0) { + // Round up to align_object_offset boundary + // see code for InstanceKlass::start_of_itable! + // Was: round_to(scan_temp, BytesPerLong); + // Hoisted: add(scan_temp, BytesPerLong-1, scan_temp); + and3(scan_temp, -round_to_unit, scan_temp); + } + add(recv_klass, scan_temp, scan_temp); + + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + RegisterOrConstant itable_offset = itable_index; + itable_offset = regcon_sll_ptr(itable_index, exact_log2(itableMethodEntry::size() * wordSize), itable_offset); + itable_offset = regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes(), itable_offset); + add(recv_klass, ensure_simm13_or_reg(itable_offset, sethi_temp), recv_klass); + + // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { + // if (scan->interface() == intf) { + // result = (klass + scan->offset() + itable_index); + // } + // } + Label L_search, L_found_method; + + for (int peel = 1; peel >= 0; peel--) { + // %%%% Could load both offset and interface in one ldx, if they were + // in the opposite order. This would save a load. + ld_ptr(scan_temp, itableOffsetEntry::interface_offset_in_bytes(), method_result); + + // Check that this entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + bpr(Assembler::rc_z, false, Assembler::pn, method_result, did_save ? L_no_such_interface_restore : L_no_such_interface); + delayed()->cmp(method_result, intf_klass); + + if (peel) { + brx(Assembler::equal, false, Assembler::pt, L_found_method); + } else { + brx(Assembler::notEqual, false, Assembler::pn, L_search); + // (invert the test to fall through to found_method...) + } + delayed()->add(scan_temp, scan_step, scan_temp); + + if (!peel) break; + + bind(L_search); + } + + bind(L_found_method); + + // Got a hit. + int ito_offset = itableOffsetEntry::offset_offset_in_bytes(); + // scan_temp[-scan_step] points to the vtable offset we need + ito_offset -= scan_step; + lduw(scan_temp, ito_offset, scan_temp); + ld_ptr(recv_klass, scan_temp, method_result); + + if (did_save) { + Label L_done; + ba(L_done); + delayed()->restore(); + + bind(L_no_such_interface_restore); + ba(L_no_such_interface); + delayed()->restore(); + + bind(L_done); + } +} + + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg()); + Register sethi_temp = method_result; + const int base = (InstanceKlass::vtable_start_offset() * wordSize + + // method pointer offset within the vtable entry: + vtableEntry::method_offset_in_bytes()); + RegisterOrConstant vtable_offset = vtable_index; + // Each of the following three lines potentially generates an instruction. + // But the total number of address formation instructions will always be + // at most two, and will often be zero. In any case, it will be optimal. + // If vtable_index is a register, we will have (sll_ptr N,x; inc_ptr B,x; ld_ptr k,x). + // If vtable_index is a constant, we will have at most (set B+X<is_global()) sub_2 = L0; + if (!sup_2->is_global()) sup_2 = L1; + bool did_save = false; + if (temp_reg == noreg || temp2_reg == noreg) { + temp_reg = L2; + temp2_reg = L3; + save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2); + sub_klass = sub_2; + super_klass = sup_2; + did_save = true; + } + Label L_failure, L_pop_to_failure, L_pop_to_success; + check_klass_subtype_fast_path(sub_klass, super_klass, + temp_reg, temp2_reg, + (did_save ? &L_pop_to_success : &L_success), + (did_save ? &L_pop_to_failure : &L_failure), NULL); + + if (!did_save) + save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2); + check_klass_subtype_slow_path(sub_2, sup_2, + L2, L3, L4, L5, + NULL, &L_pop_to_failure); + + // on success: + bind(L_pop_to_success); + restore(); + ba_short(L_success); + + // on failure: + bind(L_pop_to_failure); + restore(); + bind(L_failure); +} + + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + bool need_slow_path = (must_load_sco || + super_check_offset.constant_or_zero() == sco_offset); + + assert_different_registers(sub_klass, super_klass, temp_reg); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, temp_reg, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp2_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1 || + (L_slow_path == &L_fallthrough && label_nulls <= 2 && !need_slow_path), + "at most one NULL in the batch, usually"); + + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + cmp(super_klass, sub_klass); + brx(Assembler::equal, false, Assembler::pn, *L_success); + delayed()->nop(); + + // Check the supertype display: + if (must_load_sco) { + // The super check offset is always positive... + lduw(super_klass, sco_offset, temp2_reg); + super_check_offset = RegisterOrConstant(temp2_reg); + // super_check_offset is register. + assert_different_registers(sub_klass, super_klass, temp_reg, super_check_offset.as_register()); + } + ld_ptr(sub_klass, super_check_offset, temp_reg); + cmp(super_klass, temp_reg); + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + // Hacked ba(), which may only be used just before L_fallthrough. +#define FINAL_JUMP(label) \ + if (&(label) != &L_fallthrough) { \ + ba(label); delayed()->nop(); \ + } + + if (super_check_offset.is_register()) { + brx(Assembler::equal, false, Assembler::pn, *L_success); + delayed()->cmp(super_check_offset.as_register(), sc_offset); + + if (L_failure == &L_fallthrough) { + brx(Assembler::equal, false, Assembler::pt, *L_slow_path); + delayed()->nop(); + } else { + brx(Assembler::notEqual, false, Assembler::pn, *L_failure); + delayed()->nop(); + FINAL_JUMP(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + brx(Assembler::equal, false, Assembler::pt, *L_success); + delayed()->nop(); + } else { + brx(Assembler::notEqual, false, Assembler::pn, *L_slow_path); + delayed()->nop(); + FINAL_JUMP(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + brx(Assembler::equal, false, Assembler::pt, *L_success); + delayed()->nop(); + } else { + brx(Assembler::notEqual, false, Assembler::pn, *L_failure); + delayed()->nop(); + FINAL_JUMP(*L_success); + } + } + + bind(L_fallthrough); + +#undef FINAL_JUMP +} + + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register count_temp, + Register scan_temp, + Register scratch_reg, + Register coop_reg, + Label* L_success, + Label* L_failure) { + assert_different_registers(sub_klass, super_klass, + count_temp, scan_temp, scratch_reg, coop_reg); + + Label L_fallthrough, L_loop; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + inc_counter((address) pst_counter, count_temp, scan_temp); +#endif + + // We will consult the secondary-super array. + ld_ptr(sub_klass, ss_offset, scan_temp); + + Register search_key = super_klass; + + // Load the array length. (Positive movl does right thing on LP64.) + lduw(scan_temp, Array::length_offset_in_bytes(), count_temp); + + // Check for empty secondary super list + tst(count_temp); + + // In the array of super classes elements are pointer sized. + int element_size = wordSize; + + // Top of search loop + bind(L_loop); + br(Assembler::equal, false, Assembler::pn, *L_failure); + delayed()->add(scan_temp, element_size, scan_temp); + + // Skip the array header in all array accesses. + int elem_offset = Array::base_offset_in_bytes(); + elem_offset -= element_size; // the scan pointer was pre-incremented also + + // Load next super to check + ld_ptr( scan_temp, elem_offset, scratch_reg ); + + // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list + cmp(scratch_reg, search_key); + + // A miss means we are NOT a subtype and need to keep looping + brx(Assembler::notEqual, false, Assembler::pn, L_loop); + delayed()->deccc(count_temp); // decrement trip counter in delay slot + + // Success. Cache the super we found and proceed in triumph. + st_ptr(super_klass, sub_klass, sc_offset); + + if (L_success != &L_fallthrough) { + ba(*L_success); + delayed()->nop(); + } + + bind(L_fallthrough); +} + + +RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot, + Register temp_reg, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = extra_slot_offset * stackElementSize; + if (arg_slot.is_constant()) { + offset += arg_slot.as_constant() * stackElementSize; + return offset; + } else { + assert(temp_reg != noreg, "must specify"); + sll_ptr(arg_slot.as_register(), exact_log2(stackElementSize), temp_reg); + if (offset != 0) + add(temp_reg, offset, temp_reg); + return temp_reg; + } +} + + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + Register temp_reg, + int extra_slot_offset) { + return Address(Gargs, argument_offset(arg_slot, temp_reg, extra_slot_offset)); +} + + +void MacroAssembler::biased_locking_enter(Register obj_reg, Register mark_reg, + Register temp_reg, + Label& done, Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + + if (PrintBiasedLockingStatistics) { + assert_different_registers(obj_reg, mark_reg, temp_reg, O7); + if (counters == NULL) + counters = BiasedLocking::counters(); + } + + Label cas_label; + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + and3(mark_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); + cmp_and_brx_short(temp_reg, markOopDesc::biased_lock_pattern, Assembler::notEqual, Assembler::pn, cas_label); + + load_klass(obj_reg, temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); + or3(G2_thread, temp_reg, temp_reg); + xor3(mark_reg, temp_reg, temp_reg); + andcc(temp_reg, ~((int) markOopDesc::age_mask_in_place), temp_reg); + if (counters != NULL) { + cond_inc(Assembler::equal, (address) counters->biased_lock_entry_count_addr(), mark_reg, temp_reg); + // Reload mark_reg as we may need it later + ld_ptr(Address(obj_reg, oopDesc::mark_offset_in_bytes()), mark_reg); + } + brx(Assembler::equal, true, Assembler::pt, done); + delayed()->nop(); + + Label try_revoke_bias; + Label try_rebias; + Address mark_addr = Address(obj_reg, oopDesc::mark_offset_in_bytes()); + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + btst(markOopDesc::biased_lock_mask_in_place, temp_reg); + brx(Assembler::notZero, false, Assembler::pn, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + delayed()->btst(markOopDesc::epoch_mask_in_place, temp_reg); + brx(Assembler::notZero, false, Assembler::pn, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + delayed()->and3(mark_reg, + markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place, + mark_reg); + or3(G2_thread, mark_reg, temp_reg); + casn(mark_addr.base(), mark_reg, temp_reg); + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + cmp(mark_reg, temp_reg); + if (counters != NULL) { + cond_inc(Assembler::zero, (address) counters->anonymously_biased_lock_entry_count_addr(), mark_reg, temp_reg); + } + if (slow_case != NULL) { + brx(Assembler::notEqual, true, Assembler::pn, *slow_case); + delayed()->nop(); + } + ba_short(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + load_klass(obj_reg, temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); + or3(G2_thread, temp_reg, temp_reg); + casn(mark_addr.base(), mark_reg, temp_reg); + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + cmp(mark_reg, temp_reg); + if (counters != NULL) { + cond_inc(Assembler::zero, (address) counters->rebiased_lock_entry_count_addr(), mark_reg, temp_reg); + } + if (slow_case != NULL) { + brx(Assembler::notEqual, true, Assembler::pn, *slow_case); + delayed()->nop(); + } + ba_short(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + load_klass(obj_reg, temp_reg); + ld_ptr(Address(temp_reg, Klass::prototype_header_offset()), temp_reg); + casn(mark_addr.base(), mark_reg, temp_reg); + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (counters != NULL) { + cmp(mark_reg, temp_reg); + cond_inc(Assembler::zero, (address) counters->revoked_lock_entry_count_addr(), mark_reg, temp_reg); + } + + bind(cas_label); +} + +void MacroAssembler::biased_locking_exit (Address mark_addr, Register temp_reg, Label& done, + bool allow_delay_slot_filling) { + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + ld_ptr(mark_addr, temp_reg); + and3(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); + cmp(temp_reg, markOopDesc::biased_lock_pattern); + brx(Assembler::equal, allow_delay_slot_filling, Assembler::pt, done); + delayed(); + if (!allow_delay_slot_filling) { + nop(); + } +} + + +// CASN -- 32-64 bit switch hitter similar to the synthetic CASN provided by +// Solaris/SPARC's "as". Another apt name would be cas_ptr() + +void MacroAssembler::casn (Register addr_reg, Register cmp_reg, Register set_reg ) { + casx_under_lock (addr_reg, cmp_reg, set_reg, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); +} + + + +// compiler_lock_object() and compiler_unlock_object() are direct transliterations +// of i486.ad fast_lock() and fast_unlock(). See those methods for detailed comments. +// The code could be tightened up considerably. +// +// box->dhw disposition - post-conditions at DONE_LABEL. +// - Successful inflated lock: box->dhw != 0. +// Any non-zero value suffices. +// Consider G2_thread, rsp, boxReg, or unused_mark() +// - Successful Stack-lock: box->dhw == mark. +// box->dhw must contain the displaced mark word value +// - Failure -- icc.ZFlag == 0 and box->dhw is undefined. +// The slow-path fast_enter() and slow_enter() operators +// are responsible for setting box->dhw = NonZero (typically ::unused_mark). +// - Biased: box->dhw is undefined +// +// SPARC refworkload performance - specifically jetstream and scimark - are +// extremely sensitive to the size of the code emitted by compiler_lock_object +// and compiler_unlock_object. Critically, the key factor is code size, not path +// length. (Simply experiments to pad CLO with unexecuted NOPs demonstrte the +// effect). + + +void MacroAssembler::compiler_lock_object(Register Roop, Register Rmark, + Register Rbox, Register Rscratch, + BiasedLockingCounters* counters, + bool try_bias) { + Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); + + verify_oop(Roop); + Label done ; + + if (counters != NULL) { + inc_counter((address) counters->total_entry_count_addr(), Rmark, Rscratch); + } + + if (EmitSync & 1) { + mov(3, Rscratch); + st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + cmp(SP, G0); + return ; + } + + if (EmitSync & 2) { + + // Fetch object's markword + ld_ptr(mark_addr, Rmark); + + if (try_bias) { + biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); + } + + // Save Rbox in Rscratch to be used for the cas operation + mov(Rbox, Rscratch); + + // set Rmark to markOop | markOopDesc::unlocked_value + or3(Rmark, markOopDesc::unlocked_value, Rmark); + + // Initialize the box. (Must happen before we update the object mark!) + st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); + + // compare object markOop with Rmark and if equal exchange Rscratch with object markOop + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + casx_under_lock(mark_addr.base(), Rmark, Rscratch, + (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + + // if compare/exchange succeeded we found an unlocked object and we now have locked it + // hence we are done + cmp(Rmark, Rscratch); +#ifdef _LP64 + sub(Rscratch, STACK_BIAS, Rscratch); +#endif + brx(Assembler::equal, false, Assembler::pt, done); + delayed()->sub(Rscratch, SP, Rscratch); //pull next instruction into delay slot + + // we did not find an unlocked object so see if this is a recursive case + // sub(Rscratch, SP, Rscratch); + assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); + andcc(Rscratch, 0xfffff003, Rscratch); + st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + bind (done); + return ; + } + + Label Egress ; + + if (EmitSync & 256) { + Label IsInflated ; + + ld_ptr(mark_addr, Rmark); // fetch obj->mark + // Triage: biased, stack-locked, neutral, inflated + if (try_bias) { + biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); + // Invariant: if control reaches this point in the emitted stream + // then Rmark has not been modified. + } + + // Store mark into displaced mark field in the on-stack basic-lock "box" + // Critically, this must happen before the CAS + // Maximize the ST-CAS distance to minimize the ST-before-CAS penalty. + st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); + andcc(Rmark, 2, G0); + brx(Assembler::notZero, false, Assembler::pn, IsInflated); + delayed()-> + + // Try stack-lock acquisition. + // Beware: the 1st instruction is in a delay slot + mov(Rbox, Rscratch); + or3(Rmark, markOopDesc::unlocked_value, Rmark); + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + casn(mark_addr.base(), Rmark, Rscratch); + cmp(Rmark, Rscratch); + brx(Assembler::equal, false, Assembler::pt, done); + delayed()->sub(Rscratch, SP, Rscratch); + + // Stack-lock attempt failed - check for recursive stack-lock. + // See the comments below about how we might remove this case. +#ifdef _LP64 + sub(Rscratch, STACK_BIAS, Rscratch); +#endif + assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); + andcc(Rscratch, 0xfffff003, Rscratch); + br(Assembler::always, false, Assembler::pt, done); + delayed()-> st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + + bind(IsInflated); + if (EmitSync & 64) { + // If m->owner != null goto IsLocked + // Pessimistic form: Test-and-CAS vs CAS + // The optimistic form avoids RTS->RTO cache line upgrades. + ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); + // m->owner == null : it's unlocked. + } + + // Try to CAS m->owner from null to Self + // Invariant: if we acquire the lock then _recursions should be 0. + add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); + mov(G2_thread, Rscratch); + casn(Rmark, G0, Rscratch); + cmp(Rscratch, G0); + // Intentional fall-through into done + } else { + // Aggressively avoid the Store-before-CAS penalty + // Defer the store into box->dhw until after the CAS + Label IsInflated, Recursive ; + +// Anticipate CAS -- Avoid RTS->RTO upgrade +// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); + + ld_ptr(mark_addr, Rmark); // fetch obj->mark + // Triage: biased, stack-locked, neutral, inflated + + if (try_bias) { + biased_locking_enter(Roop, Rmark, Rscratch, done, NULL, counters); + // Invariant: if control reaches this point in the emitted stream + // then Rmark has not been modified. + } + andcc(Rmark, 2, G0); + brx(Assembler::notZero, false, Assembler::pn, IsInflated); + delayed()-> // Beware - dangling delay-slot + + // Try stack-lock acquisition. + // Transiently install BUSY (0) encoding in the mark word. + // if the CAS of 0 into the mark was successful then we execute: + // ST box->dhw = mark -- save fetched mark in on-stack basiclock box + // ST obj->mark = box -- overwrite transient 0 value + // This presumes TSO, of course. + + mov(0, Rscratch); + or3(Rmark, markOopDesc::unlocked_value, Rmark); + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + casn(mark_addr.base(), Rmark, Rscratch); +// prefetch (mark_addr, Assembler::severalWritesAndPossiblyReads); + cmp(Rscratch, Rmark); + brx(Assembler::notZero, false, Assembler::pn, Recursive); + delayed()->st_ptr(Rmark, Rbox, BasicLock::displaced_header_offset_in_bytes()); + if (counters != NULL) { + cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); + } + ba(done); + delayed()->st_ptr(Rbox, mark_addr); + + bind(Recursive); + // Stack-lock attempt failed - check for recursive stack-lock. + // Tests show that we can remove the recursive case with no impact + // on refworkload 0.83. If we need to reduce the size of the code + // emitted by compiler_lock_object() the recursive case is perfect + // candidate. + // + // A more extreme idea is to always inflate on stack-lock recursion. + // This lets us eliminate the recursive checks in compiler_lock_object + // and compiler_unlock_object and the (box->dhw == 0) encoding. + // A brief experiment - requiring changes to synchronizer.cpp, interpreter, + // and showed a performance *increase*. In the same experiment I eliminated + // the fast-path stack-lock code from the interpreter and always passed + // control to the "slow" operators in synchronizer.cpp. + + // RScratch contains the fetched obj->mark value from the failed CASN. +#ifdef _LP64 + sub(Rscratch, STACK_BIAS, Rscratch); +#endif + sub(Rscratch, SP, Rscratch); + assert(os::vm_page_size() > 0xfff, "page size too small - change the constant"); + andcc(Rscratch, 0xfffff003, Rscratch); + if (counters != NULL) { + // Accounting needs the Rscratch register + st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + cond_inc(Assembler::equal, (address) counters->fast_path_entry_count_addr(), Rmark, Rscratch); + ba_short(done); + } else { + ba(done); + delayed()->st_ptr(Rscratch, Rbox, BasicLock::displaced_header_offset_in_bytes()); + } + + bind (IsInflated); + if (EmitSync & 64) { + // If m->owner != null goto IsLocked + // Test-and-CAS vs CAS + // Pessimistic form avoids futile (doomed) CAS attempts + // The optimistic form avoids RTS->RTO cache line upgrades. + ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); + // m->owner == null : it's unlocked. + } + + // Try to CAS m->owner from null to Self + // Invariant: if we acquire the lock then _recursions should be 0. + add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); + mov(G2_thread, Rscratch); + casn(Rmark, G0, Rscratch); + cmp(Rscratch, G0); + // ST box->displaced_header = NonZero. + // Any non-zero value suffices: + // unused_mark(), G2_thread, RBox, RScratch, rsp, etc. + st_ptr(Rbox, Rbox, BasicLock::displaced_header_offset_in_bytes()); + // Intentional fall-through into done + } + + bind (done); +} + +void MacroAssembler::compiler_unlock_object(Register Roop, Register Rmark, + Register Rbox, Register Rscratch, + bool try_bias) { + Address mark_addr(Roop, oopDesc::mark_offset_in_bytes()); + + Label done ; + + if (EmitSync & 4) { + cmp(SP, G0); + return ; + } + + if (EmitSync & 8) { + if (try_bias) { + biased_locking_exit(mark_addr, Rscratch, done); + } + + // Test first if it is a fast recursive unlock + ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rmark); + br_null_short(Rmark, Assembler::pt, done); + + // Check if it is still a light weight lock, this is is true if we see + // the stack address of the basicLock in the markOop of the object + assert(mark_addr.disp() == 0, "cas must take a zero displacement"); + casx_under_lock(mark_addr.base(), Rbox, Rmark, + (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + ba(done); + delayed()->cmp(Rbox, Rmark); + bind(done); + return ; + } + + // Beware ... If the aggregate size of the code emitted by CLO and CUO is + // is too large performance rolls abruptly off a cliff. + // This could be related to inlining policies, code cache management, or + // I$ effects. + Label LStacked ; + + if (try_bias) { + // TODO: eliminate redundant LDs of obj->mark + biased_locking_exit(mark_addr, Rscratch, done); + } + + ld_ptr(Roop, oopDesc::mark_offset_in_bytes(), Rmark); + ld_ptr(Rbox, BasicLock::displaced_header_offset_in_bytes(), Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::zero, false, Assembler::pn, done); + delayed()->nop(); // consider: relocate fetch of mark, above, into this DS + andcc(Rmark, 2, G0); + brx(Assembler::zero, false, Assembler::pt, LStacked); + delayed()->nop(); + + // It's inflated + // Conceptually we need a #loadstore|#storestore "release" MEMBAR before + // the ST of 0 into _owner which releases the lock. This prevents loads + // and stores within the critical section from reordering (floating) + // past the store that releases the lock. But TSO is a strong memory model + // and that particular flavor of barrier is a noop, so we can safely elide it. + // Note that we use 1-0 locking by default for the inflated case. We + // close the resultant (and rare) race by having contented threads in + // monitorenter periodically poll _owner. + ld_ptr(Rmark, ObjectMonitor::owner_offset_in_bytes() - 2, Rscratch); + ld_ptr(Rmark, ObjectMonitor::recursions_offset_in_bytes() - 2, Rbox); + xor3(Rscratch, G2_thread, Rscratch); + orcc(Rbox, Rscratch, Rbox); + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()-> + ld_ptr(Rmark, ObjectMonitor::EntryList_offset_in_bytes() - 2, Rscratch); + ld_ptr(Rmark, ObjectMonitor::cxq_offset_in_bytes() - 2, Rbox); + orcc(Rbox, Rscratch, G0); + if (EmitSync & 65536) { + Label LSucc ; + brx(Assembler::notZero, false, Assembler::pn, LSucc); + delayed()->nop(); + ba(done); + delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); + + bind(LSucc); + st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); + if (os::is_MP()) { membar (StoreLoad); } + ld_ptr(Rmark, ObjectMonitor::succ_offset_in_bytes() - 2, Rscratch); + andcc(Rscratch, Rscratch, G0); + brx(Assembler::notZero, false, Assembler::pt, done); + delayed()->andcc(G0, G0, G0); + add(Rmark, ObjectMonitor::owner_offset_in_bytes()-2, Rmark); + mov(G2_thread, Rscratch); + casn(Rmark, G0, Rscratch); + // invert icc.zf and goto done + br_notnull(Rscratch, false, Assembler::pt, done); + delayed()->cmp(G0, G0); + ba(done); + delayed()->cmp(G0, 1); + } else { + brx(Assembler::notZero, false, Assembler::pn, done); + delayed()->nop(); + ba(done); + delayed()->st_ptr(G0, Rmark, ObjectMonitor::owner_offset_in_bytes() - 2); + } + + bind (LStacked); + // Consider: we could replace the expensive CAS in the exit + // path with a simple ST of the displaced mark value fetched from + // the on-stack basiclock box. That admits a race where a thread T2 + // in the slow lock path -- inflating with monitor M -- could race a + // thread T1 in the fast unlock path, resulting in a missed wakeup for T2. + // More precisely T1 in the stack-lock unlock path could "stomp" the + // inflated mark value M installed by T2, resulting in an orphan + // object monitor M and T2 becoming stranded. We can remedy that situation + // by having T2 periodically poll the object's mark word using timed wait + // operations. If T2 discovers that a stomp has occurred it vacates + // the monitor M and wakes any other threads stranded on the now-orphan M. + // In addition the monitor scavenger, which performs deflation, + // would also need to check for orpan monitors and stranded threads. + // + // Finally, inflation is also used when T2 needs to assign a hashCode + // to O and O is stack-locked by T1. The "stomp" race could cause + // an assigned hashCode value to be lost. We can avoid that condition + // and provide the necessary hashCode stability invariants by ensuring + // that hashCode generation is idempotent between copying GCs. + // For example we could compute the hashCode of an object O as + // O's heap address XOR some high quality RNG value that is refreshed + // at GC-time. The monitor scavenger would install the hashCode + // found in any orphan monitors. Again, the mechanism admits a + // lost-update "stomp" WAW race but detects and recovers as needed. + // + // A prototype implementation showed excellent results, although + // the scavenger and timeout code was rather involved. + + casn(mark_addr.base(), Rbox, Rscratch); + cmp(Rbox, Rscratch); + // Intentional fall through into done ... + + bind(done); +} + + + +void MacroAssembler::print_CPU_state() { + // %%%%% need to implement this +} + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + // %%%%% need to implement this +} + +void MacroAssembler::push_IU_state() { + // %%%%% need to implement this +} + + +void MacroAssembler::pop_IU_state() { + // %%%%% need to implement this +} + + +void MacroAssembler::push_FPU_state() { + // %%%%% need to implement this +} + + +void MacroAssembler::pop_FPU_state() { + // %%%%% need to implement this +} + + +void MacroAssembler::push_CPU_state() { + // %%%%% need to implement this +} + + +void MacroAssembler::pop_CPU_state() { + // %%%%% need to implement this +} + + + +void MacroAssembler::verify_tlab() { +#ifdef ASSERT + if (UseTLAB && VerifyOops) { + Label next, next2, ok; + Register t1 = L0; + Register t2 = L1; + Register t3 = L2; + + save_frame(0); + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2); + or3(t1, t2, t3); + cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next); + STOP("assert(top >= start)"); + should_not_reach_here(); + + bind(next); + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), t1); + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2); + or3(t3, t2, t3); + cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2); + STOP("assert(top <= end)"); + should_not_reach_here(); + + bind(next2); + and3(t3, MinObjAlignmentInBytesMask, t3); + cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok); + STOP("assert(aligned)"); + should_not_reach_here(); + + bind(ok); + restore(); + } +#endif +} + + +void MacroAssembler::eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails +){ + // make sure arguments make sense + assert_different_registers(obj, var_size_in_bytes, t1, t2); + assert(0 <= con_size_in_bytes && Assembler::is_simm13(con_size_in_bytes), "illegal object size"); + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); + + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + ba_short(slow_case); + } else { + // get eden boundaries + // note: we need both top & top_addr! + const Register top_addr = t1; + const Register end = t2; + + CollectedHeap* ch = Universe::heap(); + set((intx)ch->top_addr(), top_addr); + intx delta = (intx)ch->end_addr() - (intx)ch->top_addr(); + ld_ptr(top_addr, delta, end); + ld_ptr(top_addr, 0, obj); + + // try to allocate + Label retry; + bind(retry); +#ifdef ASSERT + // make sure eden top is properly aligned + { + Label L; + btst(MinObjAlignmentInBytesMask, obj); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + STOP("eden top is not properly aligned"); + bind(L); + } +#endif // ASSERT + const Register free = end; + sub(end, obj, free); // compute amount of free space + if (var_size_in_bytes->is_valid()) { + // size is unknown at compile time + cmp(free, var_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, var_size_in_bytes, end); + } else { + // size is known at compile time + cmp(free, con_size_in_bytes); + br(Assembler::lessUnsigned, false, Assembler::pn, slow_case); // if there is not enough space go the slow case + delayed()->add(obj, con_size_in_bytes, end); + } + // Compare obj with the value at top_addr; if still equal, swap the value of + // end with the value at top_addr. If not equal, read the value at top_addr + // into end. + casx_under_lock(top_addr, obj, end, (address)StubRoutines::Sparc::atomic_memory_operation_lock_addr()); + // if someone beat us on the allocation, try again, otherwise continue + cmp(obj, end); + brx(Assembler::notEqual, false, Assembler::pn, retry); + delayed()->mov(end, obj); // nop if successfull since obj == end + +#ifdef ASSERT + // make sure eden top is properly aligned + { + Label L; + const Register top_addr = t1; + + set((intx)ch->top_addr(), top_addr); + ld_ptr(top_addr, 0, top_addr); + btst(MinObjAlignmentInBytesMask, top_addr); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + STOP("eden top is not properly aligned"); + bind(L); + } +#endif // ASSERT + } +} + + +void MacroAssembler::tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails +){ + // make sure arguments make sense + assert_different_registers(obj, var_size_in_bytes, t1); + assert(0 <= con_size_in_bytes && is_simm13(con_size_in_bytes), "illegal object size"); + assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, "object size is not multiple of alignment"); + + const Register free = t1; + + verify_tlab(); + + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), obj); + + // calculate amount of free space + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), free); + sub(free, obj, free); + + Label done; + if (var_size_in_bytes == noreg) { + cmp(free, con_size_in_bytes); + } else { + cmp(free, var_size_in_bytes); + } + br(Assembler::less, false, Assembler::pn, slow_case); + // calculate the new top pointer + if (var_size_in_bytes == noreg) { + delayed()->add(obj, con_size_in_bytes, free); + } else { + delayed()->add(obj, var_size_in_bytes, free); + } + + bind(done); + +#ifdef ASSERT + // make sure new free pointer is properly aligned + { + Label L; + btst(MinObjAlignmentInBytesMask, free); + br(Assembler::zero, false, Assembler::pt, L); + delayed()->nop(); + STOP("updated TLAB free is not properly aligned"); + bind(L); + } +#endif // ASSERT + + // update the tlab top pointer + st_ptr(free, G2_thread, in_bytes(JavaThread::tlab_top_offset())); + verify_tlab(); +} + + +void MacroAssembler::tlab_refill(Label& retry, Label& try_eden, Label& slow_case) { + Register top = O0; + Register t1 = G1; + Register t2 = G3; + Register t3 = O1; + assert_different_registers(top, t1, t2, t3, G4, G5 /* preserve G4 and G5 */); + Label do_refill, discard_tlab; + + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + ba_short(slow_case); + } + + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_top_offset()), top); + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t1); + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset()), t2); + + // calculate amount of free space + sub(t1, top, t1); + srl_ptr(t1, LogHeapWordSize, t1); + + // Retain tlab and allocate object in shared space if + // the amount free in the tlab is too large to discard. + cmp(t1, t2); + brx(Assembler::lessEqual, false, Assembler::pt, discard_tlab); + + // increment waste limit to prevent getting stuck on this slow path + delayed()->add(t2, ThreadLocalAllocBuffer::refill_waste_limit_increment(), t2); + st_ptr(t2, G2_thread, in_bytes(JavaThread::tlab_refill_waste_limit_offset())); + if (TLABStats) { + // increment number of slow_allocations + ld(G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset()), t2); + add(t2, 1, t2); + stw(t2, G2_thread, in_bytes(JavaThread::tlab_slow_allocations_offset())); + } + ba_short(try_eden); + + bind(discard_tlab); + if (TLABStats) { + // increment number of refills + ld(G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset()), t2); + add(t2, 1, t2); + stw(t2, G2_thread, in_bytes(JavaThread::tlab_number_of_refills_offset())); + // accumulate wastage + ld(G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset()), t2); + add(t2, t1, t2); + stw(t2, G2_thread, in_bytes(JavaThread::tlab_fast_refill_waste_offset())); + } + + // if tlab is currently allocated (top or end != null) then + // fill [top, end + alignment_reserve) with array object + br_null_short(top, Assembler::pn, do_refill); + + set((intptr_t)markOopDesc::prototype()->copy_set_hash(0x2), t2); + st_ptr(t2, top, oopDesc::mark_offset_in_bytes()); // set up the mark word + // set klass to intArrayKlass + sub(t1, typeArrayOopDesc::header_size(T_INT), t1); + add(t1, ThreadLocalAllocBuffer::alignment_reserve(), t1); + sll_ptr(t1, log2_intptr(HeapWordSize/sizeof(jint)), t1); + st(t1, top, arrayOopDesc::length_offset_in_bytes()); + set((intptr_t)Universe::intArrayKlassObj_addr(), t2); + ld_ptr(t2, 0, t2); + // store klass last. concurrent gcs assumes klass length is valid if + // klass field is not null. + store_klass(t2, top); + verify_oop(top); + + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t1); + sub(top, t1, t1); // size of tlab's allocated portion + incr_allocated_bytes(t1, t2, t3); + + // refill the tlab with an eden allocation + bind(do_refill); + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t1); + sll_ptr(t1, LogHeapWordSize, t1); + // allocate new tlab, address returned in top + eden_allocate(top, t1, 0, t2, t3, slow_case); + + st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_start_offset())); + st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_top_offset())); +#ifdef ASSERT + // check that tlab_size (t1) is still valid + { + Label ok; + ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2); + sll_ptr(t2, LogHeapWordSize, t2); + cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok); + STOP("assert(t1 == tlab_size)"); + should_not_reach_here(); + + bind(ok); + } +#endif // ASSERT + add(top, t1, top); // t1 is tlab_size + sub(top, ThreadLocalAllocBuffer::alignment_reserve_in_bytes(), top); + st_ptr(top, G2_thread, in_bytes(JavaThread::tlab_end_offset())); + verify_tlab(); + ba_short(retry); +} + +void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, + Register t1, Register t2) { + // Bump total bytes allocated by this thread + assert(t1->is_global(), "must be global reg"); // so all 64 bits are saved on a context switch + assert_different_registers(size_in_bytes.register_or_noreg(), t1, t2); + // v8 support has gone the way of the dodo + ldx(G2_thread, in_bytes(JavaThread::allocated_bytes_offset()), t1); + add(t1, ensure_simm13_or_reg(size_in_bytes, t2), t1); + stx(t1, G2_thread, in_bytes(JavaThread::allocated_bytes_offset())); +} + +Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { + switch (cond) { + // Note some conditions are synonyms for others + case Assembler::never: return Assembler::always; + case Assembler::zero: return Assembler::notZero; + case Assembler::lessEqual: return Assembler::greater; + case Assembler::less: return Assembler::greaterEqual; + case Assembler::lessEqualUnsigned: return Assembler::greaterUnsigned; + case Assembler::lessUnsigned: return Assembler::greaterEqualUnsigned; + case Assembler::negative: return Assembler::positive; + case Assembler::overflowSet: return Assembler::overflowClear; + case Assembler::always: return Assembler::never; + case Assembler::notZero: return Assembler::zero; + case Assembler::greater: return Assembler::lessEqual; + case Assembler::greaterEqual: return Assembler::less; + case Assembler::greaterUnsigned: return Assembler::lessEqualUnsigned; + case Assembler::greaterEqualUnsigned: return Assembler::lessUnsigned; + case Assembler::positive: return Assembler::negative; + case Assembler::overflowClear: return Assembler::overflowSet; + } + + ShouldNotReachHere(); return Assembler::overflowClear; +} + +void MacroAssembler::cond_inc(Assembler::Condition cond, address counter_ptr, + Register Rtmp1, Register Rtmp2 /*, Register Rtmp3, Register Rtmp4 */) { + Condition negated_cond = negate_condition(cond); + Label L; + brx(negated_cond, false, Assembler::pt, L); + delayed()->nop(); + inc_counter(counter_ptr, Rtmp1, Rtmp2); + bind(L); +} + +void MacroAssembler::inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2) { + AddressLiteral addrlit(counter_addr); + sethi(addrlit, Rtmp1); // Move hi22 bits into temporary register. + Address addr(Rtmp1, addrlit.low10()); // Build an address with low10 bits. + ld(addr, Rtmp2); + inc(Rtmp2); + st(Rtmp2, addr); +} + +void MacroAssembler::inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2) { + inc_counter((address) counter_addr, Rtmp1, Rtmp2); +} + +SkipIfEqual::SkipIfEqual( + MacroAssembler* masm, Register temp, const bool* flag_addr, + Assembler::Condition condition) { + _masm = masm; + AddressLiteral flag(flag_addr); + _masm->sethi(flag, temp); + _masm->ldub(temp, flag.low10(), temp); + _masm->tst(temp); + _masm->br(condition, false, Assembler::pt, _label); + _masm->delayed()->nop(); +} + +SkipIfEqual::~SkipIfEqual() { + _masm->bind(_label); +} + + +// Writes to stack successive pages until offset reached to check for +// stack overflow + shadow pages. This clobbers tsp and scratch. +void MacroAssembler::bang_stack_size(Register Rsize, Register Rtsp, + Register Rscratch) { + // Use stack pointer in temp stack pointer + mov(SP, Rtsp); + + // Bang stack for total size given plus stack shadow page size. + // Bang one page at a time because a large size can overflow yellow and + // red zones (the bang will fail but stack overflow handling can't tell that + // it was a stack overflow bang vs a regular segv). + int offset = os::vm_page_size(); + Register Roffset = Rscratch; + + Label loop; + bind(loop); + set((-offset)+STACK_BIAS, Rscratch); + st(G0, Rtsp, Rscratch); + set(offset, Roffset); + sub(Rsize, Roffset, Rsize); + cmp(Rsize, G0); + br(Assembler::greater, false, Assembler::pn, loop); + delayed()->sub(Rtsp, Roffset, Rtsp); + + // Bang down shadow pages too. + // The -1 because we already subtracted 1 page. + for (int i = 0; i< StackShadowPages-1; i++) { + set((-i*offset)+STACK_BIAS, Rscratch); + st(G0, Rtsp, Rscratch); + } +} + +/////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +static address satb_log_enqueue_with_frame = NULL; +static u_char* satb_log_enqueue_with_frame_end = NULL; + +static address satb_log_enqueue_frameless = NULL; +static u_char* satb_log_enqueue_frameless_end = NULL; + +static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions? + +static void generate_satb_log_enqueue(bool with_frame) { + BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize); + CodeBuffer buf(bb); + MacroAssembler masm(&buf); + +#define __ masm. + + address start = __ pc(); + Register pre_val; + + Label refill, restart; + if (with_frame) { + __ save_frame(0); + pre_val = I0; // Was O0 before the save. + } else { + pre_val = O0; + } + + int satb_q_index_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index()); + + int satb_q_buf_byte_offset = + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf()); + + assert(in_bytes(PtrQueue::byte_width_of_index()) == sizeof(intptr_t) && + in_bytes(PtrQueue::byte_width_of_buf()) == sizeof(intptr_t), + "check sizes in assembly below"); + + __ bind(restart); + + // Load the index into the SATB buffer. PtrQueue::_index is a size_t + // so ld_ptr is appropriate. + __ ld_ptr(G2_thread, satb_q_index_byte_offset, L0); + + // index == 0? + __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, satb_q_buf_byte_offset, L1); + __ sub(L0, oopSize, L0); + + __ st_ptr(pre_val, L1, L0); // [_buf + index] := I0 + if (!with_frame) { + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } else { + // Not delayed. + __ st_ptr(L0, G2_thread, satb_q_index_byte_offset); + } + if (with_frame) { + __ ret(); + __ delayed()->restore(); + } + __ bind(refill); + + address handle_zero = + CAST_FROM_FN_PTR(address, + &SATBMarkQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + __ mov(G1_scratch, L0); + __ mov(G3_scratch, L1); + __ mov(G4, L2); + // We need the value of O0 above (for the write into the buffer), so we + // save and restore it. + __ mov(O0, L3); + // Since the call will overwrite O7, we save and restore that, as well. + __ mov(O7, L4); + __ call_VM_leaf(L5, handle_zero, G2_thread); + __ mov(L0, G1_scratch); + __ mov(L1, G3_scratch); + __ mov(L2, G4); + __ mov(L3, O0); + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->mov(L4, O7); + + if (with_frame) { + satb_log_enqueue_with_frame = start; + satb_log_enqueue_with_frame_end = __ pc(); + } else { + satb_log_enqueue_frameless = start; + satb_log_enqueue_frameless_end = __ pc(); + } + +#undef __ +} + +static inline void generate_satb_log_enqueue_if_necessary(bool with_frame) { + if (with_frame) { + if (satb_log_enqueue_with_frame == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_with_frame != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated with-frame satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_with_frame, + satb_log_enqueue_with_frame_end, + tty); + } + } + } else { + if (satb_log_enqueue_frameless == 0) { + generate_satb_log_enqueue(with_frame); + assert(satb_log_enqueue_frameless != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated frameless satb enqueue:"); + Disassembler::decode((u_char*)satb_log_enqueue_frameless, + satb_log_enqueue_frameless_end, + tty); + } + } + } +} + +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register index, + int offset, + Register pre_val, + Register tmp, + bool preserve_o_regs) { + Label filtered; + + if (obj == noreg) { + // We are not loading the previous value so make + // sure that we don't trash the value in pre_val + // with the code below. + assert_different_registers(pre_val, tmp); + } else { + // We will be loading the previous value + // in this code so... + assert(offset == 0 || index == noreg, "choose one"); + assert(pre_val == noreg, "check this code"); + } + + // Is marking active? + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + ld(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } else { + guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1, + "Assumption"); + ldsb(G2, + in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active()), + tmp); + } + + // Is marking active? + cmp_and_br_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); + + // Do we need to load the previous value? + if (obj != noreg) { + // Load the previous value... + if (index == noreg) { + if (Assembler::is_simm13(offset)) { + load_heap_oop(obj, offset, tmp); + } else { + set(offset, tmp); + load_heap_oop(obj, tmp, tmp); + } + } else { + load_heap_oop(obj, index, tmp); + } + // Previous value has been loaded into tmp + pre_val = tmp; + } + + assert(pre_val != noreg, "must have a real register"); + + // Is the previous value null? + cmp_and_brx_short(pre_val, G0, Assembler::equal, Assembler::pt, filtered); + + // OK, it's not filtered, so we'll need to call enqueue. In the normal + // case, pre_val will be a scratch G-reg, but there are some cases in + // which it's an O-reg. In the first case, do a normal call. In the + // latter, do a save here and call the frameless version. + + guarantee(pre_val->is_global() || pre_val->is_out(), + "Or we need to think harder."); + + if (pre_val->is_global() && !preserve_o_regs) { + generate_satb_log_enqueue_if_necessary(true); // with frame + + call(satb_log_enqueue_with_frame); + delayed()->mov(pre_val, O0); + } else { + generate_satb_log_enqueue_if_necessary(false); // frameless + + save_frame(0); + call(satb_log_enqueue_frameless); + delayed()->mov(pre_val->after_save(), O0); + restore(); + } + + bind(filtered); +} + +static address dirty_card_log_enqueue = 0; +static u_char* dirty_card_log_enqueue_end = 0; + +// This gets to assume that o0 contains the object address. +static void generate_dirty_card_log_enqueue(jbyte* byte_map_base) { + BufferBlob* bb = BufferBlob::create("dirty_card_enqueue", EnqueueCodeSize*2); + CodeBuffer buf(bb); + MacroAssembler masm(&buf); +#define __ masm. + address start = __ pc(); + + Label not_already_dirty, restart, refill; + +#ifdef _LP64 + __ srlx(O0, CardTableModRefBS::card_shift, O0); +#else + __ srl(O0, CardTableModRefBS::card_shift, O0); +#endif + AddressLiteral addrlit(byte_map_base); + __ set(addrlit, O1); // O1 := + __ ldub(O0, O1, O2); // O2 := [O0 + O1] + + assert(CardTableModRefBS::dirty_card_val() == 0, "otherwise check this code"); + __ cmp_and_br_short(O2, G0, Assembler::notEqual, Assembler::pt, not_already_dirty); + + // We didn't take the branch, so we're already dirty: return. + // Use return-from-leaf + __ retl(); + __ delayed()->nop(); + + // Not dirty. + __ bind(not_already_dirty); + + // Get O0 + O1 into a reg by itself + __ add(O0, O1, O3); + + // First, dirty it. + __ stb(G0, O3, G0); // [cardPtr] := 0 (i.e., dirty). + + int dirty_card_q_index_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index()); + int dirty_card_q_buf_byte_offset = + in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf()); + __ bind(restart); + + // Load the index into the update buffer. PtrQueue::_index is + // a size_t so ld_ptr is appropriate here. + __ ld_ptr(G2_thread, dirty_card_q_index_byte_offset, L0); + + // index == 0? + __ cmp_and_brx_short(L0, G0, Assembler::equal, Assembler::pn, refill); + + __ ld_ptr(G2_thread, dirty_card_q_buf_byte_offset, L1); + __ sub(L0, oopSize, L0); + + __ st_ptr(O3, L1, L0); // [_buf + index] := I0 + // Use return-from-leaf + __ retl(); + __ delayed()->st_ptr(L0, G2_thread, dirty_card_q_index_byte_offset); + + __ bind(refill); + address handle_zero = + CAST_FROM_FN_PTR(address, + &DirtyCardQueueSet::handle_zero_index_for_thread); + // This should be rare enough that we can afford to save all the + // scratch registers that the calling context might be using. + __ mov(G1_scratch, L3); + __ mov(G3_scratch, L5); + // We need the value of O3 above (for the write into the buffer), so we + // save and restore it. + __ mov(O3, L6); + // Since the call will overwrite O7, we save and restore that, as well. + __ mov(O7, L4); + + __ call_VM_leaf(L7_thread_cache, handle_zero, G2_thread); + __ mov(L3, G1_scratch); + __ mov(L5, G3_scratch); + __ mov(L6, O3); + __ br(Assembler::always, /*annul*/false, Assembler::pt, restart); + __ delayed()->mov(L4, O7); + + dirty_card_log_enqueue = start; + dirty_card_log_enqueue_end = __ pc(); + // XXX Should have a guarantee here about not going off the end! + // Does it already do so? Do an experiment... + +#undef __ + +} + +static inline void +generate_dirty_card_log_enqueue_if_necessary(jbyte* byte_map_base) { + if (dirty_card_log_enqueue == 0) { + generate_dirty_card_log_enqueue(byte_map_base); + assert(dirty_card_log_enqueue != 0, "postcondition."); + if (G1SATBPrintStubs) { + tty->print_cr("Generated dirty_card enqueue:"); + Disassembler::decode((u_char*)dirty_card_log_enqueue, + dirty_card_log_enqueue_end, + tty); + } + } +} + + +void MacroAssembler::g1_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + + Label filtered; + MacroAssembler* post_filter_masm = this; + + if (new_val == G0) return; + + G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::G1SATBCT || + bs->kind() == BarrierSet::G1SATBCTLogging, "wrong barrier"); + + if (G1RSBarrierRegionFilter) { + xor3(store_addr, new_val, tmp); +#ifdef _LP64 + srlx(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#else + srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp); +#endif + + // XXX Should I predict this taken or not? Does it matter? + cmp_and_brx_short(tmp, G0, Assembler::equal, Assembler::pt, filtered); + } + + // If the "store_addr" register is an "in" or "local" register, move it to + // a scratch reg so we can pass it as an argument. + bool use_scr = !(store_addr->is_global() || store_addr->is_out()); + // Pick a scratch register different from "tmp". + Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch); + // Make sure we use up the delay slot! + if (use_scr) { + post_filter_masm->mov(store_addr, scr); + } else { + post_filter_masm->nop(); + } + generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base); + save_frame(0); + call(dirty_card_log_enqueue); + if (use_scr) { + delayed()->mov(scr, O0); + } else { + delayed()->mov(store_addr->after_save(), O0); + } + restore(); + + bind(filtered); +} + +#endif // SERIALGC +/////////////////////////////////////////////////////////////////////////////////// + +void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_val, Register tmp) { + // If we're writing constant NULL, we can skip the write barrier. + if (new_val == G0) return; + CardTableModRefBS* bs = (CardTableModRefBS*) Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef || + bs->kind() == BarrierSet::CardTableExtension, "wrong barrier"); + card_table_write(bs->byte_map_base, tmp, store_addr); +} + +void MacroAssembler::load_klass(Register src_oop, Register klass) { + // The number of bytes in this code is used by + // MachCallDynamicJavaNode::ret_addr_offset() + // if this changes, change that. + if (UseCompressedKlassPointers) { + lduw(src_oop, oopDesc::klass_offset_in_bytes(), klass); + decode_klass_not_null(klass); + } else { + ld_ptr(src_oop, oopDesc::klass_offset_in_bytes(), klass); + } +} + +void MacroAssembler::store_klass(Register klass, Register dst_oop) { + if (UseCompressedKlassPointers) { + assert(dst_oop != klass, "not enough registers"); + encode_klass_not_null(klass); + st(klass, dst_oop, oopDesc::klass_offset_in_bytes()); + } else { + st_ptr(klass, dst_oop, oopDesc::klass_offset_in_bytes()); + } +} + +void MacroAssembler::store_klass_gap(Register s, Register d) { + if (UseCompressedKlassPointers) { + assert(s != d, "not enough registers"); + st(s, d, oopDesc::klass_gap_offset_in_bytes()); + } +} + +void MacroAssembler::load_heap_oop(const Address& s, Register d) { + if (UseCompressedOops) { + lduw(s, d); + decode_heap_oop(d); + } else { + ld_ptr(s, d); + } +} + +void MacroAssembler::load_heap_oop(Register s1, Register s2, Register d) { + if (UseCompressedOops) { + lduw(s1, s2, d); + decode_heap_oop(d, d); + } else { + ld_ptr(s1, s2, d); + } +} + +void MacroAssembler::load_heap_oop(Register s1, int simm13a, Register d) { + if (UseCompressedOops) { + lduw(s1, simm13a, d); + decode_heap_oop(d, d); + } else { + ld_ptr(s1, simm13a, d); + } +} + +void MacroAssembler::load_heap_oop(Register s1, RegisterOrConstant s2, Register d) { + if (s2.is_constant()) load_heap_oop(s1, s2.as_constant(), d); + else load_heap_oop(s1, s2.as_register(), d); +} + +void MacroAssembler::store_heap_oop(Register d, Register s1, Register s2) { + if (UseCompressedOops) { + assert(s1 != d && s2 != d, "not enough registers"); + encode_heap_oop(d); + st(d, s1, s2); + } else { + st_ptr(d, s1, s2); + } +} + +void MacroAssembler::store_heap_oop(Register d, Register s1, int simm13a) { + if (UseCompressedOops) { + assert(s1 != d, "not enough registers"); + encode_heap_oop(d); + st(d, s1, simm13a); + } else { + st_ptr(d, s1, simm13a); + } +} + +void MacroAssembler::store_heap_oop(Register d, const Address& a, int offset) { + if (UseCompressedOops) { + assert(a.base() != d, "not enough registers"); + encode_heap_oop(d); + st(d, a, offset); + } else { + st_ptr(d, a, offset); + } +} + + +void MacroAssembler::encode_heap_oop(Register src, Register dst) { + assert (UseCompressedOops, "must be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + verify_oop(src); + if (Universe::narrow_oop_base() == NULL) { + srlx(src, LogMinObjAlignmentInBytes, dst); + return; + } + Label done; + if (src == dst) { + // optimize for frequent case src == dst + bpr(rc_nz, true, Assembler::pt, src, done); + delayed() -> sub(src, G6_heapbase, dst); // annuled if not taken + bind(done); + srlx(src, LogMinObjAlignmentInBytes, dst); + } else { + bpr(rc_z, false, Assembler::pn, src, done); + delayed() -> mov(G0, dst); + // could be moved before branch, and annulate delay, + // but may add some unneeded work decoding null + sub(src, G6_heapbase, dst); + srlx(dst, LogMinObjAlignmentInBytes, dst); + bind(done); + } +} + + +void MacroAssembler::encode_heap_oop_not_null(Register r) { + assert (UseCompressedOops, "must be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + verify_oop(r); + if (Universe::narrow_oop_base() != NULL) + sub(r, G6_heapbase, r); + srlx(r, LogMinObjAlignmentInBytes, r); +} + +void MacroAssembler::encode_heap_oop_not_null(Register src, Register dst) { + assert (UseCompressedOops, "must be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + verify_oop(src); + if (Universe::narrow_oop_base() == NULL) { + srlx(src, LogMinObjAlignmentInBytes, dst); + } else { + sub(src, G6_heapbase, dst); + srlx(dst, LogMinObjAlignmentInBytes, dst); + } +} + +// Same algorithm as oops.inline.hpp decode_heap_oop. +void MacroAssembler::decode_heap_oop(Register src, Register dst) { + assert (UseCompressedOops, "must be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + sllx(src, LogMinObjAlignmentInBytes, dst); + if (Universe::narrow_oop_base() != NULL) { + Label done; + bpr(rc_nz, true, Assembler::pt, dst, done); + delayed() -> add(dst, G6_heapbase, dst); // annuled if not taken + bind(done); + } + verify_oop(dst); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + assert (UseCompressedOops, "must be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + sllx(r, LogMinObjAlignmentInBytes, r); + if (Universe::narrow_oop_base() != NULL) + add(r, G6_heapbase, r); +} + +void MacroAssembler::decode_heap_oop_not_null(Register src, Register dst) { + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + assert (UseCompressedOops, "must be compressed"); + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + sllx(src, LogMinObjAlignmentInBytes, dst); + if (Universe::narrow_oop_base() != NULL) + add(dst, G6_heapbase, dst); +} + +void MacroAssembler::encode_klass_not_null(Register r) { + assert(Metaspace::is_initialized(), "metaspace should be initialized"); + assert (UseCompressedKlassPointers, "must be compressed"); + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + if (Universe::narrow_klass_base() != NULL) + sub(r, G6_heapbase, r); + srlx(r, LogKlassAlignmentInBytes, r); +} + +void MacroAssembler::encode_klass_not_null(Register src, Register dst) { + assert(Metaspace::is_initialized(), "metaspace should be initialized"); + assert (UseCompressedKlassPointers, "must be compressed"); + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + if (Universe::narrow_klass_base() == NULL) { + srlx(src, LogKlassAlignmentInBytes, dst); + } else { + sub(src, G6_heapbase, dst); + srlx(dst, LogKlassAlignmentInBytes, dst); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + assert(Metaspace::is_initialized(), "metaspace should be initialized"); + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. + assert (UseCompressedKlassPointers, "must be compressed"); + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + sllx(r, LogKlassAlignmentInBytes, r); + if (Universe::narrow_klass_base() != NULL) + add(r, G6_heapbase, r); +} + +void MacroAssembler::decode_klass_not_null(Register src, Register dst) { + assert(Metaspace::is_initialized(), "metaspace should be initialized"); + // Do not add assert code to this unless you change vtableStubs_sparc.cpp + // pd_code_size_limit. + assert (UseCompressedKlassPointers, "must be compressed"); + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + sllx(src, LogKlassAlignmentInBytes, dst); + if (Universe::narrow_klass_base() != NULL) + add(dst, G6_heapbase, dst); +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedKlassPointers) { + AddressLiteral base(Universe::narrow_ptrs_base_addr()); + load_ptr_contents(base, G6_heapbase); + } +} + +// Compare char[] arrays aligned to 4 bytes. +void MacroAssembler::char_arrays_equals(Register ary1, Register ary2, + Register limit, Register result, + Register chr1, Register chr2, Label& Ldone) { + Label Lvector, Lloop; + assert(chr1 == result, "should be the same"); + + // Note: limit contains number of bytes (2*char_elements) != 0. + andcc(limit, 0x2, chr1); // trailing character ? + br(Assembler::zero, false, Assembler::pt, Lvector); + delayed()->nop(); + + // compare the trailing char + sub(limit, sizeof(jchar), limit); + lduh(ary1, limit, chr1); + lduh(ary2, limit, chr2); + cmp(chr1, chr2); + br(Assembler::notEqual, true, Assembler::pt, Ldone); + delayed()->mov(G0, result); // not equal + + // only one char ? + cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn); + delayed()->add(G0, 1, result); // zero-length arrays are equal + + // word by word compare, dont't need alignment check + bind(Lvector); + // Shift ary1 and ary2 to the end of the arrays, negate limit + add(ary1, limit, ary1); + add(ary2, limit, ary2); + neg(limit, limit); + + lduw(ary1, limit, chr1); + bind(Lloop); + lduw(ary2, limit, chr2); + cmp(chr1, chr2); + br(Assembler::notEqual, true, Assembler::pt, Ldone); + delayed()->mov(G0, result); // not equal + inccc(limit, 2*sizeof(jchar)); + // annul LDUW if branch is not taken to prevent access past end of array + br(Assembler::notZero, true, Assembler::pt, Lloop); + delayed()->lduw(ary1, limit, chr1); // hoisted + + // Caller should set it: + // add(G0, 1, result); // equals +} + +// Use BIS for zeroing (count is in bytes). +void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) { + assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing"); + Register end = count; + int cache_line_size = VM_Version::prefetch_data_size(); + // Minimum count when BIS zeroing can be used since + // it needs membar which is expensive. + int block_zero_size = MAX2(cache_line_size*3, (int)BlockZeroingLowLimit); + + Label small_loop; + // Check if count is negative (dead code) or zero. + // Note, count uses 64bit in 64 bit VM. + cmp_and_brx_short(count, 0, Assembler::lessEqual, Assembler::pn, Ldone); + + // Use BIS zeroing only for big arrays since it requires membar. + if (Assembler::is_simm13(block_zero_size)) { // < 4096 + cmp(count, block_zero_size); + } else { + set(block_zero_size, temp); + cmp(count, temp); + } + br(Assembler::lessUnsigned, false, Assembler::pt, small_loop); + delayed()->add(to, count, end); + + // Note: size is >= three (32 bytes) cache lines. + + // Clean the beginning of space up to next cache line. + for (int offs = 0; offs < cache_line_size; offs += 8) { + stx(G0, to, offs); + } + + // align to next cache line + add(to, cache_line_size, to); + and3(to, -cache_line_size, to); + + // Note: size left >= two (32 bytes) cache lines. + + // BIS should not be used to zero tail (64 bytes) + // to avoid zeroing a header of the following object. + sub(end, (cache_line_size*2)-8, end); + + Label bis_loop; + bind(bis_loop); + stxa(G0, to, G0, Assembler::ASI_ST_BLKINIT_PRIMARY); + add(to, cache_line_size, to); + cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, bis_loop); + + // BIS needs membar. + membar(Assembler::StoreLoad); + + add(end, (cache_line_size*2)-8, end); // restore end + cmp_and_brx_short(to, end, Assembler::greaterEqualUnsigned, Assembler::pn, Ldone); + + // Clean the tail. + bind(small_loop); + stx(G0, to, 0); + add(to, 8, to); + cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop); + nop(); // Separate short branches +} diff --git a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp new file mode 100644 index 00000000000..1e1e6de9e47 --- /dev/null +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp @@ -0,0 +1,1504 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_VM_MACROASSEMBLER_SPARC_HPP +#define CPU_SPARC_VM_MACROASSEMBLER_SPARC_HPP + +#include "asm/assembler.hpp" + +// promises that the system will not use traps 16-31 +#define ST_RESERVED_FOR_USER_0 0x10 + +class BiasedLockingCounters; + + +// Register aliases for parts of the system: + +// 64 bit values can be kept in g1-g5, o1-o5 and o7 and all 64 bits are safe +// across context switches in V8+ ABI. Of course, there are no 64 bit regs +// in V8 ABI. All 64 bits are preserved in V9 ABI for all registers. + +// g2-g4 are scratch registers called "application globals". Their +// meaning is reserved to the "compilation system"--which means us! +// They are are not supposed to be touched by ordinary C code, although +// highly-optimized C code might steal them for temps. They are safe +// across thread switches, and the ABI requires that they be safe +// across function calls. +// +// g1 and g3 are touched by more modules. V8 allows g1 to be clobbered +// across func calls, and V8+ also allows g5 to be clobbered across +// func calls. Also, g1 and g5 can get touched while doing shared +// library loading. +// +// We must not touch g7 (it is the thread-self register) and g6 is +// reserved for certain tools. g0, of course, is always zero. +// +// (Sources: SunSoft Compilers Group, thread library engineers.) + +// %%%% The interpreter should be revisited to reduce global scratch regs. + +// This global always holds the current JavaThread pointer: + +REGISTER_DECLARATION(Register, G2_thread , G2); +REGISTER_DECLARATION(Register, G6_heapbase , G6); + +// The following globals are part of the Java calling convention: + +REGISTER_DECLARATION(Register, G5_method , G5); +REGISTER_DECLARATION(Register, G5_megamorphic_method , G5_method); +REGISTER_DECLARATION(Register, G5_inline_cache_reg , G5_method); + +// The following globals are used for the new C1 & interpreter calling convention: +REGISTER_DECLARATION(Register, Gargs , G4); // pointing to the last argument + +// This local is used to preserve G2_thread in the interpreter and in stubs: +REGISTER_DECLARATION(Register, L7_thread_cache , L7); + +// These globals are used as scratch registers in the interpreter: + +REGISTER_DECLARATION(Register, Gframe_size , G1); // SAME REG as G1_scratch +REGISTER_DECLARATION(Register, G1_scratch , G1); // also SAME +REGISTER_DECLARATION(Register, G3_scratch , G3); +REGISTER_DECLARATION(Register, G4_scratch , G4); + +// These globals are used as short-lived scratch registers in the compiler: + +REGISTER_DECLARATION(Register, Gtemp , G5); + +// JSR 292 fixed register usages: +REGISTER_DECLARATION(Register, G5_method_type , G5); +REGISTER_DECLARATION(Register, G3_method_handle , G3); +REGISTER_DECLARATION(Register, L7_mh_SP_save , L7); + +// The compiler requires that G5_megamorphic_method is G5_inline_cache_klass, +// because a single patchable "set" instruction (NativeMovConstReg, +// or NativeMovConstPatching for compiler1) instruction +// serves to set up either quantity, depending on whether the compiled +// call site is an inline cache or is megamorphic. See the function +// CompiledIC::set_to_megamorphic. +// +// If a inline cache targets an interpreted method, then the +// G5 register will be used twice during the call. First, +// the call site will be patched to load a compiledICHolder +// into G5. (This is an ordered pair of ic_klass, method.) +// The c2i adapter will first check the ic_klass, then load +// G5_method with the method part of the pair just before +// jumping into the interpreter. +// +// Note that G5_method is only the method-self for the interpreter, +// and is logically unrelated to G5_megamorphic_method. +// +// Invariants on G2_thread (the JavaThread pointer): +// - it should not be used for any other purpose anywhere +// - it must be re-initialized by StubRoutines::call_stub() +// - it must be preserved around every use of call_VM + +// We can consider using g2/g3/g4 to cache more values than the +// JavaThread, such as the card-marking base or perhaps pointers into +// Eden. It's something of a waste to use them as scratch temporaries, +// since they are not supposed to be volatile. (Of course, if we find +// that Java doesn't benefit from application globals, then we can just +// use them as ordinary temporaries.) +// +// Since g1 and g5 (and/or g6) are the volatile (caller-save) registers, +// it makes sense to use them routinely for procedure linkage, +// whenever the On registers are not applicable. Examples: G5_method, +// G5_inline_cache_klass, and a double handful of miscellaneous compiler +// stubs. This means that compiler stubs, etc., should be kept to a +// maximum of two or three G-register arguments. + + +// stub frames + +REGISTER_DECLARATION(Register, Lentry_args , L0); // pointer to args passed to callee (interpreter) not stub itself + +// Interpreter frames + +#ifdef CC_INTERP +REGISTER_DECLARATION(Register, Lstate , L0); // interpreter state object pointer +REGISTER_DECLARATION(Register, L1_scratch , L1); // scratch +REGISTER_DECLARATION(Register, Lmirror , L1); // mirror (for native methods only) +REGISTER_DECLARATION(Register, L2_scratch , L2); +REGISTER_DECLARATION(Register, L3_scratch , L3); +REGISTER_DECLARATION(Register, L4_scratch , L4); +REGISTER_DECLARATION(Register, Lscratch , L5); // C1 uses +REGISTER_DECLARATION(Register, Lscratch2 , L6); // C1 uses +REGISTER_DECLARATION(Register, L7_scratch , L7); // constant pool cache +REGISTER_DECLARATION(Register, O5_savedSP , O5); +REGISTER_DECLARATION(Register, I5_savedSP , I5); // Saved SP before bumping for locals. This is simply + // a copy SP, so in 64-bit it's a biased value. The bias + // is added and removed as needed in the frame code. +// Interface to signature handler +REGISTER_DECLARATION(Register, Llocals , L7); // pointer to locals for signature handler +REGISTER_DECLARATION(Register, Lmethod , L6); // Method* when calling signature handler + +#else +REGISTER_DECLARATION(Register, Lesp , L0); // expression stack pointer +REGISTER_DECLARATION(Register, Lbcp , L1); // pointer to next bytecode +REGISTER_DECLARATION(Register, Lmethod , L2); +REGISTER_DECLARATION(Register, Llocals , L3); +REGISTER_DECLARATION(Register, Largs , L3); // pointer to locals for signature handler + // must match Llocals in asm interpreter +REGISTER_DECLARATION(Register, Lmonitors , L4); +REGISTER_DECLARATION(Register, Lbyte_code , L5); +// When calling out from the interpreter we record SP so that we can remove any extra stack +// space allocated during adapter transitions. This register is only live from the point +// of the call until we return. +REGISTER_DECLARATION(Register, Llast_SP , L5); +REGISTER_DECLARATION(Register, Lscratch , L5); +REGISTER_DECLARATION(Register, Lscratch2 , L6); +REGISTER_DECLARATION(Register, LcpoolCache , L6); // constant pool cache + +REGISTER_DECLARATION(Register, O5_savedSP , O5); +REGISTER_DECLARATION(Register, I5_savedSP , I5); // Saved SP before bumping for locals. This is simply + // a copy SP, so in 64-bit it's a biased value. The bias + // is added and removed as needed in the frame code. +REGISTER_DECLARATION(Register, IdispatchTables , I4); // Base address of the bytecode dispatch tables +REGISTER_DECLARATION(Register, IdispatchAddress , I3); // Register which saves the dispatch address for each bytecode +REGISTER_DECLARATION(Register, ImethodDataPtr , I2); // Pointer to the current method data +#endif /* CC_INTERP */ + +// NOTE: Lscratch2 and LcpoolCache point to the same registers in +// the interpreter code. If Lscratch2 needs to be used for some +// purpose than LcpoolCache should be restore after that for +// the interpreter to work right +// (These assignments must be compatible with L7_thread_cache; see above.) + +// Since Lbcp points into the middle of the method object, +// it is temporarily converted into a "bcx" during GC. + +// Exception processing +// These registers are passed into exception handlers. +// All exception handlers require the exception object being thrown. +// In addition, an nmethod's exception handler must be passed +// the address of the call site within the nmethod, to allow +// proper selection of the applicable catch block. +// (Interpreter frames use their own bcp() for this purpose.) +// +// The Oissuing_pc value is not always needed. When jumping to a +// handler that is known to be interpreted, the Oissuing_pc value can be +// omitted. An actual catch block in compiled code receives (from its +// nmethod's exception handler) the thrown exception in the Oexception, +// but it doesn't need the Oissuing_pc. +// +// If an exception handler (either interpreted or compiled) +// discovers there is no applicable catch block, it updates +// the Oissuing_pc to the continuation PC of its own caller, +// pops back to that caller's stack frame, and executes that +// caller's exception handler. Obviously, this process will +// iterate until the control stack is popped back to a method +// containing an applicable catch block. A key invariant is +// that the Oissuing_pc value is always a value local to +// the method whose exception handler is currently executing. +// +// Note: The issuing PC value is __not__ a raw return address (I7 value). +// It is a "return pc", the address __following__ the call. +// Raw return addresses are converted to issuing PCs by frame::pc(), +// or by stubs. Issuing PCs can be used directly with PC range tables. +// +REGISTER_DECLARATION(Register, Oexception , O0); // exception being thrown +REGISTER_DECLARATION(Register, Oissuing_pc , O1); // where the exception is coming from + + +// These must occur after the declarations above +#ifndef DONT_USE_REGISTER_DEFINES + +#define Gthread AS_REGISTER(Register, Gthread) +#define Gmethod AS_REGISTER(Register, Gmethod) +#define Gmegamorphic_method AS_REGISTER(Register, Gmegamorphic_method) +#define Ginline_cache_reg AS_REGISTER(Register, Ginline_cache_reg) +#define Gargs AS_REGISTER(Register, Gargs) +#define Lthread_cache AS_REGISTER(Register, Lthread_cache) +#define Gframe_size AS_REGISTER(Register, Gframe_size) +#define Gtemp AS_REGISTER(Register, Gtemp) + +#ifdef CC_INTERP +#define Lstate AS_REGISTER(Register, Lstate) +#define Lesp AS_REGISTER(Register, Lesp) +#define L1_scratch AS_REGISTER(Register, L1_scratch) +#define Lmirror AS_REGISTER(Register, Lmirror) +#define L2_scratch AS_REGISTER(Register, L2_scratch) +#define L3_scratch AS_REGISTER(Register, L3_scratch) +#define L4_scratch AS_REGISTER(Register, L4_scratch) +#define Lscratch AS_REGISTER(Register, Lscratch) +#define Lscratch2 AS_REGISTER(Register, Lscratch2) +#define L7_scratch AS_REGISTER(Register, L7_scratch) +#define Ostate AS_REGISTER(Register, Ostate) +#else +#define Lesp AS_REGISTER(Register, Lesp) +#define Lbcp AS_REGISTER(Register, Lbcp) +#define Lmethod AS_REGISTER(Register, Lmethod) +#define Llocals AS_REGISTER(Register, Llocals) +#define Lmonitors AS_REGISTER(Register, Lmonitors) +#define Lbyte_code AS_REGISTER(Register, Lbyte_code) +#define Lscratch AS_REGISTER(Register, Lscratch) +#define Lscratch2 AS_REGISTER(Register, Lscratch2) +#define LcpoolCache AS_REGISTER(Register, LcpoolCache) +#endif /* ! CC_INTERP */ + +#define Lentry_args AS_REGISTER(Register, Lentry_args) +#define I5_savedSP AS_REGISTER(Register, I5_savedSP) +#define O5_savedSP AS_REGISTER(Register, O5_savedSP) +#define IdispatchAddress AS_REGISTER(Register, IdispatchAddress) +#define ImethodDataPtr AS_REGISTER(Register, ImethodDataPtr) +#define IdispatchTables AS_REGISTER(Register, IdispatchTables) + +#define Oexception AS_REGISTER(Register, Oexception) +#define Oissuing_pc AS_REGISTER(Register, Oissuing_pc) + +#endif + + +// Address is an abstraction used to represent a memory location. +// +// Note: A register location is represented via a Register, not +// via an address for efficiency & simplicity reasons. + +class Address VALUE_OBJ_CLASS_SPEC { + private: + Register _base; // Base register. + RegisterOrConstant _index_or_disp; // Index register or constant displacement. + RelocationHolder _rspec; + + public: + Address() : _base(noreg), _index_or_disp(noreg) {} + + Address(Register base, RegisterOrConstant index_or_disp) + : _base(base), + _index_or_disp(index_or_disp) { + } + + Address(Register base, Register index) + : _base(base), + _index_or_disp(index) { + } + + Address(Register base, int disp) + : _base(base), + _index_or_disp(disp) { + } + +#ifdef ASSERT + // ByteSize is only a class when ASSERT is defined, otherwise it's an int. + Address(Register base, ByteSize disp) + : _base(base), + _index_or_disp(in_bytes(disp)) { + } +#endif + + // accessors + Register base() const { return _base; } + Register index() const { return _index_or_disp.as_register(); } + int disp() const { return _index_or_disp.as_constant(); } + + bool has_index() const { return _index_or_disp.is_register(); } + bool has_disp() const { return _index_or_disp.is_constant(); } + + bool uses(Register reg) const { return base() == reg || (has_index() && index() == reg); } + + const relocInfo::relocType rtype() { return _rspec.type(); } + const RelocationHolder& rspec() { return _rspec; } + + RelocationHolder rspec(int offset) const { + return offset == 0 ? _rspec : _rspec.plus(offset); + } + + inline bool is_simm13(int offset = 0); // check disp+offset for overflow + + Address plus_disp(int plusdisp) const { // bump disp by a small amount + assert(_index_or_disp.is_constant(), "must have a displacement"); + Address a(base(), disp() + plusdisp); + return a; + } + bool is_same_address(Address a) const { + // disregard _rspec + return base() == a.base() && (has_index() ? index() == a.index() : disp() == a.disp()); + } + + Address after_save() const { + Address a = (*this); + a._base = a._base->after_save(); + return a; + } + + Address after_restore() const { + Address a = (*this); + a._base = a._base->after_restore(); + return a; + } + + // Convert the raw encoding form into the form expected by the + // constructor for Address. + static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); + + friend class Assembler; +}; + + +class AddressLiteral VALUE_OBJ_CLASS_SPEC { + private: + address _address; + RelocationHolder _rspec; + + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); +#ifdef _LP64 + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); +#endif + case relocInfo::none: + return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + + protected: + // creation + AddressLiteral() : _address(NULL), _rspec(NULL) {} + + public: + AddressLiteral(address addr, RelocationHolder const& rspec) + : _address(addr), + _rspec(rspec) {} + + // Some constructors to avoid casting at the call site. + AddressLiteral(jobject obj, RelocationHolder const& rspec) + : _address((address) obj), + _rspec(rspec) {} + + AddressLiteral(intptr_t value, RelocationHolder const& rspec) + : _address((address) value), + _rspec(rspec) {} + + AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + // Some constructors to avoid casting at the call site. + AddressLiteral(address* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(bool* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(const bool* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(signed char* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(int* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(intptr_t addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + +#ifdef _LP64 + // 32-bit complains about a multiple declaration for int*. + AddressLiteral(intptr_t* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} +#endif + + AddressLiteral(Metadata* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(Metadata** addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(float* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(double* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + intptr_t value() const { return (intptr_t) _address; } + int low10() const; + + const relocInfo::relocType rtype() const { return _rspec.type(); } + const RelocationHolder& rspec() const { return _rspec; } + + RelocationHolder rspec(int offset) const { + return offset == 0 ? _rspec : _rspec.plus(offset); + } +}; + +// Convenience classes +class ExternalAddress: public AddressLiteral { + private: + static relocInfo::relocType reloc_for_target(address target) { + // Sometimes ExternalAddress is used for values which aren't + // exactly addresses, like the card table base. + // external_word_type can't be used for values in the first page + // so just skip the reloc in that case. + return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none; + } + + public: + ExternalAddress(address target) : AddressLiteral(target, reloc_for_target( target)) {} + ExternalAddress(Metadata** target) : AddressLiteral(target, reloc_for_target((address) target)) {} +}; + +inline Address RegisterImpl::address_in_saved_window() const { + return (Address(SP, (sp_offset_in_saved_window() * wordSize) + STACK_BIAS)); +} + + + +// Argument is an abstraction used to represent an outgoing +// actual argument or an incoming formal parameter, whether +// it resides in memory or in a register, in a manner consistent +// with the SPARC Application Binary Interface, or ABI. This is +// often referred to as the native or C calling convention. + +class Argument VALUE_OBJ_CLASS_SPEC { + private: + int _number; + bool _is_in; + + public: +#ifdef _LP64 + enum { + n_register_parameters = 6, // only 6 registers may contain integer parameters + n_float_register_parameters = 16 // Can have up to 16 floating registers + }; +#else + enum { + n_register_parameters = 6 // only 6 registers may contain integer parameters + }; +#endif + + // creation + Argument(int number, bool is_in) : _number(number), _is_in(is_in) {} + + int number() const { return _number; } + bool is_in() const { return _is_in; } + bool is_out() const { return !is_in(); } + + Argument successor() const { return Argument(number() + 1, is_in()); } + Argument as_in() const { return Argument(number(), true ); } + Argument as_out() const { return Argument(number(), false); } + + // locating register-based arguments: + bool is_register() const { return _number < n_register_parameters; } + +#ifdef _LP64 + // locating Floating Point register-based arguments: + bool is_float_register() const { return _number < n_float_register_parameters; } + + FloatRegister as_float_register() const { + assert(is_float_register(), "must be a register argument"); + return as_FloatRegister(( number() *2 ) + 1); + } + FloatRegister as_double_register() const { + assert(is_float_register(), "must be a register argument"); + return as_FloatRegister(( number() *2 )); + } +#endif + + Register as_register() const { + assert(is_register(), "must be a register argument"); + return is_in() ? as_iRegister(number()) : as_oRegister(number()); + } + + // locating memory-based arguments + Address as_address() const { + assert(!is_register(), "must be a memory argument"); + return address_in_frame(); + } + + // When applied to a register-based argument, give the corresponding address + // into the 6-word area "into which callee may store register arguments" + // (This is a different place than the corresponding register-save area location.) + Address address_in_frame() const; + + // debugging + const char* name() const; + + friend class Assembler; +}; + + +class RegistersForDebugging : public StackObj { + public: + intptr_t i[8], l[8], o[8], g[8]; + float f[32]; + double d[32]; + + void print(outputStream* s); + + static int i_offset(int j) { return offset_of(RegistersForDebugging, i[j]); } + static int l_offset(int j) { return offset_of(RegistersForDebugging, l[j]); } + static int o_offset(int j) { return offset_of(RegistersForDebugging, o[j]); } + static int g_offset(int j) { return offset_of(RegistersForDebugging, g[j]); } + static int f_offset(int j) { return offset_of(RegistersForDebugging, f[j]); } + static int d_offset(int j) { return offset_of(RegistersForDebugging, d[j / 2]); } + + // gen asm code to save regs + static void save_registers(MacroAssembler* a); + + // restore global registers in case C code disturbed them + static void restore_registers(MacroAssembler* a, Register r); +}; + + +// MacroAssembler extends Assembler by a few frequently used macros. +// +// Most of the standard SPARC synthetic ops are defined here. +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +#define JMP2(r1, r2) jmp(r1, r2, __FILE__, __LINE__) +#define JMP(r1, off) jmp(r1, off, __FILE__, __LINE__) +#define JUMP(a, temp, off) jump(a, temp, off, __FILE__, __LINE__) +#define JUMPL(a, temp, d, off) jumpl(a, temp, d, off, __FILE__, __LINE__) + + +class MacroAssembler : public Assembler { + // code patchers need various routines like inv_wdisp() + friend class NativeInstruction; + friend class NativeGeneralJump; + friend class Relocation; + friend class Label; + + protected: + static void print_instruction(int inst); + static int patched_branch(int dest_pos, int inst, int inst_pos); + static int branch_destination(int inst, int pos); + + // Support for VM calls + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). +#ifdef CC_INTERP + #define VIRTUAL +#else + #define VIRTUAL virtual +#endif + + VIRTUAL void call_VM_leaf_base(Register thread_cache, address entry_point, int number_of_arguments); + + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + // + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // A non-volatile java_thread_cache register should be specified so + // that the G2_thread value can be preserved across the call. + // (If java_thread_cache is noreg, then a slow get_thread call + // will re-initialize the G2_thread.) call_VM_base returns the register that contains the + // thread. + // + // If no last_java_sp is specified (noreg) than SP will be used instead. + + virtual void call_VM_base( + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread_cache, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after call + bool check_exception=true // flag which indicates if exception should be checked + ); + + // This routine should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles and ForceEarlyReturn PopFrame requests. + virtual void check_and_handle_popframe(Register scratch_reg); + virtual void check_and_handle_earlyret(Register scratch_reg); + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + // + // %%%%%% Currently not done for SPARC + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + // support for delayed instructions + MacroAssembler* delayed() { Assembler::delayed(); return this; } + + // branches that use right instruction for v8 vs. v9 + inline void br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void br( Condition c, bool a, Predict p, Label& L ); + + inline void fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void fb( Condition c, bool a, Predict p, Label& L ); + + // compares register with zero (32 bit) and branches (V9 and V8 instructions) + void cmp_zero_and_br( Condition c, Register s1, Label& L, bool a = false, Predict p = pn ); + // Compares a pointer register with zero and branches on (not)null. + // Does a test & branch on 32-bit systems and a register-branch on 64-bit. + void br_null ( Register s1, bool a, Predict p, Label& L ); + void br_notnull( Register s1, bool a, Predict p, Label& L ); + + // + // Compare registers and branch with nop in delay slot or cbcond without delay slot. + // + // ATTENTION: use these instructions with caution because cbcond instruction + // has very short distance: 512 instructions (2Kbyte). + + // Compare integer (32 bit) values (icc only). + void cmp_and_br_short(Register s1, Register s2, Condition c, Predict p, Label& L); + void cmp_and_br_short(Register s1, int simm13a, Condition c, Predict p, Label& L); + // Platform depending version for pointer compare (icc on !LP64 and xcc on LP64). + void cmp_and_brx_short(Register s1, Register s2, Condition c, Predict p, Label& L); + void cmp_and_brx_short(Register s1, int simm13a, Condition c, Predict p, Label& L); + + // Short branch version for compares a pointer pwith zero. + void br_null_short ( Register s1, Predict p, Label& L ); + void br_notnull_short( Register s1, Predict p, Label& L ); + + // unconditional short branch + void ba_short(Label& L); + + inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void bp( Condition c, bool a, CC cc, Predict p, Label& L ); + + // Branch that tests xcc in LP64 and icc in !LP64 + inline void brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void brx( Condition c, bool a, Predict p, Label& L ); + + // unconditional branch + inline void ba( Label& L ); + + // Branch that tests fp condition codes + inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none ); + inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L ); + + // get PC the best way + inline int get_pc( Register d ); + + // Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual) + inline void cmp( Register s1, Register s2 ) { subcc( s1, s2, G0 ); } + inline void cmp( Register s1, int simm13a ) { subcc( s1, simm13a, G0 ); } + + inline void jmp( Register s1, Register s2 ); + inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() ); + + // Check if the call target is out of wdisp30 range (relative to the code cache) + static inline bool is_far_target(address d); + inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type ); + inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type ); + inline void callr( Register s1, Register s2 ); + inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() ); + + // Emits nothing on V8 + inline void iprefetch( address d, relocInfo::relocType rt = relocInfo::none ); + inline void iprefetch( Label& L); + + inline void tst( Register s ) { orcc( G0, s, G0 ); } + +#ifdef PRODUCT + inline void ret( bool trace = TraceJumps ) { if (trace) { + mov(I7, O7); // traceable register + JMP(O7, 2 * BytesPerInstWord); + } else { + jmpl( I7, 2 * BytesPerInstWord, G0 ); + } + } + + inline void retl( bool trace = TraceJumps ) { if (trace) JMP(O7, 2 * BytesPerInstWord); + else jmpl( O7, 2 * BytesPerInstWord, G0 ); } +#else + void ret( bool trace = TraceJumps ); + void retl( bool trace = TraceJumps ); +#endif /* PRODUCT */ + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + void pd_patch_instruction(address branch, address target); +#ifndef PRODUCT + static void pd_print_patched_instruction(address branch); +#endif + + // sethi Macro handles optimizations and relocations +private: + void internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable); +public: + void sethi(const AddressLiteral& addrlit, Register d); + void patchable_sethi(const AddressLiteral& addrlit, Register d); + + // compute the number of instructions for a sethi/set + static int insts_for_sethi( address a, bool worst_case = false ); + static int worst_case_insts_for_set(); + + // set may be either setsw or setuw (high 32 bits may be zero or sign) +private: + void internal_set(const AddressLiteral& al, Register d, bool ForceRelocatable); + static int insts_for_internal_set(intptr_t value); +public: + void set(const AddressLiteral& addrlit, Register d); + void set(intptr_t value, Register d); + void set(address addr, Register d, RelocationHolder const& rspec); + static int insts_for_set(intptr_t value) { return insts_for_internal_set(value); } + + void patchable_set(const AddressLiteral& addrlit, Register d); + void patchable_set(intptr_t value, Register d); + void set64(jlong value, Register d, Register tmp); + static int insts_for_set64(jlong value); + + // sign-extend 32 to 64 + inline void signx( Register s, Register d ) { sra( s, G0, d); } + inline void signx( Register d ) { sra( d, G0, d); } + + inline void not1( Register s, Register d ) { xnor( s, G0, d ); } + inline void not1( Register d ) { xnor( d, G0, d ); } + + inline void neg( Register s, Register d ) { sub( G0, s, d ); } + inline void neg( Register d ) { sub( G0, d, d ); } + + inline void cas( Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY); } + inline void casx( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY); } + // Functions for isolating 64 bit atomic swaps for LP64 + // cas_ptr will perform cas for 32 bit VM's and casx for 64 bit VM's + inline void cas_ptr( Register s1, Register s2, Register d) { +#ifdef _LP64 + casx( s1, s2, d ); +#else + cas( s1, s2, d ); +#endif + } + + // Functions for isolating 64 bit shifts for LP64 + inline void sll_ptr( Register s1, Register s2, Register d ); + inline void sll_ptr( Register s1, int imm6a, Register d ); + inline void sll_ptr( Register s1, RegisterOrConstant s2, Register d ); + inline void srl_ptr( Register s1, Register s2, Register d ); + inline void srl_ptr( Register s1, int imm6a, Register d ); + + // little-endian + inline void casl( Register s1, Register s2, Register d) { casa( s1, s2, d, ASI_PRIMARY_LITTLE); } + inline void casxl( Register s1, Register s2, Register d) { casxa(s1, s2, d, ASI_PRIMARY_LITTLE); } + + inline void inc( Register d, int const13 = 1 ) { add( d, const13, d); } + inline void inccc( Register d, int const13 = 1 ) { addcc( d, const13, d); } + + inline void dec( Register d, int const13 = 1 ) { sub( d, const13, d); } + inline void deccc( Register d, int const13 = 1 ) { subcc( d, const13, d); } + + using Assembler::add; + inline void add(Register s1, int simm13a, Register d, relocInfo::relocType rtype); + inline void add(Register s1, int simm13a, Register d, RelocationHolder const& rspec); + inline void add(Register s1, RegisterOrConstant s2, Register d, int offset = 0); + inline void add(const Address& a, Register d, int offset = 0); + + using Assembler::andn; + inline void andn( Register s1, RegisterOrConstant s2, Register d); + + inline void btst( Register s1, Register s2 ) { andcc( s1, s2, G0 ); } + inline void btst( int simm13a, Register s ) { andcc( s, simm13a, G0 ); } + + inline void bset( Register s1, Register s2 ) { or3( s1, s2, s2 ); } + inline void bset( int simm13a, Register s ) { or3( s, simm13a, s ); } + + inline void bclr( Register s1, Register s2 ) { andn( s1, s2, s2 ); } + inline void bclr( int simm13a, Register s ) { andn( s, simm13a, s ); } + + inline void btog( Register s1, Register s2 ) { xor3( s1, s2, s2 ); } + inline void btog( int simm13a, Register s ) { xor3( s, simm13a, s ); } + + inline void clr( Register d ) { or3( G0, G0, d ); } + + inline void clrb( Register s1, Register s2); + inline void clrh( Register s1, Register s2); + inline void clr( Register s1, Register s2); + inline void clrx( Register s1, Register s2); + + inline void clrb( Register s1, int simm13a); + inline void clrh( Register s1, int simm13a); + inline void clr( Register s1, int simm13a); + inline void clrx( Register s1, int simm13a); + + // copy & clear upper word + inline void clruw( Register s, Register d ) { srl( s, G0, d); } + // clear upper word + inline void clruwu( Register d ) { srl( d, G0, d); } + + using Assembler::ldsb; + using Assembler::ldsh; + using Assembler::ldsw; + using Assembler::ldub; + using Assembler::lduh; + using Assembler::lduw; + using Assembler::ldx; + using Assembler::ldd; + +#ifdef ASSERT + // ByteSize is only a class when ASSERT is defined, otherwise it's an int. + inline void ld(Register s1, ByteSize simm13a, Register d); +#endif + + inline void ld(Register s1, Register s2, Register d); + inline void ld(Register s1, int simm13a, Register d); + + inline void ldsb(const Address& a, Register d, int offset = 0); + inline void ldsh(const Address& a, Register d, int offset = 0); + inline void ldsw(const Address& a, Register d, int offset = 0); + inline void ldub(const Address& a, Register d, int offset = 0); + inline void lduh(const Address& a, Register d, int offset = 0); + inline void lduw(const Address& a, Register d, int offset = 0); + inline void ldx( const Address& a, Register d, int offset = 0); + inline void ld( const Address& a, Register d, int offset = 0); + inline void ldd( const Address& a, Register d, int offset = 0); + + inline void ldub(Register s1, RegisterOrConstant s2, Register d ); + inline void ldsb(Register s1, RegisterOrConstant s2, Register d ); + inline void lduh(Register s1, RegisterOrConstant s2, Register d ); + inline void ldsh(Register s1, RegisterOrConstant s2, Register d ); + inline void lduw(Register s1, RegisterOrConstant s2, Register d ); + inline void ldsw(Register s1, RegisterOrConstant s2, Register d ); + inline void ldx( Register s1, RegisterOrConstant s2, Register d ); + inline void ld( Register s1, RegisterOrConstant s2, Register d ); + inline void ldd( Register s1, RegisterOrConstant s2, Register d ); + + using Assembler::ldf; + inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d); + inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0); + + // membar psuedo instruction. takes into account target memory model. + inline void membar( Assembler::Membar_mask_bits const7a ); + + // returns if membar generates anything. + inline bool membar_has_effect( Assembler::Membar_mask_bits const7a ); + + // mov pseudo instructions + inline void mov( Register s, Register d) { + if ( s != d ) or3( G0, s, d); + else assert_not_delayed(); // Put something useful in the delay slot! + } + + inline void mov_or_nop( Register s, Register d) { + if ( s != d ) or3( G0, s, d); + else nop(); + } + + inline void mov( int simm13a, Register d) { or3( G0, simm13a, d); } + + using Assembler::prefetch; + inline void prefetch(const Address& a, PrefetchFcn F, int offset = 0); + + using Assembler::stb; + using Assembler::sth; + using Assembler::stw; + using Assembler::stx; + using Assembler::std; + +#ifdef ASSERT + // ByteSize is only a class when ASSERT is defined, otherwise it's an int. + inline void st(Register d, Register s1, ByteSize simm13a); +#endif + + inline void st(Register d, Register s1, Register s2); + inline void st(Register d, Register s1, int simm13a); + + inline void stb(Register d, const Address& a, int offset = 0 ); + inline void sth(Register d, const Address& a, int offset = 0 ); + inline void stw(Register d, const Address& a, int offset = 0 ); + inline void stx(Register d, const Address& a, int offset = 0 ); + inline void st( Register d, const Address& a, int offset = 0 ); + inline void std(Register d, const Address& a, int offset = 0 ); + + inline void stb(Register d, Register s1, RegisterOrConstant s2 ); + inline void sth(Register d, Register s1, RegisterOrConstant s2 ); + inline void stw(Register d, Register s1, RegisterOrConstant s2 ); + inline void stx(Register d, Register s1, RegisterOrConstant s2 ); + inline void std(Register d, Register s1, RegisterOrConstant s2 ); + inline void st( Register d, Register s1, RegisterOrConstant s2 ); + + using Assembler::stf; + inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2); + inline void stf(FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset = 0); + + // Note: offset is added to s2. + using Assembler::sub; + inline void sub(Register s1, RegisterOrConstant s2, Register d, int offset = 0); + + using Assembler::swap; + inline void swap(Address& a, Register d, int offset = 0); + + // address pseudos: make these names unlike instruction names to avoid confusion + inline intptr_t load_pc_address( Register reg, int bytes_to_skip ); + inline void load_contents(const AddressLiteral& addrlit, Register d, int offset = 0); + inline void load_bool_contents(const AddressLiteral& addrlit, Register d, int offset = 0); + inline void load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset = 0); + inline void store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0); + inline void store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset = 0); + inline void jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset = 0); + inline void jump_to(const AddressLiteral& addrlit, Register temp, int offset = 0); + inline void jump_indirect_to(Address& a, Register temp, int ld_offset = 0, int jmp_offset = 0); + + // ring buffer traceable jumps + + void jmp2( Register r1, Register r2, const char* file, int line ); + void jmp ( Register r1, int offset, const char* file, int line ); + + void jumpl(const AddressLiteral& addrlit, Register temp, Register d, int offset, const char* file, int line); + void jump (const AddressLiteral& addrlit, Register temp, int offset, const char* file, int line); + + + // argument pseudos: + + inline void load_argument( Argument& a, Register d ); + inline void store_argument( Register s, Argument& a ); + inline void store_ptr_argument( Register s, Argument& a ); + inline void store_float_argument( FloatRegister s, Argument& a ); + inline void store_double_argument( FloatRegister s, Argument& a ); + inline void store_long_argument( Register s, Argument& a ); + + // handy macros: + + inline void round_to( Register r, int modulus ) { + assert_not_delayed(); + inc( r, modulus - 1 ); + and3( r, -modulus, r ); + } + + // -------------------------------------------------- + + // Functions for isolating 64 bit loads for LP64 + // ld_ptr will perform ld for 32 bit VM's and ldx for 64 bit VM's + // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's + inline void ld_ptr(Register s1, Register s2, Register d); + inline void ld_ptr(Register s1, int simm13a, Register d); + inline void ld_ptr(Register s1, RegisterOrConstant s2, Register d); + inline void ld_ptr(const Address& a, Register d, int offset = 0); + inline void st_ptr(Register d, Register s1, Register s2); + inline void st_ptr(Register d, Register s1, int simm13a); + inline void st_ptr(Register d, Register s1, RegisterOrConstant s2); + inline void st_ptr(Register d, const Address& a, int offset = 0); + +#ifdef ASSERT + // ByteSize is only a class when ASSERT is defined, otherwise it's an int. + inline void ld_ptr(Register s1, ByteSize simm13a, Register d); + inline void st_ptr(Register d, Register s1, ByteSize simm13a); +#endif + + // ld_long will perform ldd for 32 bit VM's and ldx for 64 bit VM's + // st_long will perform std for 32 bit VM's and stx for 64 bit VM's + inline void ld_long(Register s1, Register s2, Register d); + inline void ld_long(Register s1, int simm13a, Register d); + inline void ld_long(Register s1, RegisterOrConstant s2, Register d); + inline void ld_long(const Address& a, Register d, int offset = 0); + inline void st_long(Register d, Register s1, Register s2); + inline void st_long(Register d, Register s1, int simm13a); + inline void st_long(Register d, Register s1, RegisterOrConstant s2); + inline void st_long(Register d, const Address& a, int offset = 0); + + // Helpers for address formation. + // - They emit only a move if s2 is a constant zero. + // - If dest is a constant and either s1 or s2 is a register, the temp argument is required and becomes the result. + // - If dest is a register and either s1 or s2 is a non-simm13 constant, the temp argument is required and used to materialize the constant. + RegisterOrConstant regcon_andn_ptr(RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); + RegisterOrConstant regcon_inc_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); + RegisterOrConstant regcon_sll_ptr( RegisterOrConstant s1, RegisterOrConstant s2, RegisterOrConstant d, Register temp = noreg); + + RegisterOrConstant ensure_simm13_or_reg(RegisterOrConstant src, Register temp) { + if (is_simm13(src.constant_or_zero())) + return src; // register or short constant + guarantee(temp != noreg, "constant offset overflow"); + set(src.as_constant(), temp); + return temp; + } + + // -------------------------------------------------- + + public: + // traps as per trap.h (SPARC ABI?) + + void breakpoint_trap(); + void breakpoint_trap(Condition c, CC cc); + void flush_windows_trap(); + void clean_windows_trap(); + void get_psr_trap(); + void set_psr_trap(); + + // V8/V9 flush_windows + void flush_windows(); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp1, Register tmp2); + + // Stack frame creation/removal + void enter(); + void leave(); + + // V8/V9 integer multiply + void mult(Register s1, Register s2, Register d); + void mult(Register s1, int simm13a, Register d); + + // V8/V9 read and write of condition codes. + void read_ccr(Register d); + void write_ccr(Register s); + + // Manipulation of C++ bools + // These are idioms to flag the need for care with accessing bools but on + // this platform we assume byte size + + inline void stbool(Register d, const Address& a) { stb(d, a); } + inline void ldbool(const Address& a, Register d) { ldub(a, d); } + inline void movbool( bool boolconst, Register d) { mov( (int) boolconst, d); } + + // klass oop manipulations if compressed + void load_klass(Register src_oop, Register klass); + void store_klass(Register klass, Register dst_oop); + void store_klass_gap(Register s, Register dst_oop); + + // oop manipulations + void load_heap_oop(const Address& s, Register d); + void load_heap_oop(Register s1, Register s2, Register d); + void load_heap_oop(Register s1, int simm13a, Register d); + void load_heap_oop(Register s1, RegisterOrConstant s2, Register d); + void store_heap_oop(Register d, Register s1, Register s2); + void store_heap_oop(Register d, Register s1, int simm13a); + void store_heap_oop(Register d, const Address& a, int offset = 0); + + void encode_heap_oop(Register src, Register dst); + void encode_heap_oop(Register r) { + encode_heap_oop(r, r); + } + void decode_heap_oop(Register src, Register dst); + void decode_heap_oop(Register r) { + decode_heap_oop(r, r); + } + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register src, Register dst); + void decode_heap_oop_not_null(Register src, Register dst); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register src, Register dst); + void decode_klass_not_null(Register src, Register dst); + + // Support for managing the JavaThread pointer (i.e.; the reference to + // thread-local information). + void get_thread(); // load G2_thread + void verify_thread(); // verify G2_thread contents + void save_thread (const Register threache); // save to cache + void restore_thread(const Register thread_cache); // restore from cache + + // Support for last Java frame (but use call_VM instead where possible) + void set_last_Java_frame(Register last_java_sp, Register last_Java_pc); + void reset_last_Java_frame(void); + + // Call into the VM. + // Passes the thread pointer (in O0) as a prepended argument. + // Makes sure oop return values are visible to the GC. + void call_VM(Register oop_result, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + + // these overloadings are not presently used on SPARC: + void call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + + void call_VM_leaf(Register thread_cache, address entry_point, int number_of_arguments = 0); + void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1); + void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2); + void call_VM_leaf(Register thread_cache, address entry_point, Register arg_1, Register arg_2, Register arg_3); + + void get_vm_result (Register oop_result); + void get_vm_result_2(Register metadata_result); + + // vm result is currently getting hijacked to for oop preservation + void set_vm_result(Register oop_result); + + // Emit the CompiledIC call idiom + void ic_call(address entry, bool emit_delay = true); + + // if call_VM_base was called with check_exceptions=false, then call + // check_and_forward_exception to handle exceptions when it is safe + void check_and_forward_exception(Register scratch_reg); + + private: + // For V8 + void read_ccr_trap(Register ccr_save); + void write_ccr_trap(Register ccr_save1, Register scratch1, Register scratch2); + +#ifdef ASSERT + // For V8 debugging. Uses V8 instruction sequence and checks + // result with V9 insturctions rdccr and wrccr. + // Uses Gscatch and Gscatch2 + void read_ccr_v8_assert(Register ccr_save); + void write_ccr_v8_assert(Register ccr_save); +#endif // ASSERT + + public: + + // Write to card table for - register is destroyed afterwards. + void card_table_write(jbyte* byte_map_base, Register tmp, Register obj); + + void card_write_barrier_post(Register store_addr, Register new_val, Register tmp); + +#ifndef SERIALGC + // General G1 pre-barrier generator. + void g1_write_barrier_pre(Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs); + + // General G1 post-barrier generator + void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp); +#endif // SERIALGC + + // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack + void push_fTOS(); + + // pops double TOS element from CPU stack and pushes on FPU stack + void pop_fTOS(); + + void empty_FPU_stack(); + + void push_IU_state(); + void pop_IU_state(); + + void push_FPU_state(); + void pop_FPU_state(); + + void push_CPU_state(); + void pop_CPU_state(); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + // Debugging + void _verify_oop(Register reg, const char * msg, const char * file, int line); + void _verify_oop_addr(Address addr, const char * msg, const char * file, int line); + + // TODO: verify_method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + +#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__) +#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop addr ", __FILE__, __LINE__) +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyOops + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + // only if +VerifyFPU + void stop(const char* msg); // prints msg, dumps registers and stops execution + void warn(const char* msg); // prints msg, but don't stop + void untested(const char* what = ""); + void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, 1024, "unimplemented: %s", what); stop(b); } + void should_not_reach_here() { stop("should not reach here"); } + void print_CPU_state(); + + // oops in code + AddressLiteral allocate_oop_address(jobject obj); // allocate_index + AddressLiteral constant_oop_address(jobject obj); // find_index + inline void set_oop (jobject obj, Register d); // uses allocate_oop_address + inline void set_oop_constant (jobject obj, Register d); // uses constant_oop_address + inline void set_oop (const AddressLiteral& obj_addr, Register d); // same as load_address + + // metadata in code that we have to keep track of + AddressLiteral allocate_metadata_address(Metadata* obj); // allocate_index + AddressLiteral constant_metadata_address(Metadata* obj); // find_index + inline void set_metadata (Metadata* obj, Register d); // uses allocate_metadata_address + inline void set_metadata_constant (Metadata* obj, Register d); // uses constant_metadata_address + inline void set_metadata (const AddressLiteral& obj_addr, Register d); // same as load_address + + void set_narrow_oop( jobject obj, Register d ); + void set_narrow_klass( Klass* k, Register d ); + + // nop padding + void align(int modulus); + + // declare a safepoint + void safepoint(); + + // factor out part of stop into subroutine to save space + void stop_subroutine(); + // factor out part of verify_oop into subroutine to save space + void verify_oop_subroutine(); + + // side-door communication with signalHandler in os_solaris.cpp + static address _verify_oop_implicit_branch[3]; + + int total_frame_size_in_bytes(int extraWords); + + // used when extraWords known statically + void save_frame(int extraWords = 0); + void save_frame_c1(int size_in_bytes); + // make a frame, and simultaneously pass up one or two register value + // into the new register window + void save_frame_and_mov(int extraWords, Register s1, Register d1, Register s2 = Register(), Register d2 = Register()); + + // give no. (outgoing) params, calc # of words will need on frame + void calc_mem_param_words(Register Rparam_words, Register Rresult); + + // used to calculate frame size dynamically + // result is in bytes and must be negated for save inst + void calc_frame_size(Register extraWords, Register resultReg); + + // calc and also save + void calc_frame_size_and_save(Register extraWords, Register resultReg); + + static void debug(char* msg, RegistersForDebugging* outWindow); + + // implementations of bytecodes used by both interpreter and compiler + + void lcmp( Register Ra_hi, Register Ra_low, + Register Rb_hi, Register Rb_low, + Register Rresult); + + void lneg( Register Rhi, Register Rlow ); + + void lshl( Register Rin_high, Register Rin_low, Register Rcount, + Register Rout_high, Register Rout_low, Register Rtemp ); + + void lshr( Register Rin_high, Register Rin_low, Register Rcount, + Register Rout_high, Register Rout_low, Register Rtemp ); + + void lushr( Register Rin_high, Register Rin_low, Register Rcount, + Register Rout_high, Register Rout_low, Register Rtemp ); + +#ifdef _LP64 + void lcmp( Register Ra, Register Rb, Register Rresult); +#endif + + // Load and store values by size and signed-ness + void load_sized_value( Address src, Register dst, size_t size_in_bytes, bool is_signed); + void store_sized_value(Register src, Address dst, size_t size_in_bytes); + + void float_cmp( bool is_float, int unordered_result, + FloatRegister Fa, FloatRegister Fb, + Register Rresult); + + void fneg( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + void fneg( FloatRegisterImpl::Width w, FloatRegister sd ) { Assembler::fneg(w, sd); } + void fmov( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + void fabs( FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d); + + void save_all_globals_into_locals(); + void restore_globals_from_locals(); + + void casx_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, + address lock_addr=0, bool use_call_vm=false); + void cas_under_lock(Register top_ptr_reg, Register top_reg, Register ptr_reg, + address lock_addr=0, bool use_call_vm=false); + void casn (Register addr_reg, Register cmp_reg, Register set_reg) ; + + // These set the icc condition code to equal if the lock succeeded + // and notEqual if it failed and requires a slow case + void compiler_lock_object(Register Roop, Register Rmark, Register Rbox, + Register Rscratch, + BiasedLockingCounters* counters = NULL, + bool try_bias = UseBiasedLocking); + void compiler_unlock_object(Register Roop, Register Rmark, Register Rbox, + Register Rscratch, + bool try_bias = UseBiasedLocking); + + // Biased locking support + // Upon entry, lock_reg must point to the lock record on the stack, + // obj_reg must contain the target object, and mark_reg must contain + // the target object's header. + // Destroys mark_reg if an attempt is made to bias an anonymously + // biased lock. In this case a failure will go either to the slow + // case or fall through with the notEqual condition code set with + // the expectation that the slow case in the runtime will be called. + // In the fall-through case where the CAS-based lock is done, + // mark_reg is not destroyed. + void biased_locking_enter(Register obj_reg, Register mark_reg, Register temp_reg, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + // Upon entry, the base register of mark_addr must contain the oop. + // Destroys temp_reg. + + // If allow_delay_slot_filling is set to true, the next instruction + // emitted after this one will go in an annulled delay slot if the + // biased locking exit case failed. + void biased_locking_exit(Address mark_addr, Register temp_reg, Label& done, bool allow_delay_slot_filling = false); + + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); + void incr_allocated_bytes(RegisterOrConstant size_in_bytes, + Register t1, Register t2); + + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register temp_reg, Register temp2_reg, + Label& no_such_interface); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg and temp2_reg. + // If super_check_offset is not -1, temp2_reg is not used and can be noreg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg can be noreg, if no temps are available. + // It can also be sub_klass or super_klass, meaning it's OK to kill that one. + // Updates the sub's secondary super cache as necessary. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Register temp3_reg, + Register temp4_reg, + Label* L_success, + Label* L_failure); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label& L_success); + + // method handles (JSR 292) + // offset relative to Gargs of argument at tos[arg_slot]. + // (arg_slot == 0 means the last argument, not the first). + RegisterOrConstant argument_offset(RegisterOrConstant arg_slot, + Register temp_reg, + int extra_slot_offset = 0); + // Address of Gargs and argument_offset. + Address argument_address(RegisterOrConstant arg_slot, + Register temp_reg = noreg, + int extra_slot_offset = 0); + + // Stack overflow checking + + // Note: this clobbers G3_scratch + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + set((-offset)+STACK_BIAS, G3_scratch); + st(G0, SP, G3_scratch); + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Clobbers tsp and scratch registers. + void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset); + + void verify_tlab(); + + Condition negate_condition(Condition cond); + + // Helper functions for statistics gathering. + // Conditionally (non-atomically) increments passed counter address, preserving condition codes. + void cond_inc(Condition cond, address counter_addr, Register Rtemp1, Register Rtemp2); + // Unconditional increment. + void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2); + void inc_counter(int* counter_addr, Register Rtmp1, Register Rtmp2); + + // Compare char[] arrays aligned to 4 bytes. + void char_arrays_equals(Register ary1, Register ary2, + Register limit, Register result, + Register chr1, Register chr2, Label& Ldone); + // Use BIS for zeroing + void bis_zeroing(Register to, Register count, Register temp, Label& Ldone); + +#undef VIRTUAL +}; + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual : public StackObj { + private: + MacroAssembler* _masm; + Label _label; + + public: + // 'temp' is a temp register that this object can use (and trash) + SkipIfEqual(MacroAssembler*, Register temp, + const bool* flag_addr, Assembler::Condition condition); + ~SkipIfEqual(); +}; + +#endif // CPU_SPARC_VM_MACROASSEMBLER_SPARC_HPP diff --git a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp new file mode 100644 index 00000000000..e9bdaea79ad --- /dev/null +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp @@ -0,0 +1,765 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_SPARC_VM_MACROASSEMBLER_SPARC_INLINE_HPP +#define CPU_SPARC_VM_MACROASSEMBLER_SPARC_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +inline bool Address::is_simm13(int offset) { return Assembler::is_simm13(disp() + offset); } + + +inline int AddressLiteral::low10() const { + return Assembler::low10(value()); +} + + +inline void MacroAssembler::pd_patch_instruction(address branch, address target) { + jint& stub_inst = *(jint*) branch; + stub_inst = patched_branch(target - branch, stub_inst, 0); +} + +#ifndef PRODUCT +inline void MacroAssembler::pd_print_patched_instruction(address branch) { + jint stub_inst = *(jint*) branch; + print_instruction(stub_inst); + ::tty->print("%s", " (unresolved)"); +} +#endif // PRODUCT + +// Use the right loads/stores for the platform +inline void MacroAssembler::ld_ptr( Register s1, Register s2, Register d ) { +#ifdef _LP64 + Assembler::ldx(s1, s2, d); +#else + ld( s1, s2, d); +#endif +} + +inline void MacroAssembler::ld_ptr( Register s1, int simm13a, Register d ) { +#ifdef _LP64 + Assembler::ldx(s1, simm13a, d); +#else + ld( s1, simm13a, d); +#endif +} + +#ifdef ASSERT +// ByteSize is only a class when ASSERT is defined, otherwise it's an int. +inline void MacroAssembler::ld_ptr( Register s1, ByteSize simm13a, Register d ) { + ld_ptr(s1, in_bytes(simm13a), d); +} +#endif + +inline void MacroAssembler::ld_ptr( Register s1, RegisterOrConstant s2, Register d ) { +#ifdef _LP64 + ldx(s1, s2, d); +#else + ld( s1, s2, d); +#endif +} + +inline void MacroAssembler::ld_ptr(const Address& a, Register d, int offset) { +#ifdef _LP64 + ldx(a, d, offset); +#else + ld( a, d, offset); +#endif +} + +inline void MacroAssembler::st_ptr( Register d, Register s1, Register s2 ) { +#ifdef _LP64 + Assembler::stx(d, s1, s2); +#else + st( d, s1, s2); +#endif +} + +inline void MacroAssembler::st_ptr( Register d, Register s1, int simm13a ) { +#ifdef _LP64 + Assembler::stx(d, s1, simm13a); +#else + st( d, s1, simm13a); +#endif +} + +#ifdef ASSERT +// ByteSize is only a class when ASSERT is defined, otherwise it's an int. +inline void MacroAssembler::st_ptr( Register d, Register s1, ByteSize simm13a ) { + st_ptr(d, s1, in_bytes(simm13a)); +} +#endif + +inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterOrConstant s2 ) { +#ifdef _LP64 + stx(d, s1, s2); +#else + st( d, s1, s2); +#endif +} + +inline void MacroAssembler::st_ptr(Register d, const Address& a, int offset) { +#ifdef _LP64 + stx(d, a, offset); +#else + st( d, a, offset); +#endif +} + +// Use the right loads/stores for the platform +inline void MacroAssembler::ld_long( Register s1, Register s2, Register d ) { +#ifdef _LP64 + Assembler::ldx(s1, s2, d); +#else + Assembler::ldd(s1, s2, d); +#endif +} + +inline void MacroAssembler::ld_long( Register s1, int simm13a, Register d ) { +#ifdef _LP64 + Assembler::ldx(s1, simm13a, d); +#else + Assembler::ldd(s1, simm13a, d); +#endif +} + +inline void MacroAssembler::ld_long( Register s1, RegisterOrConstant s2, Register d ) { +#ifdef _LP64 + ldx(s1, s2, d); +#else + ldd(s1, s2, d); +#endif +} + +inline void MacroAssembler::ld_long(const Address& a, Register d, int offset) { +#ifdef _LP64 + ldx(a, d, offset); +#else + ldd(a, d, offset); +#endif +} + +inline void MacroAssembler::st_long( Register d, Register s1, Register s2 ) { +#ifdef _LP64 + Assembler::stx(d, s1, s2); +#else + Assembler::std(d, s1, s2); +#endif +} + +inline void MacroAssembler::st_long( Register d, Register s1, int simm13a ) { +#ifdef _LP64 + Assembler::stx(d, s1, simm13a); +#else + Assembler::std(d, s1, simm13a); +#endif +} + +inline void MacroAssembler::st_long( Register d, Register s1, RegisterOrConstant s2 ) { +#ifdef _LP64 + stx(d, s1, s2); +#else + std(d, s1, s2); +#endif +} + +inline void MacroAssembler::st_long( Register d, const Address& a, int offset ) { +#ifdef _LP64 + stx(d, a, offset); +#else + std(d, a, offset); +#endif +} + +// Functions for isolating 64 bit shifts for LP64 + +inline void MacroAssembler::sll_ptr( Register s1, Register s2, Register d ) { +#ifdef _LP64 + Assembler::sllx(s1, s2, d); +#else + Assembler::sll( s1, s2, d); +#endif +} + +inline void MacroAssembler::sll_ptr( Register s1, int imm6a, Register d ) { +#ifdef _LP64 + Assembler::sllx(s1, imm6a, d); +#else + Assembler::sll( s1, imm6a, d); +#endif +} + +inline void MacroAssembler::srl_ptr( Register s1, Register s2, Register d ) { +#ifdef _LP64 + Assembler::srlx(s1, s2, d); +#else + Assembler::srl( s1, s2, d); +#endif +} + +inline void MacroAssembler::srl_ptr( Register s1, int imm6a, Register d ) { +#ifdef _LP64 + Assembler::srlx(s1, imm6a, d); +#else + Assembler::srl( s1, imm6a, d); +#endif +} + +inline void MacroAssembler::sll_ptr( Register s1, RegisterOrConstant s2, Register d ) { + if (s2.is_register()) sll_ptr(s1, s2.as_register(), d); + else sll_ptr(s1, s2.as_constant(), d); +} + +// Use the right branch for the platform + +inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { + if (VM_Version::v9_instructions_work()) + Assembler::bp(c, a, icc, p, d, rt); + else + Assembler::br(c, a, d, rt); +} + +inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) { + br(c, a, p, target(L)); +} + + +// Branch that tests either xcc or icc depending on the +// architecture compiled (LP64 or not) +inline void MacroAssembler::brx( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { +#ifdef _LP64 + Assembler::bp(c, a, xcc, p, d, rt); +#else + MacroAssembler::br(c, a, p, d, rt); +#endif +} + +inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) { + brx(c, a, p, target(L)); +} + +inline void MacroAssembler::ba( Label& L ) { + br(always, false, pt, L); +} + +// Warning: V9 only functions +inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { + Assembler::bp(c, a, cc, p, d, rt); +} + +inline void MacroAssembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { + Assembler::bp(c, a, cc, p, L); +} + +inline void MacroAssembler::fb( Condition c, bool a, Predict p, address d, relocInfo::relocType rt ) { + if (VM_Version::v9_instructions_work()) + fbp(c, a, fcc0, p, d, rt); + else + Assembler::fb(c, a, d, rt); +} + +inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) { + fb(c, a, p, target(L)); +} + +inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { + Assembler::fbp(c, a, cc, p, d, rt); +} + +inline void MacroAssembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { + Assembler::fbp(c, a, cc, p, L); +} + +inline void MacroAssembler::jmp( Register s1, Register s2 ) { jmpl( s1, s2, G0 ); } +inline void MacroAssembler::jmp( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, G0, rspec); } + +inline bool MacroAssembler::is_far_target(address d) { + if (ForceUnreachable) { + // References outside the code cache should be treated as far + return d < CodeCache::low_bound() || d > CodeCache::high_bound(); + } + return !is_in_wdisp30_range(d, CodeCache::low_bound()) || !is_in_wdisp30_range(d, CodeCache::high_bound()); +} + +// Call with a check to see if we need to deal with the added +// expense of relocation and if we overflow the displacement +// of the quick call instruction. +inline void MacroAssembler::call( address d, relocInfo::relocType rt ) { +#ifdef _LP64 + intptr_t disp; + // NULL is ok because it will be relocated later. + // Must change NULL to a reachable address in order to + // pass asserts here and in wdisp. + if ( d == NULL ) + d = pc(); + + // Is this address within range of the call instruction? + // If not, use the expensive instruction sequence + if (is_far_target(d)) { + relocate(rt); + AddressLiteral dest(d); + jumpl_to(dest, O7, O7); + } else { + Assembler::call(d, rt); + } +#else + Assembler::call( d, rt ); +#endif +} + +inline void MacroAssembler::call( Label& L, relocInfo::relocType rt ) { + MacroAssembler::call( target(L), rt); +} + + + +inline void MacroAssembler::callr( Register s1, Register s2 ) { jmpl( s1, s2, O7 ); } +inline void MacroAssembler::callr( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, O7, rspec); } + +// prefetch instruction +inline void MacroAssembler::iprefetch( address d, relocInfo::relocType rt ) { + if (VM_Version::v9_instructions_work()) + Assembler::bp( never, true, xcc, pt, d, rt ); +} +inline void MacroAssembler::iprefetch( Label& L) { iprefetch( target(L) ); } + + +// clobbers o7 on V8!! +// returns delta from gotten pc to addr after +inline int MacroAssembler::get_pc( Register d ) { + int x = offset(); + if (VM_Version::v9_instructions_work()) + rdpc(d); + else { + Label lbl; + Assembler::call(lbl, relocInfo::none); // No relocation as this is call to pc+0x8 + if (d == O7) delayed()->nop(); + else delayed()->mov(O7, d); + bind(lbl); + } + return offset() - x; +} + + +// Note: All MacroAssembler::set_foo functions are defined out-of-line. + + +// Loads the current PC of the following instruction as an immediate value in +// 2 instructions. All PCs in the CodeCache are within 2 Gig of each other. +inline intptr_t MacroAssembler::load_pc_address( Register reg, int bytes_to_skip ) { + intptr_t thepc = (intptr_t)pc() + 2*BytesPerInstWord + bytes_to_skip; +#ifdef _LP64 + Unimplemented(); +#else + Assembler::sethi( thepc & ~0x3ff, reg, internal_word_Relocation::spec((address)thepc)); + add(reg, thepc & 0x3ff, reg, internal_word_Relocation::spec((address)thepc)); +#endif + return thepc; +} + + +inline void MacroAssembler::load_contents(const AddressLiteral& addrlit, Register d, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, d); + } else { + sethi(addrlit, d); + } + ld(d, addrlit.low10() + offset, d); +} + + +inline void MacroAssembler::load_bool_contents(const AddressLiteral& addrlit, Register d, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, d); + } else { + sethi(addrlit, d); + } + ldub(d, addrlit.low10() + offset, d); +} + + +inline void MacroAssembler::load_ptr_contents(const AddressLiteral& addrlit, Register d, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, d); + } else { + sethi(addrlit, d); + } + ld_ptr(d, addrlit.low10() + offset, d); +} + + +inline void MacroAssembler::store_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, temp); + } else { + sethi(addrlit, temp); + } + st(s, temp, addrlit.low10() + offset); +} + + +inline void MacroAssembler::store_ptr_contents(Register s, const AddressLiteral& addrlit, Register temp, int offset) { + assert_not_delayed(); + if (ForceUnreachable) { + patchable_sethi(addrlit, temp); + } else { + sethi(addrlit, temp); + } + st_ptr(s, temp, addrlit.low10() + offset); +} + + +// This code sequence is relocatable to any address, even on LP64. +inline void MacroAssembler::jumpl_to(const AddressLiteral& addrlit, Register temp, Register d, int offset) { + assert_not_delayed(); + // Force fixed length sethi because NativeJump and NativeFarCall don't handle + // variable length instruction streams. + patchable_sethi(addrlit, temp); + jmpl(temp, addrlit.low10() + offset, d); +} + + +inline void MacroAssembler::jump_to(const AddressLiteral& addrlit, Register temp, int offset) { + jumpl_to(addrlit, temp, G0, offset); +} + + +inline void MacroAssembler::jump_indirect_to(Address& a, Register temp, + int ld_offset, int jmp_offset) { + assert_not_delayed(); + //sethi(al); // sethi is caller responsibility for this one + ld_ptr(a, temp, ld_offset); + jmp(temp, jmp_offset); +} + + +inline void MacroAssembler::set_metadata(Metadata* obj, Register d) { + set_metadata(allocate_metadata_address(obj), d); +} + +inline void MacroAssembler::set_metadata_constant(Metadata* obj, Register d) { + set_metadata(constant_metadata_address(obj), d); +} + +inline void MacroAssembler::set_metadata(const AddressLiteral& obj_addr, Register d) { + assert(obj_addr.rspec().type() == relocInfo::metadata_type, "must be a metadata reloc"); + set(obj_addr, d); +} + +inline void MacroAssembler::set_oop(jobject obj, Register d) { + set_oop(allocate_oop_address(obj), d); +} + + +inline void MacroAssembler::set_oop_constant(jobject obj, Register d) { + set_oop(constant_oop_address(obj), d); +} + + +inline void MacroAssembler::set_oop(const AddressLiteral& obj_addr, Register d) { + assert(obj_addr.rspec().type() == relocInfo::oop_type, "must be an oop reloc"); + set(obj_addr, d); +} + + +inline void MacroAssembler::load_argument( Argument& a, Register d ) { + if (a.is_register()) + mov(a.as_register(), d); + else + ld (a.as_address(), d); +} + +inline void MacroAssembler::store_argument( Register s, Argument& a ) { + if (a.is_register()) + mov(s, a.as_register()); + else + st_ptr (s, a.as_address()); // ABI says everything is right justified. +} + +inline void MacroAssembler::store_ptr_argument( Register s, Argument& a ) { + if (a.is_register()) + mov(s, a.as_register()); + else + st_ptr (s, a.as_address()); +} + + +#ifdef _LP64 +inline void MacroAssembler::store_float_argument( FloatRegister s, Argument& a ) { + if (a.is_float_register()) +// V9 ABI has F1, F3, F5 are used to pass instead of O0, O1, O2 + fmov(FloatRegisterImpl::S, s, a.as_float_register() ); + else + // Floats are stored in the high half of the stack entry + // The low half is undefined per the ABI. + stf(FloatRegisterImpl::S, s, a.as_address(), sizeof(jfloat)); +} + +inline void MacroAssembler::store_double_argument( FloatRegister s, Argument& a ) { + if (a.is_float_register()) +// V9 ABI has D0, D2, D4 are used to pass instead of O0, O1, O2 + fmov(FloatRegisterImpl::D, s, a.as_double_register() ); + else + stf(FloatRegisterImpl::D, s, a.as_address()); +} + +inline void MacroAssembler::store_long_argument( Register s, Argument& a ) { + if (a.is_register()) + mov(s, a.as_register()); + else + stx(s, a.as_address()); +} +#endif + +inline void MacroAssembler::add(Register s1, int simm13a, Register d, relocInfo::relocType rtype) { + relocate(rtype); + add(s1, simm13a, d); +} +inline void MacroAssembler::add(Register s1, int simm13a, Register d, RelocationHolder const& rspec) { + relocate(rspec); + add(s1, simm13a, d); +} + +// form effective addresses this way: +inline void MacroAssembler::add(const Address& a, Register d, int offset) { + if (a.has_index()) add(a.base(), a.index(), d); + else { add(a.base(), a.disp() + offset, d, a.rspec(offset)); offset = 0; } + if (offset != 0) add(d, offset, d); +} +inline void MacroAssembler::add(Register s1, RegisterOrConstant s2, Register d, int offset) { + if (s2.is_register()) add(s1, s2.as_register(), d); + else { add(s1, s2.as_constant() + offset, d); offset = 0; } + if (offset != 0) add(d, offset, d); +} + +inline void MacroAssembler::andn(Register s1, RegisterOrConstant s2, Register d) { + if (s2.is_register()) andn(s1, s2.as_register(), d); + else andn(s1, s2.as_constant(), d); +} + +inline void MacroAssembler::clrb( Register s1, Register s2) { stb( G0, s1, s2 ); } +inline void MacroAssembler::clrh( Register s1, Register s2) { sth( G0, s1, s2 ); } +inline void MacroAssembler::clr( Register s1, Register s2) { stw( G0, s1, s2 ); } +inline void MacroAssembler::clrx( Register s1, Register s2) { stx( G0, s1, s2 ); } + +inline void MacroAssembler::clrb( Register s1, int simm13a) { stb( G0, s1, simm13a); } +inline void MacroAssembler::clrh( Register s1, int simm13a) { sth( G0, s1, simm13a); } +inline void MacroAssembler::clr( Register s1, int simm13a) { stw( G0, s1, simm13a); } +inline void MacroAssembler::clrx( Register s1, int simm13a) { stx( G0, s1, simm13a); } + +#ifdef _LP64 +// Make all 32 bit loads signed so 64 bit registers maintain proper sign +inline void MacroAssembler::ld( Register s1, Register s2, Register d) { ldsw( s1, s2, d); } +inline void MacroAssembler::ld( Register s1, int simm13a, Register d) { ldsw( s1, simm13a, d); } +#else +inline void MacroAssembler::ld( Register s1, Register s2, Register d) { lduw( s1, s2, d); } +inline void MacroAssembler::ld( Register s1, int simm13a, Register d) { lduw( s1, simm13a, d); } +#endif + +#ifdef ASSERT + // ByteSize is only a class when ASSERT is defined, otherwise it's an int. +# ifdef _LP64 +inline void MacroAssembler::ld(Register s1, ByteSize simm13a, Register d) { ldsw( s1, in_bytes(simm13a), d); } +# else +inline void MacroAssembler::ld(Register s1, ByteSize simm13a, Register d) { lduw( s1, in_bytes(simm13a), d); } +# endif +#endif + +inline void MacroAssembler::ld( const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ld( a.base(), a.index(), d); } + else { ld( a.base(), a.disp() + offset, d); } +} + +inline void MacroAssembler::ldsb(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldsb(a.base(), a.index(), d); } + else { ldsb(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldsh(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldsh(a.base(), a.index(), d); } + else { ldsh(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldsw(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldsw(a.base(), a.index(), d); } + else { ldsw(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldub(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldub(a.base(), a.index(), d); } + else { ldub(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::lduh(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); lduh(a.base(), a.index(), d); } + else { lduh(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::lduw(const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); lduw(a.base(), a.index(), d); } + else { lduw(a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldd( const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldd( a.base(), a.index(), d); } + else { ldd( a.base(), a.disp() + offset, d); } +} +inline void MacroAssembler::ldx( const Address& a, Register d, int offset) { + if (a.has_index()) { assert(offset == 0, ""); ldx( a.base(), a.index(), d); } + else { ldx( a.base(), a.disp() + offset, d); } +} + +inline void MacroAssembler::ldub(Register s1, RegisterOrConstant s2, Register d) { ldub(Address(s1, s2), d); } +inline void MacroAssembler::ldsb(Register s1, RegisterOrConstant s2, Register d) { ldsb(Address(s1, s2), d); } +inline void MacroAssembler::lduh(Register s1, RegisterOrConstant s2, Register d) { lduh(Address(s1, s2), d); } +inline void MacroAssembler::ldsh(Register s1, RegisterOrConstant s2, Register d) { ldsh(Address(s1, s2), d); } +inline void MacroAssembler::lduw(Register s1, RegisterOrConstant s2, Register d) { lduw(Address(s1, s2), d); } +inline void MacroAssembler::ldsw(Register s1, RegisterOrConstant s2, Register d) { ldsw(Address(s1, s2), d); } +inline void MacroAssembler::ldx( Register s1, RegisterOrConstant s2, Register d) { ldx( Address(s1, s2), d); } +inline void MacroAssembler::ld( Register s1, RegisterOrConstant s2, Register d) { ld( Address(s1, s2), d); } +inline void MacroAssembler::ldd( Register s1, RegisterOrConstant s2, Register d) { ldd( Address(s1, s2), d); } + +inline void MacroAssembler::ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d) { + if (s2.is_register()) ldf(w, s1, s2.as_register(), d); + else ldf(w, s1, s2.as_constant(), d); +} + +inline void MacroAssembler::ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset) { + relocate(a.rspec(offset)); + ldf(w, a.base(), a.disp() + offset, d); +} + +// returns if membar generates anything, obviously this code should mirror +// membar below. +inline bool MacroAssembler::membar_has_effect( Membar_mask_bits const7a ) { + if( !os::is_MP() ) return false; // Not needed on single CPU + if( VM_Version::v9_instructions_work() ) { + const Membar_mask_bits effective_mask = + Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore)); + return (effective_mask != 0); + } else { + return true; + } +} + +inline void MacroAssembler::membar( Membar_mask_bits const7a ) { + // Uniprocessors do not need memory barriers + if (!os::is_MP()) return; + // Weakened for current Sparcs and TSO. See the v9 manual, sections 8.4.3, + // 8.4.4.3, a.31 and a.50. + if( VM_Version::v9_instructions_work() ) { + // Under TSO, setting bit 3, 2, or 0 is redundant, so the only value + // of the mmask subfield of const7a that does anything that isn't done + // implicitly is StoreLoad. + const Membar_mask_bits effective_mask = + Membar_mask_bits(const7a & ~(LoadLoad | LoadStore | StoreStore)); + if ( effective_mask != 0 ) { + Assembler::membar( effective_mask ); + } + } else { + // stbar is the closest there is on v8. Equivalent to membar(StoreStore). We + // do not issue the stbar because to my knowledge all v8 machines implement TSO, + // which guarantees that all stores behave as if an stbar were issued just after + // each one of them. On these machines, stbar ought to be a nop. There doesn't + // appear to be an equivalent of membar(StoreLoad) on v8: TSO doesn't require it, + // it can't be specified by stbar, nor have I come up with a way to simulate it. + // + // Addendum. Dave says that ldstub guarantees a write buffer flush to coherent + // space. Put one here to be on the safe side. + Assembler::ldstub(SP, 0, G0); + } +} + +inline void MacroAssembler::prefetch(const Address& a, PrefetchFcn f, int offset) { + relocate(a.rspec(offset)); + assert(!a.has_index(), ""); + prefetch(a.base(), a.disp() + offset, f); +} + +inline void MacroAssembler::st(Register d, Register s1, Register s2) { stw(d, s1, s2); } +inline void MacroAssembler::st(Register d, Register s1, int simm13a) { stw(d, s1, simm13a); } + +#ifdef ASSERT +// ByteSize is only a class when ASSERT is defined, otherwise it's an int. +inline void MacroAssembler::st(Register d, Register s1, ByteSize simm13a) { stw(d, s1, in_bytes(simm13a)); } +#endif + +inline void MacroAssembler::st(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); st( d, a.base(), a.index() ); } + else { st( d, a.base(), a.disp() + offset); } +} + +inline void MacroAssembler::stb(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); stb(d, a.base(), a.index() ); } + else { stb(d, a.base(), a.disp() + offset); } +} +inline void MacroAssembler::sth(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); sth(d, a.base(), a.index() ); } + else { sth(d, a.base(), a.disp() + offset); } +} +inline void MacroAssembler::stw(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); stw(d, a.base(), a.index() ); } + else { stw(d, a.base(), a.disp() + offset); } +} +inline void MacroAssembler::std(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); std(d, a.base(), a.index() ); } + else { std(d, a.base(), a.disp() + offset); } +} +inline void MacroAssembler::stx(Register d, const Address& a, int offset) { + if (a.has_index()) { assert(offset == 0, ""); stx(d, a.base(), a.index() ); } + else { stx(d, a.base(), a.disp() + offset); } +} + +inline void MacroAssembler::stb(Register d, Register s1, RegisterOrConstant s2) { stb(d, Address(s1, s2)); } +inline void MacroAssembler::sth(Register d, Register s1, RegisterOrConstant s2) { sth(d, Address(s1, s2)); } +inline void MacroAssembler::stw(Register d, Register s1, RegisterOrConstant s2) { stw(d, Address(s1, s2)); } +inline void MacroAssembler::stx(Register d, Register s1, RegisterOrConstant s2) { stx(d, Address(s1, s2)); } +inline void MacroAssembler::std(Register d, Register s1, RegisterOrConstant s2) { std(d, Address(s1, s2)); } +inline void MacroAssembler::st( Register d, Register s1, RegisterOrConstant s2) { st( d, Address(s1, s2)); } + +inline void MacroAssembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, RegisterOrConstant s2) { + if (s2.is_register()) stf(w, d, s1, s2.as_register()); + else stf(w, d, s1, s2.as_constant()); +} + +inline void MacroAssembler::stf(FloatRegisterImpl::Width w, FloatRegister d, const Address& a, int offset) { + relocate(a.rspec(offset)); + if (a.has_index()) { assert(offset == 0, ""); stf(w, d, a.base(), a.index() ); } + else { stf(w, d, a.base(), a.disp() + offset); } +} + +inline void MacroAssembler::sub(Register s1, RegisterOrConstant s2, Register d, int offset) { + if (s2.is_register()) sub(s1, s2.as_register(), d); + else { sub(s1, s2.as_constant() + offset, d); offset = 0; } + if (offset != 0) sub(d, offset, d); +} + +inline void MacroAssembler::swap(Address& a, Register d, int offset) { + relocate(a.rspec(offset)); + if (a.has_index()) { assert(offset == 0, ""); swap(a.base(), a.index(), d ); } + else { swap(a.base(), a.disp() + offset, d); } +} + +#endif // CPU_SPARC_VM_MACROASSEMBLER_SPARC_INLINE_HPP diff --git a/hotspot/src/cpu/sparc/vm/metaspaceShared_sparc.cpp b/hotspot/src/cpu/sparc/vm/metaspaceShared_sparc.cpp index 70ef0c78bb6..28f606be3b0 100644 --- a/hotspot/src/cpu/sparc/vm/metaspaceShared_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/metaspaceShared_sparc.cpp @@ -23,7 +23,8 @@ */ #include "precompiled.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "asm/codeBuffer.hpp" #include "memory/metaspaceShared.hpp" // Generate the self-patching vtable method: diff --git a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp index 9245f3b40ff..f4637bf76de 100644 --- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/interpreter.hpp" #include "memory/allocation.inline.hpp" #include "prims/methodHandles.hpp" diff --git a/hotspot/src/cpu/sparc/vm/nativeInst_sparc.cpp b/hotspot/src/cpu/sparc/vm/nativeInst_sparc.cpp index 97108bfe82c..71a938e59fc 100644 --- a/hotspot/src/cpu/sparc/vm/nativeInst_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/nativeInst_sparc.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.hpp" #include "memory/resourceArea.hpp" #include "nativeInst_sparc.hpp" #include "oops/oop.inline.hpp" diff --git a/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp b/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp index 8617ec37e03..3c9a5e9b7a7 100644 --- a/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp @@ -25,7 +25,7 @@ #ifndef CPU_SPARC_VM_NATIVEINST_SPARC_HPP #define CPU_SPARC_VM_NATIVEINST_SPARC_HPP -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "memory/allocation.hpp" #include "runtime/icache.hpp" #include "runtime/os.hpp" @@ -194,11 +194,10 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC { static int inv_simm( int x, int nbits ) { return Assembler::inv_simm(x, nbits); } static intptr_t inv_wdisp( int x, int nbits ) { return Assembler::inv_wdisp( x, 0, nbits); } static intptr_t inv_wdisp16( int x ) { return Assembler::inv_wdisp16(x, 0); } - static int branch_destination_offset(int x) { return Assembler::branch_destination(x, 0); } + static int branch_destination_offset(int x) { return MacroAssembler::branch_destination(x, 0); } static int patch_branch_destination_offset(int dest_offset, int x) { - return Assembler::patched_branch(dest_offset, x, 0); + return MacroAssembler::patched_branch(dest_offset, x, 0); } - void set_annul_bit() { set_long_at(0, long_at(0) | Assembler::annul(true)); } // utility for checking if x is either of 2 small constants static bool is_either(int x, int k1, int k2) { @@ -889,7 +888,6 @@ class NativeGeneralJump: public NativeInstruction { int patched_instr = patch_branch_destination_offset(dest - addr_at(0), long_at(0)); set_long_at(0, patched_instr); } - void set_annul() { set_annul_bit(); } NativeInstruction *delay_slot_instr() { return nativeInstruction_at(addr_at(4));} void fill_delay_slot(int instr) { set_long_at(4, instr);} Assembler::Condition condition() { diff --git a/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp b/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp index aeb2420c640..2a3516dcf6b 100644 --- a/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/relocInfo_sparc.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.inline.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/assembler.hpp" #include "code/relocInfo.hpp" #include "nativeInst_sparc.hpp" #include "oops/oop.inline.hpp" diff --git a/hotspot/src/cpu/sparc/vm/runtime_sparc.cpp b/hotspot/src/cpu/sparc/vm/runtime_sparc.cpp index ff04e6fef14..0e498d71285 100644 --- a/hotspot/src/cpu/sparc/vm/runtime_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/runtime_sparc.cpp @@ -24,8 +24,7 @@ #include "precompiled.hpp" #ifdef COMPILER2 -#include "asm/assembler.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "classfile/systemDictionary.hpp" #include "code/vmreg.hpp" #include "interpreter/interpreter.hpp" diff --git a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp index ef8273f58f8..b6a338c34ee 100644 --- a/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/sharedRuntime_sparc.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" diff --git a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp index e8e7499b5f9..3cd4ff2d3f5 100644 --- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "interpreter/interpreter.hpp" #include "nativeInst_sparc.hpp" #include "oops/instanceOop.hpp" diff --git a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp index d51b9e5bb0e..2de866adb07 100644 --- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterGenerator.hpp" diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp index aaa601fbbc9..7d3becef336 100644 --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "memory/resourceArea.hpp" #include "runtime/java.hpp" #include "runtime/stubCodeGenerator.hpp" diff --git a/hotspot/src/cpu/sparc/vm/vmreg_sparc.cpp b/hotspot/src/cpu/sparc/vm/vmreg_sparc.cpp index 470f354bf46..c261a5dd09f 100644 --- a/hotspot/src/cpu/sparc/vm/vmreg_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/vmreg_sparc.cpp @@ -23,7 +23,6 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" #include "code/vmreg.hpp" diff --git a/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp b/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp index 8e7021fa1a7..3c4f24c3867 100644 --- a/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/vtableStubs_sparc.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "code/vtableStubs.hpp" #include "interp_masm_sparc.hpp" #include "memory/resourceArea.hpp" diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index 6b9677d30c5..0a51534ed24 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -23,7 +23,8 @@ */ #include "precompiled.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" #include "gc_interface/collectedHeap.inline.hpp" #include "interpreter/interpreter.hpp" #include "memory/cardTableModRefBS.hpp" @@ -1154,7 +1155,7 @@ void Assembler::call_literal(address entry, RelocationHolder const& rspec) { assert(entry != NULL, "call most probably wrong"); InstructionMark im(this); emit_byte(0xE8); - intptr_t disp = entry - (_code_pos + sizeof(int32_t)); + intptr_t disp = entry - (pc() + sizeof(int32_t)); assert(is_simm32(disp), "must be 32bit offset (call2)"); // Technically, should use call32_operand, but this format is // implied by the fact that we're emitting a call instruction. @@ -1167,6 +1168,10 @@ void Assembler::cdql() { emit_byte(0x99); } +void Assembler::cld() { + emit_byte(0xfc); +} + void Assembler::cmovl(Condition cc, Register dst, Register src) { NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); int encode = prefix_and_encode(dst->encoding(), src->encoding()); @@ -1260,6 +1265,11 @@ void Assembler::comiss(XMMRegister dst, XMMRegister src) { emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE); } +void Assembler::cpuid() { + emit_byte(0x0F); + emit_byte(0xA2); +} + void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3); @@ -1417,7 +1427,7 @@ void Assembler::jcc(Condition cc, Label& L, bool maybe_short) { const int short_size = 2; const int long_size = 6; - intptr_t offs = (intptr_t)dst - (intptr_t)_code_pos; + intptr_t offs = (intptr_t)dst - (intptr_t)pc(); if (maybe_short && is8bit(offs - short_size)) { // 0111 tttn #8-bit disp emit_byte(0x70 | cc); @@ -1447,14 +1457,14 @@ void Assembler::jccb(Condition cc, Label& L) { const int short_size = 2; address entry = target(L); #ifdef ASSERT - intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); + intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size); intptr_t delta = short_branch_delta(); if (delta != 0) { dist += (dist < 0 ? (-delta) :delta); } assert(is8bit(dist), "Dispacement too large for a short jmp"); #endif - intptr_t offs = (intptr_t)entry - (intptr_t)_code_pos; + intptr_t offs = (intptr_t)entry - (intptr_t)pc(); // 0111 tttn #8-bit disp emit_byte(0x70 | cc); emit_byte((offs - short_size) & 0xFF); @@ -1480,7 +1490,7 @@ void Assembler::jmp(Label& L, bool maybe_short) { InstructionMark im(this); const int short_size = 2; const int long_size = 5; - intptr_t offs = entry - _code_pos; + intptr_t offs = entry - pc(); if (maybe_short && is8bit(offs - short_size)) { emit_byte(0xEB); emit_byte((offs - short_size) & 0xFF); @@ -1510,7 +1520,7 @@ void Assembler::jmp_literal(address dest, RelocationHolder const& rspec) { InstructionMark im(this); emit_byte(0xE9); assert(dest != NULL, "must have a target"); - intptr_t disp = dest - (_code_pos + sizeof(int32_t)); + intptr_t disp = dest - (pc() + sizeof(int32_t)); assert(is_simm32(disp), "must be 32bit offset (jmp)"); emit_data(disp, rspec.reloc(), call32_operand); } @@ -1521,14 +1531,14 @@ void Assembler::jmpb(Label& L) { address entry = target(L); assert(entry != NULL, "jmp most probably wrong"); #ifdef ASSERT - intptr_t dist = (intptr_t)entry - ((intptr_t)_code_pos + short_size); + intptr_t dist = (intptr_t)entry - ((intptr_t)pc() + short_size); intptr_t delta = short_branch_delta(); if (delta != 0) { dist += (dist < 0 ? (-delta) :delta); } assert(is8bit(dist), "Dispacement too large for a short jmp"); #endif - intptr_t offs = entry - _code_pos; + intptr_t offs = entry - pc(); emit_byte(0xEB); emit_byte((offs - short_size) & 0xFF); } else { @@ -1558,6 +1568,12 @@ void Assembler::leal(Register dst, Address src) { emit_operand(dst, src); } +void Assembler::lfence() { + emit_byte(0x0F); + emit_byte(0xAE); + emit_byte(0xE8); +} + void Assembler::lock() { emit_byte(0xF0); } @@ -2671,6 +2687,10 @@ void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); } +void Assembler::std() { + emit_byte(0xfd); +} + void Assembler::sqrtss(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse(), "")); emit_simd_arith(0x51, dst, src, VEX_SIMD_F3); @@ -2816,6 +2836,12 @@ void Assembler::xchgl(Register dst, Register src) { emit_byte(0xc0 | encode); } +void Assembler::xgetbv() { + emit_byte(0x0F); + emit_byte(0x01); + emit_byte(0xD0); +} + void Assembler::xorl(Register dst, int32_t imm32) { prefix(dst); emit_arith(0x81, 0xF0, dst, imm32); @@ -4361,7 +4387,7 @@ bool Assembler::reachable(AddressLiteral adr) { disp = (int64_t)adr._target - ((int64_t)CodeCache::high_bound() + sizeof(int)); if (!is_simm32(disp)) return false; - disp = (int64_t)adr._target - ((int64_t)_code_pos + sizeof(int)); + disp = (int64_t)adr._target - ((int64_t)pc() + sizeof(int)); // Because rip relative is a disp + address_of_next_instruction and we // don't know the value of address_of_next_instruction we apply a fudge factor @@ -4392,7 +4418,7 @@ void Assembler::emit_data64(jlong data, relocInfo::relocType rtype, int format) { if (rtype == relocInfo::none) { - emit_long64(data); + emit_int64(data); } else { emit_data64(data, Relocation::spec_simple(rtype), format); } @@ -4410,7 +4436,7 @@ void Assembler::emit_data64(jlong data, #ifdef ASSERT check_relocation(rspec, format); #endif - emit_long64(data); + emit_int64(data); } int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { @@ -4943,7 +4969,7 @@ void Assembler::mov64(Register dst, int64_t imm64) { InstructionMark im(this); int encode = prefixq_and_encode(dst->encoding()); emit_byte(0xB8 | encode); - emit_long64(imm64); + emit_int64(imm64); } void Assembler::mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec) { @@ -5417,6043 +5443,3 @@ void Assembler::xorq(Register dst, Address src) { } #endif // !LP64 - -static Assembler::Condition reverse[] = { - Assembler::noOverflow /* overflow = 0x0 */ , - Assembler::overflow /* noOverflow = 0x1 */ , - Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , - Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , - Assembler::notZero /* zero = 0x4, equal = 0x4 */ , - Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , - Assembler::above /* belowEqual = 0x6 */ , - Assembler::belowEqual /* above = 0x7 */ , - Assembler::positive /* negative = 0x8 */ , - Assembler::negative /* positive = 0x9 */ , - Assembler::noParity /* parity = 0xa */ , - Assembler::parity /* noParity = 0xb */ , - Assembler::greaterEqual /* less = 0xc */ , - Assembler::less /* greaterEqual = 0xd */ , - Assembler::greater /* lessEqual = 0xe */ , - Assembler::lessEqual /* greater = 0xf, */ - -}; - - -// Implementation of MacroAssembler - -// First all the versions that have distinct versions depending on 32/64 bit -// Unless the difference is trivial (1 line or so). - -#ifndef _LP64 - -// 32bit versions - -Address MacroAssembler::as_Address(AddressLiteral adr) { - return Address(adr.target(), adr.rspec()); -} - -Address MacroAssembler::as_Address(ArrayAddress adr) { - return Address::make_array(adr); -} - -int MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); - assert_different_registers(lock_reg, obj_reg, swap_reg); - - if (PrintBiasedLockingStatistics && counters == NULL) - counters = BiasedLocking::counters(); - - bool need_tmp_reg = false; - if (tmp_reg == noreg) { - need_tmp_reg = true; - tmp_reg = lock_reg; - } else { - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); - } - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); - Address saved_mark_addr(lock_reg, 0); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - int null_check_offset = -1; - if (!swap_reg_contains_mark) { - null_check_offset = offset(); - movl(swap_reg, mark_addr); - } - if (need_tmp_reg) { - push(tmp_reg); - } - movl(tmp_reg, swap_reg); - andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); - cmpl(tmp_reg, markOopDesc::biased_lock_pattern); - if (need_tmp_reg) { - pop(tmp_reg); - } - jcc(Assembler::notEqual, cas_label); - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. - // Note that because there is no current thread register on x86 we - // need to store off the mark word we read out of the object to - // avoid reloading it and needing to recheck invariants below. This - // store is unfortunate but it makes the overall code shorter and - // simpler. - movl(saved_mark_addr, swap_reg); - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - xorl(swap_reg, tmp_reg); - if (swap_reg_contains_mark) { - null_check_offset = offset(); - } - movl(tmp_reg, klass_addr); - xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); - andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); - if (need_tmp_reg) { - pop(tmp_reg); - } - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->biased_lock_entry_count_addr())); - } - jcc(Assembler::equal, done); - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - testl(swap_reg, markOopDesc::biased_lock_mask_in_place); - jcc(Assembler::notZero, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - testl(swap_reg, markOopDesc::epoch_mask_in_place); - jcc(Assembler::notZero, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - movl(swap_reg, saved_mark_addr); - andl(swap_reg, - markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - orl(tmp_reg, swap_reg); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - if (need_tmp_reg) { - push(tmp_reg); - } - get_thread(tmp_reg); - movl(swap_reg, klass_addr); - orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); - movl(swap_reg, saved_mark_addr); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - movl(swap_reg, saved_mark_addr); - if (need_tmp_reg) { - push(tmp_reg); - } - movl(tmp_reg, klass_addr); - movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); - if (os::is_MP()) { - lock(); - } - cmpxchgptr(tmp_reg, Address(obj_reg, 0)); - if (need_tmp_reg) { - pop(tmp_reg); - } - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address)counters->revoked_lock_entry_count_addr())); - } - - bind(cas_label); - - return null_check_offset; -} -void MacroAssembler::call_VM_leaf_base(address entry_point, - int number_of_arguments) { - call(RuntimeAddress(entry_point)); - increment(rsp, number_of_arguments * wordSize); -} - -void MacroAssembler::cmpklass(Address src1, Metadata* obj) { - cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::cmpklass(Register src1, Metadata* obj) { - cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::cmpoop(Address src1, jobject obj) { - cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::cmpoop(Register src1, jobject obj) { - cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::extend_sign(Register hi, Register lo) { - // According to Intel Doc. AP-526, "Integer Divide", p.18. - if (VM_Version::is_P6() && hi == rdx && lo == rax) { - cdql(); - } else { - movl(hi, lo); - sarl(hi, 31); - } -} - -void MacroAssembler::jC2(Register tmp, Label& L) { - // set parity bit if FPU flag C2 is set (via rax) - save_rax(tmp); - fwait(); fnstsw_ax(); - sahf(); - restore_rax(tmp); - // branch - jcc(Assembler::parity, L); -} - -void MacroAssembler::jnC2(Register tmp, Label& L) { - // set parity bit if FPU flag C2 is set (via rax) - save_rax(tmp); - fwait(); fnstsw_ax(); - sahf(); - restore_rax(tmp); - // branch - jcc(Assembler::noParity, L); -} - -// 32bit can do a case table jump in one instruction but we no longer allow the base -// to be installed in the Address class -void MacroAssembler::jump(ArrayAddress entry) { - jmp(as_Address(entry)); -} - -// Note: y_lo will be destroyed -void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { - // Long compare for Java (semantics as described in JVM spec.) - Label high, low, done; - - cmpl(x_hi, y_hi); - jcc(Assembler::less, low); - jcc(Assembler::greater, high); - // x_hi is the return register - xorl(x_hi, x_hi); - cmpl(x_lo, y_lo); - jcc(Assembler::below, low); - jcc(Assembler::equal, done); - - bind(high); - xorl(x_hi, x_hi); - increment(x_hi); - jmp(done); - - bind(low); - xorl(x_hi, x_hi); - decrementl(x_hi); - - bind(done); -} - -void MacroAssembler::lea(Register dst, AddressLiteral src) { - mov_literal32(dst, (int32_t)src.target(), src.rspec()); -} - -void MacroAssembler::lea(Address dst, AddressLiteral adr) { - // leal(dst, as_Address(adr)); - // see note in movl as to why we must use a move - mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); -} - -void MacroAssembler::leave() { - mov(rsp, rbp); - pop(rbp); -} - -void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { - // Multiplication of two Java long values stored on the stack - // as illustrated below. Result is in rdx:rax. - // - // rsp ---> [ ?? ] \ \ - // .... | y_rsp_offset | - // [ y_lo ] / (in bytes) | x_rsp_offset - // [ y_hi ] | (in bytes) - // .... | - // [ x_lo ] / - // [ x_hi ] - // .... - // - // Basic idea: lo(result) = lo(x_lo * y_lo) - // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) - Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); - Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); - Label quick; - // load x_hi, y_hi and check if quick - // multiplication is possible - movl(rbx, x_hi); - movl(rcx, y_hi); - movl(rax, rbx); - orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 - jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply - // do full multiplication - // 1st step - mull(y_lo); // x_hi * y_lo - movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, - // 2nd step - movl(rax, x_lo); - mull(rcx); // x_lo * y_hi - addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, - // 3rd step - bind(quick); // note: rbx, = 0 if quick multiply! - movl(rax, x_lo); - mull(y_lo); // x_lo * y_lo - addl(rdx, rbx); // correct hi(x_lo * y_lo) -} - -void MacroAssembler::lneg(Register hi, Register lo) { - negl(lo); - adcl(hi, 0); - negl(hi); -} - -void MacroAssembler::lshl(Register hi, Register lo) { - // Java shift left long support (semantics as described in JVM spec., p.305) - // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) - // shift value is in rcx ! - assert(hi != rcx, "must not use rcx"); - assert(lo != rcx, "must not use rcx"); - const Register s = rcx; // shift count - const int n = BitsPerWord; - Label L; - andl(s, 0x3f); // s := s & 0x3f (s < 0x40) - cmpl(s, n); // if (s < n) - jcc(Assembler::less, L); // else (s >= n) - movl(hi, lo); // x := x << n - xorl(lo, lo); - // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! - bind(L); // s (mod n) < n - shldl(hi, lo); // x := x << s - shll(lo); -} - - -void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { - // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) - // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) - assert(hi != rcx, "must not use rcx"); - assert(lo != rcx, "must not use rcx"); - const Register s = rcx; // shift count - const int n = BitsPerWord; - Label L; - andl(s, 0x3f); // s := s & 0x3f (s < 0x40) - cmpl(s, n); // if (s < n) - jcc(Assembler::less, L); // else (s >= n) - movl(lo, hi); // x := x >> n - if (sign_extension) sarl(hi, 31); - else xorl(hi, hi); - // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! - bind(L); // s (mod n) < n - shrdl(lo, hi); // x := x >> s - if (sign_extension) sarl(hi); - else shrl(hi); -} - -void MacroAssembler::movoop(Register dst, jobject obj) { - mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::movoop(Address dst, jobject obj) { - mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { - mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { - mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::movptr(Register dst, AddressLiteral src) { - if (src.is_lval()) { - mov_literal32(dst, (intptr_t)src.target(), src.rspec()); - } else { - movl(dst, as_Address(src)); - } -} - -void MacroAssembler::movptr(ArrayAddress dst, Register src) { - movl(as_Address(dst), src); -} - -void MacroAssembler::movptr(Register dst, ArrayAddress src) { - movl(dst, as_Address(src)); -} - -// src should NEVER be a real pointer. Use AddressLiteral for true pointers -void MacroAssembler::movptr(Address dst, intptr_t src) { - movl(dst, src); -} - - -void MacroAssembler::pop_callee_saved_registers() { - pop(rcx); - pop(rdx); - pop(rdi); - pop(rsi); -} - -void MacroAssembler::pop_fTOS() { - fld_d(Address(rsp, 0)); - addl(rsp, 2 * wordSize); -} - -void MacroAssembler::push_callee_saved_registers() { - push(rsi); - push(rdi); - push(rdx); - push(rcx); -} - -void MacroAssembler::push_fTOS() { - subl(rsp, 2 * wordSize); - fstp_d(Address(rsp, 0)); -} - - -void MacroAssembler::pushoop(jobject obj) { - push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::pushklass(Metadata* obj) { - push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::pushptr(AddressLiteral src) { - if (src.is_lval()) { - push_literal32((int32_t)src.target(), src.rspec()); - } else { - pushl(as_Address(src)); - } -} - -void MacroAssembler::set_word_if_not_zero(Register dst) { - xorl(dst, dst); - set_byte_if_not_zero(dst); -} - -static void pass_arg0(MacroAssembler* masm, Register arg) { - masm->push(arg); -} - -static void pass_arg1(MacroAssembler* masm, Register arg) { - masm->push(arg); -} - -static void pass_arg2(MacroAssembler* masm, Register arg) { - masm->push(arg); -} - -static void pass_arg3(MacroAssembler* masm, Register arg) { - masm->push(arg); -} - -#ifndef PRODUCT -extern "C" void findpc(intptr_t x); -#endif - -void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { - // In order to get locks to work, we need to fake a in_VM state - JavaThread* thread = JavaThread::current(); - JavaThreadState saved_state = thread->thread_state(); - thread->set_thread_state(_thread_in_vm); - if (ShowMessageBoxOnError) { - JavaThread* thread = JavaThread::current(); - JavaThreadState saved_state = thread->thread_state(); - thread->set_thread_state(_thread_in_vm); - if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { - ttyLocker ttyl; - BytecodeCounter::print(); - } - // To see where a verify_oop failed, get $ebx+40/X for this frame. - // This is the value of eip which points to where verify_oop will return. - if (os::message_box(msg, "Execution stopped, print registers?")) { - print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); - BREAKPOINT; - } - } else { - ttyLocker ttyl; - ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); - } - // Don't assert holding the ttyLock - assert(false, err_msg("DEBUG MESSAGE: %s", msg)); - ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); -} - -void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { - ttyLocker ttyl; - FlagSetting fs(Debugging, true); - tty->print_cr("eip = 0x%08x", eip); -#ifndef PRODUCT - if ((WizardMode || Verbose) && PrintMiscellaneous) { - tty->cr(); - findpc(eip); - tty->cr(); - } -#endif -#define PRINT_REG(rax) \ - { tty->print("%s = ", #rax); os::print_location(tty, rax); } - PRINT_REG(rax); - PRINT_REG(rbx); - PRINT_REG(rcx); - PRINT_REG(rdx); - PRINT_REG(rdi); - PRINT_REG(rsi); - PRINT_REG(rbp); - PRINT_REG(rsp); -#undef PRINT_REG - // Print some words near top of staack. - int* dump_sp = (int*) rsp; - for (int col1 = 0; col1 < 8; col1++) { - tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); - os::print_location(tty, *dump_sp++); - } - for (int row = 0; row < 16; row++) { - tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); - for (int col = 0; col < 8; col++) { - tty->print(" 0x%08x", *dump_sp++); - } - tty->cr(); - } - // Print some instructions around pc: - Disassembler::decode((address)eip-64, (address)eip); - tty->print_cr("--------"); - Disassembler::decode((address)eip, (address)eip+32); -} - -void MacroAssembler::stop(const char* msg) { - ExternalAddress message((address)msg); - // push address of message - pushptr(message.addr()); - { Label L; call(L, relocInfo::none); bind(L); } // push eip - pusha(); // push registers - call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); - hlt(); -} - -void MacroAssembler::warn(const char* msg) { - push_CPU_state(); - - ExternalAddress message((address) msg); - // push address of message - pushptr(message.addr()); - - call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); - addl(rsp, wordSize); // discard argument - pop_CPU_state(); -} - -void MacroAssembler::print_state() { - { Label L; call(L, relocInfo::none); bind(L); } // push eip - pusha(); // push registers - - push_CPU_state(); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); - pop_CPU_state(); - - popa(); - addl(rsp, wordSize); -} - -#else // _LP64 - -// 64 bit versions - -Address MacroAssembler::as_Address(AddressLiteral adr) { - // amd64 always does this as a pc-rel - // we can be absolute or disp based on the instruction type - // jmp/call are displacements others are absolute - assert(!adr.is_lval(), "must be rval"); - assert(reachable(adr), "must be"); - return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); - -} - -Address MacroAssembler::as_Address(ArrayAddress adr) { - AddressLiteral base = adr.base(); - lea(rscratch1, base); - Address index = adr.index(); - assert(index._disp == 0, "must not have disp"); // maybe it can? - Address array(rscratch1, index._index, index._scale, index._disp); - return array; -} - -int MacroAssembler::biased_locking_enter(Register lock_reg, - Register obj_reg, - Register swap_reg, - Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, - Label* slow_case, - BiasedLockingCounters* counters) { - assert(UseBiasedLocking, "why call this otherwise?"); - assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); - assert(tmp_reg != noreg, "tmp_reg must be supplied"); - assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); - assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); - Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); - Address saved_mark_addr(lock_reg, 0); - - if (PrintBiasedLockingStatistics && counters == NULL) - counters = BiasedLocking::counters(); - - // Biased locking - // See whether the lock is currently biased toward our thread and - // whether the epoch is still valid - // Note that the runtime guarantees sufficient alignment of JavaThread - // pointers to allow age to be placed into low bits - // First check to see whether biasing is even enabled for this object - Label cas_label; - int null_check_offset = -1; - if (!swap_reg_contains_mark) { - null_check_offset = offset(); - movq(swap_reg, mark_addr); - } - movq(tmp_reg, swap_reg); - andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); - cmpq(tmp_reg, markOopDesc::biased_lock_pattern); - jcc(Assembler::notEqual, cas_label); - // The bias pattern is present in the object's header. Need to check - // whether the bias owner and the epoch are both still current. - load_prototype_header(tmp_reg, obj_reg); - orq(tmp_reg, r15_thread); - xorq(tmp_reg, swap_reg); - andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); - } - jcc(Assembler::equal, done); - - Label try_revoke_bias; - Label try_rebias; - - // At this point we know that the header has the bias pattern and - // that we are not the bias owner in the current epoch. We need to - // figure out more details about the state of the header in order to - // know what operations can be legally performed on the object's - // header. - - // If the low three bits in the xor result aren't clear, that means - // the prototype header is no longer biased and we have to revoke - // the bias on this object. - testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); - jcc(Assembler::notZero, try_revoke_bias); - - // Biasing is still enabled for this data type. See whether the - // epoch of the current bias is still valid, meaning that the epoch - // bits of the mark word are equal to the epoch bits of the - // prototype header. (Note that the prototype header's epoch bits - // only change at a safepoint.) If not, attempt to rebias the object - // toward the current thread. Note that we must be absolutely sure - // that the current epoch is invalid in order to do this because - // otherwise the manipulations it performs on the mark word are - // illegal. - testq(tmp_reg, markOopDesc::epoch_mask_in_place); - jcc(Assembler::notZero, try_rebias); - - // The epoch of the current bias is still valid but we know nothing - // about the owner; it might be set or it might be clear. Try to - // acquire the bias of the object using an atomic operation. If this - // fails we will go in to the runtime to revoke the object's bias. - // Note that we first construct the presumed unbiased header so we - // don't accidentally blow away another thread's valid bias. - andq(swap_reg, - markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); - movq(tmp_reg, swap_reg); - orq(tmp_reg, r15_thread); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // If the biasing toward our thread failed, this means that - // another thread succeeded in biasing it toward itself and we - // need to revoke that bias. The revocation will occur in the - // interpreter runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_rebias); - // At this point we know the epoch has expired, meaning that the - // current "bias owner", if any, is actually invalid. Under these - // circumstances _only_, we are allowed to use the current header's - // value as the comparison value when doing the cas to acquire the - // bias in the current epoch. In other words, we allow transfer of - // the bias from one thread to another directly in this situation. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - load_prototype_header(tmp_reg, obj_reg); - orq(tmp_reg, r15_thread); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // If the biasing toward our thread failed, then another thread - // succeeded in biasing it toward itself and we need to revoke that - // bias. The revocation will occur in the runtime in the slow case. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); - } - if (slow_case != NULL) { - jcc(Assembler::notZero, *slow_case); - } - jmp(done); - - bind(try_revoke_bias); - // The prototype mark in the klass doesn't have the bias bit set any - // more, indicating that objects of this data type are not supposed - // to be biased any more. We are going to try to reset the mark of - // this object to the prototype value and fall through to the - // CAS-based locking scheme. Note that if our CAS fails, it means - // that another thread raced us for the privilege of revoking the - // bias of this particular object, so it's okay to continue in the - // normal locking code. - // - // FIXME: due to a lack of registers we currently blow away the age - // bits in this situation. Should attempt to preserve them. - load_prototype_header(tmp_reg, obj_reg); - if (os::is_MP()) { - lock(); - } - cmpxchgq(tmp_reg, Address(obj_reg, 0)); - // Fall through to the normal CAS-based lock, because no matter what - // the result of the above CAS, some thread must have succeeded in - // removing the bias bit from the object's header. - if (counters != NULL) { - cond_inc32(Assembler::zero, - ExternalAddress((address) counters->revoked_lock_entry_count_addr())); - } - - bind(cas_label); - - return null_check_offset; -} - -void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { - Label L, E; - -#ifdef _WIN64 - // Windows always allocates space for it's register args - assert(num_args <= 4, "only register arguments supported"); - subq(rsp, frame::arg_reg_save_area_bytes); -#endif - - // Align stack if necessary - testl(rsp, 15); - jcc(Assembler::zero, L); - - subq(rsp, 8); - { - call(RuntimeAddress(entry_point)); - } - addq(rsp, 8); - jmp(E); - - bind(L); - { - call(RuntimeAddress(entry_point)); - } - - bind(E); - -#ifdef _WIN64 - // restore stack pointer - addq(rsp, frame::arg_reg_save_area_bytes); -#endif - -} - -void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { - assert(!src2.is_lval(), "should use cmpptr"); - - if (reachable(src2)) { - cmpq(src1, as_Address(src2)); - } else { - lea(rscratch1, src2); - Assembler::cmpq(src1, Address(rscratch1, 0)); - } -} - -int MacroAssembler::corrected_idivq(Register reg) { - // Full implementation of Java ldiv and lrem; checks for special - // case as described in JVM spec., p.243 & p.271. The function - // returns the (pc) offset of the idivl instruction - may be needed - // for implicit exceptions. - // - // normal case special case - // - // input : rax: dividend min_long - // reg: divisor (may not be eax/edx) -1 - // - // output: rax: quotient (= rax idiv reg) min_long - // rdx: remainder (= rax irem reg) 0 - assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); - static const int64_t min_long = 0x8000000000000000; - Label normal_case, special_case; - - // check for special case - cmp64(rax, ExternalAddress((address) &min_long)); - jcc(Assembler::notEqual, normal_case); - xorl(rdx, rdx); // prepare rdx for possible special case (where - // remainder = 0) - cmpq(reg, -1); - jcc(Assembler::equal, special_case); - - // handle normal case - bind(normal_case); - cdqq(); - int idivq_offset = offset(); - idivq(reg); - - // normal and special case exit - bind(special_case); - - return idivq_offset; -} - -void MacroAssembler::decrementq(Register reg, int value) { - if (value == min_jint) { subq(reg, value); return; } - if (value < 0) { incrementq(reg, -value); return; } - if (value == 0) { ; return; } - if (value == 1 && UseIncDec) { decq(reg) ; return; } - /* else */ { subq(reg, value) ; return; } -} - -void MacroAssembler::decrementq(Address dst, int value) { - if (value == min_jint) { subq(dst, value); return; } - if (value < 0) { incrementq(dst, -value); return; } - if (value == 0) { ; return; } - if (value == 1 && UseIncDec) { decq(dst) ; return; } - /* else */ { subq(dst, value) ; return; } -} - -void MacroAssembler::incrementq(Register reg, int value) { - if (value == min_jint) { addq(reg, value); return; } - if (value < 0) { decrementq(reg, -value); return; } - if (value == 0) { ; return; } - if (value == 1 && UseIncDec) { incq(reg) ; return; } - /* else */ { addq(reg, value) ; return; } -} - -void MacroAssembler::incrementq(Address dst, int value) { - if (value == min_jint) { addq(dst, value); return; } - if (value < 0) { decrementq(dst, -value); return; } - if (value == 0) { ; return; } - if (value == 1 && UseIncDec) { incq(dst) ; return; } - /* else */ { addq(dst, value) ; return; } -} - -// 32bit can do a case table jump in one instruction but we no longer allow the base -// to be installed in the Address class -void MacroAssembler::jump(ArrayAddress entry) { - lea(rscratch1, entry.base()); - Address dispatch = entry.index(); - assert(dispatch._base == noreg, "must be"); - dispatch._base = rscratch1; - jmp(dispatch); -} - -void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { - ShouldNotReachHere(); // 64bit doesn't use two regs - cmpq(x_lo, y_lo); -} - -void MacroAssembler::lea(Register dst, AddressLiteral src) { - mov_literal64(dst, (intptr_t)src.target(), src.rspec()); -} - -void MacroAssembler::lea(Address dst, AddressLiteral adr) { - mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); - movptr(dst, rscratch1); -} - -void MacroAssembler::leave() { - // %%% is this really better? Why not on 32bit too? - emit_byte(0xC9); // LEAVE -} - -void MacroAssembler::lneg(Register hi, Register lo) { - ShouldNotReachHere(); // 64bit doesn't use two regs - negq(lo); -} - -void MacroAssembler::movoop(Register dst, jobject obj) { - mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::movoop(Address dst, jobject obj) { - mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); - movq(dst, rscratch1); -} - -void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { - mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { - mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); - movq(dst, rscratch1); -} - -void MacroAssembler::movptr(Register dst, AddressLiteral src) { - if (src.is_lval()) { - mov_literal64(dst, (intptr_t)src.target(), src.rspec()); - } else { - if (reachable(src)) { - movq(dst, as_Address(src)); - } else { - lea(rscratch1, src); - movq(dst, Address(rscratch1,0)); - } - } -} - -void MacroAssembler::movptr(ArrayAddress dst, Register src) { - movq(as_Address(dst), src); -} - -void MacroAssembler::movptr(Register dst, ArrayAddress src) { - movq(dst, as_Address(src)); -} - -// src should NEVER be a real pointer. Use AddressLiteral for true pointers -void MacroAssembler::movptr(Address dst, intptr_t src) { - mov64(rscratch1, src); - movq(dst, rscratch1); -} - -// These are mostly for initializing NULL -void MacroAssembler::movptr(Address dst, int32_t src) { - movslq(dst, src); -} - -void MacroAssembler::movptr(Register dst, int32_t src) { - mov64(dst, (intptr_t)src); -} - -void MacroAssembler::pushoop(jobject obj) { - movoop(rscratch1, obj); - push(rscratch1); -} - -void MacroAssembler::pushklass(Metadata* obj) { - mov_metadata(rscratch1, obj); - push(rscratch1); -} - -void MacroAssembler::pushptr(AddressLiteral src) { - lea(rscratch1, src); - if (src.is_lval()) { - push(rscratch1); - } else { - pushq(Address(rscratch1, 0)); - } -} - -void MacroAssembler::reset_last_Java_frame(bool clear_fp, - bool clear_pc) { - // we must set sp to zero to clear frame - movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); - // must clear fp, so that compiled frames are not confused; it is - // possible that we need it only for debugging - if (clear_fp) { - movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); - } - - if (clear_pc) { - movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); - } -} - -void MacroAssembler::set_last_Java_frame(Register last_java_sp, - Register last_java_fp, - address last_java_pc) { - // determine last_java_sp register - if (!last_java_sp->is_valid()) { - last_java_sp = rsp; - } - - // last_java_fp is optional - if (last_java_fp->is_valid()) { - movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), - last_java_fp); - } - - // last_java_pc is optional - if (last_java_pc != NULL) { - Address java_pc(r15_thread, - JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); - lea(rscratch1, InternalAddress(last_java_pc)); - movptr(java_pc, rscratch1); - } - - movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); -} - -static void pass_arg0(MacroAssembler* masm, Register arg) { - if (c_rarg0 != arg ) { - masm->mov(c_rarg0, arg); - } -} - -static void pass_arg1(MacroAssembler* masm, Register arg) { - if (c_rarg1 != arg ) { - masm->mov(c_rarg1, arg); - } -} - -static void pass_arg2(MacroAssembler* masm, Register arg) { - if (c_rarg2 != arg ) { - masm->mov(c_rarg2, arg); - } -} - -static void pass_arg3(MacroAssembler* masm, Register arg) { - if (c_rarg3 != arg ) { - masm->mov(c_rarg3, arg); - } -} - -void MacroAssembler::stop(const char* msg) { - address rip = pc(); - pusha(); // get regs on stack - lea(c_rarg0, ExternalAddress((address) msg)); - lea(c_rarg1, InternalAddress(rip)); - movq(c_rarg2, rsp); // pass pointer to regs array - andq(rsp, -16); // align stack as required by ABI - call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); - hlt(); -} - -void MacroAssembler::warn(const char* msg) { - push(rbp); - movq(rbp, rsp); - andq(rsp, -16); // align stack as required by push_CPU_state and call - push_CPU_state(); // keeps alignment at 16 bytes - lea(c_rarg0, ExternalAddress((address) msg)); - call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); - pop_CPU_state(); - mov(rsp, rbp); - pop(rbp); -} - -void MacroAssembler::print_state() { - address rip = pc(); - pusha(); // get regs on stack - push(rbp); - movq(rbp, rsp); - andq(rsp, -16); // align stack as required by push_CPU_state and call - push_CPU_state(); // keeps alignment at 16 bytes - - lea(c_rarg0, InternalAddress(rip)); - lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array - call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1); - - pop_CPU_state(); - mov(rsp, rbp); - pop(rbp); - popa(); -} - -#ifndef PRODUCT -extern "C" void findpc(intptr_t x); -#endif - -void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { - // In order to get locks to work, we need to fake a in_VM state - if (ShowMessageBoxOnError) { - JavaThread* thread = JavaThread::current(); - JavaThreadState saved_state = thread->thread_state(); - thread->set_thread_state(_thread_in_vm); -#ifndef PRODUCT - if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { - ttyLocker ttyl; - BytecodeCounter::print(); - } -#endif - // To see where a verify_oop failed, get $ebx+40/X for this frame. - // XXX correct this offset for amd64 - // This is the value of eip which points to where verify_oop will return. - if (os::message_box(msg, "Execution stopped, print registers?")) { - print_state64(pc, regs); - BREAKPOINT; - assert(false, "start up GDB"); - } - ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); - } else { - ttyLocker ttyl; - ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", - msg); - assert(false, err_msg("DEBUG MESSAGE: %s", msg)); - } -} - -void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { - ttyLocker ttyl; - FlagSetting fs(Debugging, true); - tty->print_cr("rip = 0x%016lx", pc); -#ifndef PRODUCT - tty->cr(); - findpc(pc); - tty->cr(); -#endif -#define PRINT_REG(rax, value) \ - { tty->print("%s = ", #rax); os::print_location(tty, value); } - PRINT_REG(rax, regs[15]); - PRINT_REG(rbx, regs[12]); - PRINT_REG(rcx, regs[14]); - PRINT_REG(rdx, regs[13]); - PRINT_REG(rdi, regs[8]); - PRINT_REG(rsi, regs[9]); - PRINT_REG(rbp, regs[10]); - PRINT_REG(rsp, regs[11]); - PRINT_REG(r8 , regs[7]); - PRINT_REG(r9 , regs[6]); - PRINT_REG(r10, regs[5]); - PRINT_REG(r11, regs[4]); - PRINT_REG(r12, regs[3]); - PRINT_REG(r13, regs[2]); - PRINT_REG(r14, regs[1]); - PRINT_REG(r15, regs[0]); -#undef PRINT_REG - // Print some words near top of staack. - int64_t* rsp = (int64_t*) regs[11]; - int64_t* dump_sp = rsp; - for (int col1 = 0; col1 < 8; col1++) { - tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); - os::print_location(tty, *dump_sp++); - } - for (int row = 0; row < 25; row++) { - tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); - for (int col = 0; col < 4; col++) { - tty->print(" 0x%016lx", *dump_sp++); - } - tty->cr(); - } - // Print some instructions around pc: - Disassembler::decode((address)pc-64, (address)pc); - tty->print_cr("--------"); - Disassembler::decode((address)pc, (address)pc+32); -} - -#endif // _LP64 - -// Now versions that are common to 32/64 bit - -void MacroAssembler::addptr(Register dst, int32_t imm32) { - LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); -} - -void MacroAssembler::addptr(Register dst, Register src) { - LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); -} - -void MacroAssembler::addptr(Address dst, Register src) { - LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); -} - -void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::addsd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::addsd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - addss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - addss(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::align(int modulus) { - if (offset() % modulus != 0) { - nop(modulus - (offset() % modulus)); - } -} - -void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { - // Used in sign-masking with aligned address. - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); - if (reachable(src)) { - Assembler::andpd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::andpd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { - // Used in sign-masking with aligned address. - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); - if (reachable(src)) { - Assembler::andps(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::andps(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::andptr(Register dst, int32_t imm32) { - LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); -} - -void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { - pushf(); - if (os::is_MP()) - lock(); - incrementl(counter_addr); - popf(); -} - -// Writes to stack successive pages until offset reached to check for -// stack overflow + shadow pages. This clobbers tmp. -void MacroAssembler::bang_stack_size(Register size, Register tmp) { - movptr(tmp, rsp); - // Bang stack for total size given plus shadow page size. - // Bang one page at a time because large size can bang beyond yellow and - // red zones. - Label loop; - bind(loop); - movl(Address(tmp, (-os::vm_page_size())), size ); - subptr(tmp, os::vm_page_size()); - subl(size, os::vm_page_size()); - jcc(Assembler::greater, loop); - - // Bang down shadow pages too. - // The -1 because we already subtracted 1 page. - for (int i = 0; i< StackShadowPages-1; i++) { - // this could be any sized move but this is can be a debugging crumb - // so the bigger the better. - movptr(Address(tmp, (-i*os::vm_page_size())), size ); - } -} - -void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { - assert(UseBiasedLocking, "why call this otherwise?"); - - // Check for biased locking unlock case, which is a no-op - // Note: we do not have to check the thread ID for two reasons. - // First, the interpreter checks for IllegalMonitorStateException at - // a higher level. Second, if the bias was revoked while we held the - // lock, the object could not be rebiased toward another thread, so - // the bias bit would be clear. - movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); - cmpptr(temp_reg, markOopDesc::biased_lock_pattern); - jcc(Assembler::equal, done); -} - -void MacroAssembler::c2bool(Register x) { - // implements x == 0 ? 0 : 1 - // note: must only look at least-significant byte of x - // since C-style booleans are stored in one byte - // only! (was bug) - andl(x, 0xFF); - setb(Assembler::notZero, x); -} - -// Wouldn't need if AddressLiteral version had new name -void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { - Assembler::call(L, rtype); -} - -void MacroAssembler::call(Register entry) { - Assembler::call(entry); -} - -void MacroAssembler::call(AddressLiteral entry) { - if (reachable(entry)) { - Assembler::call_literal(entry.target(), entry.rspec()); - } else { - lea(rscratch1, entry); - Assembler::call(rscratch1); - } -} - -void MacroAssembler::ic_call(address entry) { - RelocationHolder rh = virtual_call_Relocation::spec(pc()); - movptr(rax, (intptr_t)Universe::non_oop_word()); - call(AddressLiteral(entry, rh)); -} - -// Implementation of call_VM versions - -void MacroAssembler::call_VM(Register oop_result, - address entry_point, - bool check_exceptions) { - Label C, E; - call(C, relocInfo::none); - jmp(E); - - bind(C); - call_VM_helper(oop_result, entry_point, 0, check_exceptions); - ret(0); - - bind(E); -} - -void MacroAssembler::call_VM(Register oop_result, - address entry_point, - Register arg_1, - bool check_exceptions) { - Label C, E; - call(C, relocInfo::none); - jmp(E); - - bind(C); - pass_arg1(this, arg_1); - call_VM_helper(oop_result, entry_point, 1, check_exceptions); - ret(0); - - bind(E); -} - -void MacroAssembler::call_VM(Register oop_result, - address entry_point, - Register arg_1, - Register arg_2, - bool check_exceptions) { - Label C, E; - call(C, relocInfo::none); - jmp(E); - - bind(C); - - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - - pass_arg2(this, arg_2); - pass_arg1(this, arg_1); - call_VM_helper(oop_result, entry_point, 2, check_exceptions); - ret(0); - - bind(E); -} - -void MacroAssembler::call_VM(Register oop_result, - address entry_point, - Register arg_1, - Register arg_2, - Register arg_3, - bool check_exceptions) { - Label C, E; - call(C, relocInfo::none); - jmp(E); - - bind(C); - - LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); - LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); - pass_arg3(this, arg_3); - - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - pass_arg2(this, arg_2); - - pass_arg1(this, arg_1); - call_VM_helper(oop_result, entry_point, 3, check_exceptions); - ret(0); - - bind(E); -} - -void MacroAssembler::call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - int number_of_arguments, - bool check_exceptions) { - Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); - call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); -} - -void MacroAssembler::call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, - bool check_exceptions) { - pass_arg1(this, arg_1); - call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); -} - -void MacroAssembler::call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, - Register arg_2, - bool check_exceptions) { - - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - pass_arg2(this, arg_2); - pass_arg1(this, arg_1); - call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); -} - -void MacroAssembler::call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, - Register arg_2, - Register arg_3, - bool check_exceptions) { - LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); - LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); - pass_arg3(this, arg_3); - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - pass_arg2(this, arg_2); - pass_arg1(this, arg_1); - call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); -} - -void MacroAssembler::super_call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - int number_of_arguments, - bool check_exceptions) { - Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); - MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); -} - -void MacroAssembler::super_call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, - bool check_exceptions) { - pass_arg1(this, arg_1); - super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); -} - -void MacroAssembler::super_call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, - Register arg_2, - bool check_exceptions) { - - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - pass_arg2(this, arg_2); - pass_arg1(this, arg_1); - super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); -} - -void MacroAssembler::super_call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, - Register arg_2, - Register arg_3, - bool check_exceptions) { - LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); - LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); - pass_arg3(this, arg_3); - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - pass_arg2(this, arg_2); - pass_arg1(this, arg_1); - super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); -} - -void MacroAssembler::call_VM_base(Register oop_result, - Register java_thread, - Register last_java_sp, - address entry_point, - int number_of_arguments, - bool check_exceptions) { - // determine java_thread register - if (!java_thread->is_valid()) { -#ifdef _LP64 - java_thread = r15_thread; -#else - java_thread = rdi; - get_thread(java_thread); -#endif // LP64 - } - // determine last_java_sp register - if (!last_java_sp->is_valid()) { - last_java_sp = rsp; - } - // debugging support - assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); - LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); -#ifdef ASSERT - // TraceBytecodes does not use r12 but saves it over the call, so don't verify - // r12 is the heapbase. - LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");) -#endif // ASSERT - - assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); - assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); - - // push java thread (becomes first argument of C function) - - NOT_LP64(push(java_thread); number_of_arguments++); - LP64_ONLY(mov(c_rarg0, r15_thread)); - - // set last Java frame before call - assert(last_java_sp != rbp, "can't use ebp/rbp"); - - // Only interpreter should have to set fp - set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); - - // do the call, remove parameters - MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); - - // restore the thread (cannot use the pushed argument since arguments - // may be overwritten by C code generated by an optimizing compiler); - // however can use the register value directly if it is callee saved. - if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { - // rdi & rsi (also r15) are callee saved -> nothing to do -#ifdef ASSERT - guarantee(java_thread != rax, "change this code"); - push(rax); - { Label L; - get_thread(rax); - cmpptr(java_thread, rax); - jcc(Assembler::equal, L); - STOP("MacroAssembler::call_VM_base: rdi not callee saved?"); - bind(L); - } - pop(rax); -#endif - } else { - get_thread(java_thread); - } - // reset last Java frame - // Only interpreter should have to clear fp - reset_last_Java_frame(java_thread, true, false); - -#ifndef CC_INTERP - // C++ interp handles this in the interpreter - check_and_handle_popframe(java_thread); - check_and_handle_earlyret(java_thread); -#endif /* CC_INTERP */ - - if (check_exceptions) { - // check for pending exceptions (java_thread is set upon return) - cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); -#ifndef _LP64 - jump_cc(Assembler::notEqual, - RuntimeAddress(StubRoutines::forward_exception_entry())); -#else - // This used to conditionally jump to forward_exception however it is - // possible if we relocate that the branch will not reach. So we must jump - // around so we can always reach - - Label ok; - jcc(Assembler::equal, ok); - jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - bind(ok); -#endif // LP64 - } - - // get oop result if there is one and reset the value in the thread - if (oop_result->is_valid()) { - get_vm_result(oop_result, java_thread); - } -} - -void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { - - // Calculate the value for last_Java_sp - // somewhat subtle. call_VM does an intermediate call - // which places a return address on the stack just under the - // stack pointer as the user finsihed with it. This allows - // use to retrieve last_Java_pc from last_Java_sp[-1]. - // On 32bit we then have to push additional args on the stack to accomplish - // the actual requested call. On 64bit call_VM only can use register args - // so the only extra space is the return address that call_VM created. - // This hopefully explains the calculations here. - -#ifdef _LP64 - // We've pushed one address, correct last_Java_sp - lea(rax, Address(rsp, wordSize)); -#else - lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); -#endif // LP64 - - call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); - -} - -void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { - call_VM_leaf_base(entry_point, number_of_arguments); -} - -void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { - pass_arg0(this, arg_0); - call_VM_leaf(entry_point, 1); -} - -void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { - - LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); - pass_arg1(this, arg_1); - pass_arg0(this, arg_0); - call_VM_leaf(entry_point, 2); -} - -void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { - LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - pass_arg2(this, arg_2); - LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); - pass_arg1(this, arg_1); - pass_arg0(this, arg_0); - call_VM_leaf(entry_point, 3); -} - -void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { - pass_arg0(this, arg_0); - MacroAssembler::call_VM_leaf_base(entry_point, 1); -} - -void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { - - LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); - pass_arg1(this, arg_1); - pass_arg0(this, arg_0); - MacroAssembler::call_VM_leaf_base(entry_point, 2); -} - -void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { - LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - pass_arg2(this, arg_2); - LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); - pass_arg1(this, arg_1); - pass_arg0(this, arg_0); - MacroAssembler::call_VM_leaf_base(entry_point, 3); -} - -void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { - LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); - LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); - LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); - pass_arg3(this, arg_3); - LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); - LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); - pass_arg2(this, arg_2); - LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); - pass_arg1(this, arg_1); - pass_arg0(this, arg_0); - MacroAssembler::call_VM_leaf_base(entry_point, 4); -} - -void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { - movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); - movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); - verify_oop(oop_result, "broken oop in call_VM_base"); -} - -void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { - movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); - movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD); -} - -void MacroAssembler::check_and_handle_earlyret(Register java_thread) { -} - -void MacroAssembler::check_and_handle_popframe(Register java_thread) { -} - -void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { - if (reachable(src1)) { - cmpl(as_Address(src1), imm); - } else { - lea(rscratch1, src1); - cmpl(Address(rscratch1, 0), imm); - } -} - -void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { - assert(!src2.is_lval(), "use cmpptr"); - if (reachable(src2)) { - cmpl(src1, as_Address(src2)); - } else { - lea(rscratch1, src2); - cmpl(src1, Address(rscratch1, 0)); - } -} - -void MacroAssembler::cmp32(Register src1, int32_t imm) { - Assembler::cmpl(src1, imm); -} - -void MacroAssembler::cmp32(Register src1, Address src2) { - Assembler::cmpl(src1, src2); -} - -void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { - ucomisd(opr1, opr2); - - Label L; - if (unordered_is_less) { - movl(dst, -1); - jcc(Assembler::parity, L); - jcc(Assembler::below , L); - movl(dst, 0); - jcc(Assembler::equal , L); - increment(dst); - } else { // unordered is greater - movl(dst, 1); - jcc(Assembler::parity, L); - jcc(Assembler::above , L); - movl(dst, 0); - jcc(Assembler::equal , L); - decrementl(dst); - } - bind(L); -} - -void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { - ucomiss(opr1, opr2); - - Label L; - if (unordered_is_less) { - movl(dst, -1); - jcc(Assembler::parity, L); - jcc(Assembler::below , L); - movl(dst, 0); - jcc(Assembler::equal , L); - increment(dst); - } else { // unordered is greater - movl(dst, 1); - jcc(Assembler::parity, L); - jcc(Assembler::above , L); - movl(dst, 0); - jcc(Assembler::equal , L); - decrementl(dst); - } - bind(L); -} - - -void MacroAssembler::cmp8(AddressLiteral src1, int imm) { - if (reachable(src1)) { - cmpb(as_Address(src1), imm); - } else { - lea(rscratch1, src1); - cmpb(Address(rscratch1, 0), imm); - } -} - -void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { -#ifdef _LP64 - if (src2.is_lval()) { - movptr(rscratch1, src2); - Assembler::cmpq(src1, rscratch1); - } else if (reachable(src2)) { - cmpq(src1, as_Address(src2)); - } else { - lea(rscratch1, src2); - Assembler::cmpq(src1, Address(rscratch1, 0)); - } -#else - if (src2.is_lval()) { - cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); - } else { - cmpl(src1, as_Address(src2)); - } -#endif // _LP64 -} - -void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { - assert(src2.is_lval(), "not a mem-mem compare"); -#ifdef _LP64 - // moves src2's literal address - movptr(rscratch1, src2); - Assembler::cmpq(src1, rscratch1); -#else - cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); -#endif // _LP64 -} - -void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { - if (reachable(adr)) { - if (os::is_MP()) - lock(); - cmpxchgptr(reg, as_Address(adr)); - } else { - lea(rscratch1, adr); - if (os::is_MP()) - lock(); - cmpxchgptr(reg, Address(rscratch1, 0)); - } -} - -void MacroAssembler::cmpxchgptr(Register reg, Address adr) { - LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); -} - -void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::comisd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::comisd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::comiss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::comiss(dst, Address(rscratch1, 0)); - } -} - - -void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { - Condition negated_cond = negate_condition(cond); - Label L; - jcc(negated_cond, L); - atomic_incl(counter_addr); - bind(L); -} - -int MacroAssembler::corrected_idivl(Register reg) { - // Full implementation of Java idiv and irem; checks for - // special case as described in JVM spec., p.243 & p.271. - // The function returns the (pc) offset of the idivl - // instruction - may be needed for implicit exceptions. - // - // normal case special case - // - // input : rax,: dividend min_int - // reg: divisor (may not be rax,/rdx) -1 - // - // output: rax,: quotient (= rax, idiv reg) min_int - // rdx: remainder (= rax, irem reg) 0 - assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); - const int min_int = 0x80000000; - Label normal_case, special_case; - - // check for special case - cmpl(rax, min_int); - jcc(Assembler::notEqual, normal_case); - xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) - cmpl(reg, -1); - jcc(Assembler::equal, special_case); - - // handle normal case - bind(normal_case); - cdql(); - int idivl_offset = offset(); - idivl(reg); - - // normal and special case exit - bind(special_case); - - return idivl_offset; -} - - - -void MacroAssembler::decrementl(Register reg, int value) { - if (value == min_jint) {subl(reg, value) ; return; } - if (value < 0) { incrementl(reg, -value); return; } - if (value == 0) { ; return; } - if (value == 1 && UseIncDec) { decl(reg) ; return; } - /* else */ { subl(reg, value) ; return; } -} - -void MacroAssembler::decrementl(Address dst, int value) { - if (value == min_jint) {subl(dst, value) ; return; } - if (value < 0) { incrementl(dst, -value); return; } - if (value == 0) { ; return; } - if (value == 1 && UseIncDec) { decl(dst) ; return; } - /* else */ { subl(dst, value) ; return; } -} - -void MacroAssembler::division_with_shift (Register reg, int shift_value) { - assert (shift_value > 0, "illegal shift value"); - Label _is_positive; - testl (reg, reg); - jcc (Assembler::positive, _is_positive); - int offset = (1 << shift_value) - 1 ; - - if (offset == 1) { - incrementl(reg); - } else { - addl(reg, offset); - } - - bind (_is_positive); - sarl(reg, shift_value); -} - -void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::divsd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::divsd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::divss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::divss(dst, Address(rscratch1, 0)); - } -} - -// !defined(COMPILER2) is because of stupid core builds -#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) -void MacroAssembler::empty_FPU_stack() { - if (VM_Version::supports_mmx()) { - emms(); - } else { - for (int i = 8; i-- > 0; ) ffree(i); - } -} -#endif // !LP64 || C1 || !C2 - - -// Defines obj, preserves var_size_in_bytes -void MacroAssembler::eden_allocate(Register obj, - Register var_size_in_bytes, - int con_size_in_bytes, - Register t1, - Label& slow_case) { - assert(obj == rax, "obj must be in rax, for cmpxchg"); - assert_different_registers(obj, var_size_in_bytes, t1); - if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { - jmp(slow_case); - } else { - Register end = t1; - Label retry; - bind(retry); - ExternalAddress heap_top((address) Universe::heap()->top_addr()); - movptr(obj, heap_top); - if (var_size_in_bytes == noreg) { - lea(end, Address(obj, con_size_in_bytes)); - } else { - lea(end, Address(obj, var_size_in_bytes, Address::times_1)); - } - // if end < obj then we wrapped around => object too long => slow case - cmpptr(end, obj); - jcc(Assembler::below, slow_case); - cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); - jcc(Assembler::above, slow_case); - // Compare obj with the top addr, and if still equal, store the new top addr in - // end at the address of the top addr pointer. Sets ZF if was equal, and clears - // it otherwise. Use lock prefix for atomicity on MPs. - locked_cmpxchgptr(end, heap_top); - jcc(Assembler::notEqual, retry); - } -} - -void MacroAssembler::enter() { - push(rbp); - mov(rbp, rsp); -} - -// A 5 byte nop that is safe for patching (see patch_verified_entry) -void MacroAssembler::fat_nop() { - if (UseAddressNop) { - addr_nop_5(); - } else { - emit_byte(0x26); // es: - emit_byte(0x2e); // cs: - emit_byte(0x64); // fs: - emit_byte(0x65); // gs: - emit_byte(0x90); - } -} - -void MacroAssembler::fcmp(Register tmp) { - fcmp(tmp, 1, true, true); -} - -void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { - assert(!pop_right || pop_left, "usage error"); - if (VM_Version::supports_cmov()) { - assert(tmp == noreg, "unneeded temp"); - if (pop_left) { - fucomip(index); - } else { - fucomi(index); - } - if (pop_right) { - fpop(); - } - } else { - assert(tmp != noreg, "need temp"); - if (pop_left) { - if (pop_right) { - fcompp(); - } else { - fcomp(index); - } - } else { - fcom(index); - } - // convert FPU condition into eflags condition via rax, - save_rax(tmp); - fwait(); fnstsw_ax(); - sahf(); - restore_rax(tmp); - } - // condition codes set as follows: - // - // CF (corresponds to C0) if x < y - // PF (corresponds to C2) if unordered - // ZF (corresponds to C3) if x = y -} - -void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { - fcmp2int(dst, unordered_is_less, 1, true, true); -} - -void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { - fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right); - Label L; - if (unordered_is_less) { - movl(dst, -1); - jcc(Assembler::parity, L); - jcc(Assembler::below , L); - movl(dst, 0); - jcc(Assembler::equal , L); - increment(dst); - } else { // unordered is greater - movl(dst, 1); - jcc(Assembler::parity, L); - jcc(Assembler::above , L); - movl(dst, 0); - jcc(Assembler::equal , L); - decrementl(dst); - } - bind(L); -} - -void MacroAssembler::fld_d(AddressLiteral src) { - fld_d(as_Address(src)); -} - -void MacroAssembler::fld_s(AddressLiteral src) { - fld_s(as_Address(src)); -} - -void MacroAssembler::fld_x(AddressLiteral src) { - Assembler::fld_x(as_Address(src)); -} - -void MacroAssembler::fldcw(AddressLiteral src) { - Assembler::fldcw(as_Address(src)); -} - -void MacroAssembler::pow_exp_core_encoding() { - // kills rax, rcx, rdx - subptr(rsp,sizeof(jdouble)); - // computes 2^X. Stack: X ... - // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and - // keep it on the thread's stack to compute 2^int(X) later - // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1) - // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) - fld_s(0); // Stack: X X ... - frndint(); // Stack: int(X) X ... - fsuba(1); // Stack: int(X) X-int(X) ... - fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... - f2xm1(); // Stack: 2^(X-int(X))-1 ... - fld1(); // Stack: 1 2^(X-int(X))-1 ... - faddp(1); // Stack: 2^(X-int(X)) - // computes 2^(int(X)): add exponent bias (1023) to int(X), then - // shift int(X)+1023 to exponent position. - // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11 - // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent - // values so detect them and set result to NaN. - movl(rax,Address(rsp,0)); - movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding - addl(rax, 1023); - movl(rdx,rax); - shll(rax,20); - // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. - addl(rdx,1); - // Check that 1 < int(X)+1023+1 < 2048 - // in 3 steps: - // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 - // 2- (int(X)+1023+1)&-2048 != 0 - // 3- (int(X)+1023+1)&-2048 != 1 - // Do 2- first because addl just updated the flags. - cmov32(Assembler::equal,rax,rcx); - cmpl(rdx,1); - cmov32(Assembler::equal,rax,rcx); - testl(rdx,rcx); - cmov32(Assembler::notEqual,rax,rcx); - movl(Address(rsp,4),rax); - movl(Address(rsp,0),0); - fmul_d(Address(rsp,0)); // Stack: 2^X ... - addptr(rsp,sizeof(jdouble)); -} - -void MacroAssembler::increase_precision() { - subptr(rsp, BytesPerWord); - fnstcw(Address(rsp, 0)); - movl(rax, Address(rsp, 0)); - orl(rax, 0x300); - push(rax); - fldcw(Address(rsp, 0)); - pop(rax); -} - -void MacroAssembler::restore_precision() { - fldcw(Address(rsp, 0)); - addptr(rsp, BytesPerWord); -} - -void MacroAssembler::fast_pow() { - // computes X^Y = 2^(Y * log2(X)) - // if fast computation is not possible, result is NaN. Requires - // fallback from user of this macro. - // increase precision for intermediate steps of the computation - increase_precision(); - fyl2x(); // Stack: (Y*log2(X)) ... - pow_exp_core_encoding(); // Stack: exp(X) ... - restore_precision(); -} - -void MacroAssembler::fast_exp() { - // computes exp(X) = 2^(X * log2(e)) - // if fast computation is not possible, result is NaN. Requires - // fallback from user of this macro. - // increase precision for intermediate steps of the computation - increase_precision(); - fldl2e(); // Stack: log2(e) X ... - fmulp(1); // Stack: (X*log2(e)) ... - pow_exp_core_encoding(); // Stack: exp(X) ... - restore_precision(); -} - -void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { - // kills rax, rcx, rdx - // pow and exp needs 2 extra registers on the fpu stack. - Label slow_case, done; - Register tmp = noreg; - if (!VM_Version::supports_cmov()) { - // fcmp needs a temporary so preserve rdx, - tmp = rdx; - } - Register tmp2 = rax; - Register tmp3 = rcx; - - if (is_exp) { - // Stack: X - fld_s(0); // duplicate argument for runtime call. Stack: X X - fast_exp(); // Stack: exp(X) X - fcmp(tmp, 0, false, false); // Stack: exp(X) X - // exp(X) not equal to itself: exp(X) is NaN go to slow case. - jcc(Assembler::parity, slow_case); - // get rid of duplicate argument. Stack: exp(X) - if (num_fpu_regs_in_use > 0) { - fxch(); - fpop(); - } else { - ffree(1); - } - jmp(done); - } else { - // Stack: X Y - Label x_negative, y_odd; - - fldz(); // Stack: 0 X Y - fcmp(tmp, 1, true, false); // Stack: X Y - jcc(Assembler::above, x_negative); - - // X >= 0 - - fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y - fld_s(1); // Stack: X Y X Y - fast_pow(); // Stack: X^Y X Y - fcmp(tmp, 0, false, false); // Stack: X^Y X Y - // X^Y not equal to itself: X^Y is NaN go to slow case. - jcc(Assembler::parity, slow_case); - // get rid of duplicate arguments. Stack: X^Y - if (num_fpu_regs_in_use > 0) { - fxch(); fpop(); - fxch(); fpop(); - } else { - ffree(2); - ffree(1); - } - jmp(done); - - // X <= 0 - bind(x_negative); - - fld_s(1); // Stack: Y X Y - frndint(); // Stack: int(Y) X Y - fcmp(tmp, 2, false, false); // Stack: int(Y) X Y - jcc(Assembler::notEqual, slow_case); - - subptr(rsp, 8); - - // For X^Y, when X < 0, Y has to be an integer and the final - // result depends on whether it's odd or even. We just checked - // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit - // integer to test its parity. If int(Y) is huge and doesn't fit - // in the 64 bit integer range, the integer indefinite value will - // end up in the gp registers. Huge numbers are all even, the - // integer indefinite number is even so it's fine. - -#ifdef ASSERT - // Let's check we don't end up with an integer indefinite number - // when not expected. First test for huge numbers: check whether - // int(Y)+1 == int(Y) which is true for very large numbers and - // those are all even. A 64 bit integer is guaranteed to not - // overflow for numbers where y+1 != y (when precision is set to - // double precision). - Label y_not_huge; - - fld1(); // Stack: 1 int(Y) X Y - fadd(1); // Stack: 1+int(Y) int(Y) X Y - -#ifdef _LP64 - // trip to memory to force the precision down from double extended - // precision - fstp_d(Address(rsp, 0)); - fld_d(Address(rsp, 0)); -#endif - - fcmp(tmp, 1, true, false); // Stack: int(Y) X Y -#endif - - // move int(Y) as 64 bit integer to thread's stack - fistp_d(Address(rsp,0)); // Stack: X Y - -#ifdef ASSERT - jcc(Assembler::notEqual, y_not_huge); - - // Y is huge so we know it's even. It may not fit in a 64 bit - // integer and we don't want the debug code below to see the - // integer indefinite value so overwrite int(Y) on the thread's - // stack with 0. - movl(Address(rsp, 0), 0); - movl(Address(rsp, 4), 0); - - bind(y_not_huge); -#endif - - fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y - fld_s(1); // Stack: X Y X Y - fabs(); // Stack: abs(X) Y X Y - fast_pow(); // Stack: abs(X)^Y X Y - fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y - // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case. - - pop(tmp2); - NOT_LP64(pop(tmp3)); - jcc(Assembler::parity, slow_case); - -#ifdef ASSERT - // Check that int(Y) is not integer indefinite value (int - // overflow). Shouldn't happen because for values that would - // overflow, 1+int(Y)==Y which was tested earlier. -#ifndef _LP64 - { - Label integer; - testl(tmp2, tmp2); - jcc(Assembler::notZero, integer); - cmpl(tmp3, 0x80000000); - jcc(Assembler::notZero, integer); - STOP("integer indefinite value shouldn't be seen here"); - bind(integer); - } -#else - { - Label integer; - mov(tmp3, tmp2); // preserve tmp2 for parity check below - shlq(tmp3, 1); - jcc(Assembler::carryClear, integer); - jcc(Assembler::notZero, integer); - STOP("integer indefinite value shouldn't be seen here"); - bind(integer); - } -#endif -#endif - - // get rid of duplicate arguments. Stack: X^Y - if (num_fpu_regs_in_use > 0) { - fxch(); fpop(); - fxch(); fpop(); - } else { - ffree(2); - ffree(1); - } - - testl(tmp2, 1); - jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y - // X <= 0, Y even: X^Y = -abs(X)^Y - - fchs(); // Stack: -abs(X)^Y Y - jmp(done); - } - - // slow case: runtime call - bind(slow_case); - - fpop(); // pop incorrect result or int(Y) - - fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), - is_exp ? 1 : 2, num_fpu_regs_in_use); - - // Come here with result in F-TOS - bind(done); -} - -void MacroAssembler::fpop() { - ffree(); - fincstp(); -} - -void MacroAssembler::fremr(Register tmp) { - save_rax(tmp); - { Label L; - bind(L); - fprem(); - fwait(); fnstsw_ax(); -#ifdef _LP64 - testl(rax, 0x400); - jcc(Assembler::notEqual, L); -#else - sahf(); - jcc(Assembler::parity, L); -#endif // _LP64 - } - restore_rax(tmp); - // Result is in ST0. - // Note: fxch & fpop to get rid of ST1 - // (otherwise FPU stack could overflow eventually) - fxch(1); - fpop(); -} - - -void MacroAssembler::incrementl(AddressLiteral dst) { - if (reachable(dst)) { - incrementl(as_Address(dst)); - } else { - lea(rscratch1, dst); - incrementl(Address(rscratch1, 0)); - } -} - -void MacroAssembler::incrementl(ArrayAddress dst) { - incrementl(as_Address(dst)); -} - -void MacroAssembler::incrementl(Register reg, int value) { - if (value == min_jint) {addl(reg, value) ; return; } - if (value < 0) { decrementl(reg, -value); return; } - if (value == 0) { ; return; } - if (value == 1 && UseIncDec) { incl(reg) ; return; } - /* else */ { addl(reg, value) ; return; } -} - -void MacroAssembler::incrementl(Address dst, int value) { - if (value == min_jint) {addl(dst, value) ; return; } - if (value < 0) { decrementl(dst, -value); return; } - if (value == 0) { ; return; } - if (value == 1 && UseIncDec) { incl(dst) ; return; } - /* else */ { addl(dst, value) ; return; } -} - -void MacroAssembler::jump(AddressLiteral dst) { - if (reachable(dst)) { - jmp_literal(dst.target(), dst.rspec()); - } else { - lea(rscratch1, dst); - jmp(rscratch1); - } -} - -void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { - if (reachable(dst)) { - InstructionMark im(this); - relocate(dst.reloc()); - const int short_size = 2; - const int long_size = 6; - int offs = (intptr_t)dst.target() - ((intptr_t)_code_pos); - if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { - // 0111 tttn #8-bit disp - emit_byte(0x70 | cc); - emit_byte((offs - short_size) & 0xFF); - } else { - // 0000 1111 1000 tttn #32-bit disp - emit_byte(0x0F); - emit_byte(0x80 | cc); - emit_long(offs - long_size); - } - } else { -#ifdef ASSERT - warning("reversing conditional branch"); -#endif /* ASSERT */ - Label skip; - jccb(reverse[cc], skip); - lea(rscratch1, dst); - Assembler::jmp(rscratch1); - bind(skip); - } -} - -void MacroAssembler::ldmxcsr(AddressLiteral src) { - if (reachable(src)) { - Assembler::ldmxcsr(as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::ldmxcsr(Address(rscratch1, 0)); - } -} - -int MacroAssembler::load_signed_byte(Register dst, Address src) { - int off; - if (LP64_ONLY(true ||) VM_Version::is_P6()) { - off = offset(); - movsbl(dst, src); // movsxb - } else { - off = load_unsigned_byte(dst, src); - shll(dst, 24); - sarl(dst, 24); - } - return off; -} - -// Note: load_signed_short used to be called load_signed_word. -// Although the 'w' in x86 opcodes refers to the term "word" in the assembler -// manual, which means 16 bits, that usage is found nowhere in HotSpot code. -// The term "word" in HotSpot means a 32- or 64-bit machine word. -int MacroAssembler::load_signed_short(Register dst, Address src) { - int off; - if (LP64_ONLY(true ||) VM_Version::is_P6()) { - // This is dubious to me since it seems safe to do a signed 16 => 64 bit - // version but this is what 64bit has always done. This seems to imply - // that users are only using 32bits worth. - off = offset(); - movswl(dst, src); // movsxw - } else { - off = load_unsigned_short(dst, src); - shll(dst, 16); - sarl(dst, 16); - } - return off; -} - -int MacroAssembler::load_unsigned_byte(Register dst, Address src) { - // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, - // and "3.9 Partial Register Penalties", p. 22). - int off; - if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { - off = offset(); - movzbl(dst, src); // movzxb - } else { - xorl(dst, dst); - off = offset(); - movb(dst, src); - } - return off; -} - -// Note: load_unsigned_short used to be called load_unsigned_word. -int MacroAssembler::load_unsigned_short(Register dst, Address src) { - // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, - // and "3.9 Partial Register Penalties", p. 22). - int off; - if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { - off = offset(); - movzwl(dst, src); // movzxw - } else { - xorl(dst, dst); - off = offset(); - movw(dst, src); - } - return off; -} - -void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { - switch (size_in_bytes) { -#ifndef _LP64 - case 8: - assert(dst2 != noreg, "second dest register required"); - movl(dst, src); - movl(dst2, src.plus_disp(BytesPerInt)); - break; -#else - case 8: movq(dst, src); break; -#endif - case 4: movl(dst, src); break; - case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; - case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; - default: ShouldNotReachHere(); - } -} - -void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { - switch (size_in_bytes) { -#ifndef _LP64 - case 8: - assert(src2 != noreg, "second source register required"); - movl(dst, src); - movl(dst.plus_disp(BytesPerInt), src2); - break; -#else - case 8: movq(dst, src); break; -#endif - case 4: movl(dst, src); break; - case 2: movw(dst, src); break; - case 1: movb(dst, src); break; - default: ShouldNotReachHere(); - } -} - -void MacroAssembler::mov32(AddressLiteral dst, Register src) { - if (reachable(dst)) { - movl(as_Address(dst), src); - } else { - lea(rscratch1, dst); - movl(Address(rscratch1, 0), src); - } -} - -void MacroAssembler::mov32(Register dst, AddressLiteral src) { - if (reachable(src)) { - movl(dst, as_Address(src)); - } else { - lea(rscratch1, src); - movl(dst, Address(rscratch1, 0)); - } -} - -// C++ bool manipulation - -void MacroAssembler::movbool(Register dst, Address src) { - if(sizeof(bool) == 1) - movb(dst, src); - else if(sizeof(bool) == 2) - movw(dst, src); - else if(sizeof(bool) == 4) - movl(dst, src); - else - // unsupported - ShouldNotReachHere(); -} - -void MacroAssembler::movbool(Address dst, bool boolconst) { - if(sizeof(bool) == 1) - movb(dst, (int) boolconst); - else if(sizeof(bool) == 2) - movw(dst, (int) boolconst); - else if(sizeof(bool) == 4) - movl(dst, (int) boolconst); - else - // unsupported - ShouldNotReachHere(); -} - -void MacroAssembler::movbool(Address dst, Register src) { - if(sizeof(bool) == 1) - movb(dst, src); - else if(sizeof(bool) == 2) - movw(dst, src); - else if(sizeof(bool) == 4) - movl(dst, src); - else - // unsupported - ShouldNotReachHere(); -} - -void MacroAssembler::movbyte(ArrayAddress dst, int src) { - movb(as_Address(dst), src); -} - -void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - movdl(dst, as_Address(src)); - } else { - lea(rscratch1, src); - movdl(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - movq(dst, as_Address(src)); - } else { - lea(rscratch1, src); - movq(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - if (UseXmmLoadAndClearUpper) { - movsd (dst, as_Address(src)); - } else { - movlpd(dst, as_Address(src)); - } - } else { - lea(rscratch1, src); - if (UseXmmLoadAndClearUpper) { - movsd (dst, Address(rscratch1, 0)); - } else { - movlpd(dst, Address(rscratch1, 0)); - } - } -} - -void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - movss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - movss(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::movptr(Register dst, Register src) { - LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); -} - -void MacroAssembler::movptr(Register dst, Address src) { - LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); -} - -// src should NEVER be a real pointer. Use AddressLiteral for true pointers -void MacroAssembler::movptr(Register dst, intptr_t src) { - LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); -} - -void MacroAssembler::movptr(Address dst, Register src) { - LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); -} - -void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::movdqu(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::movdqu(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::movsd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::movsd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::movss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::movss(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::mulsd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::mulsd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::mulss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::mulss(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::null_check(Register reg, int offset) { - if (needs_explicit_null_check(offset)) { - // provoke OS NULL exception if reg = NULL by - // accessing M[reg] w/o changing any (non-CC) registers - // NOTE: cmpl is plenty here to provoke a segv - cmpptr(rax, Address(reg, 0)); - // Note: should probably use testl(rax, Address(reg, 0)); - // may be shorter code (however, this version of - // testl needs to be implemented first) - } else { - // nothing to do, (later) access of M[reg + offset] - // will provoke OS NULL exception if reg = NULL - } -} - -void MacroAssembler::os_breakpoint() { - // instead of directly emitting a breakpoint, call os:breakpoint for better debugability - // (e.g., MSVC can't call ps() otherwise) - call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); -} - -void MacroAssembler::pop_CPU_state() { - pop_FPU_state(); - pop_IU_state(); -} - -void MacroAssembler::pop_FPU_state() { - NOT_LP64(frstor(Address(rsp, 0));) - LP64_ONLY(fxrstor(Address(rsp, 0));) - addptr(rsp, FPUStateSizeInWords * wordSize); -} - -void MacroAssembler::pop_IU_state() { - popa(); - LP64_ONLY(addq(rsp, 8)); - popf(); -} - -// Save Integer and Float state -// Warning: Stack must be 16 byte aligned (64bit) -void MacroAssembler::push_CPU_state() { - push_IU_state(); - push_FPU_state(); -} - -void MacroAssembler::push_FPU_state() { - subptr(rsp, FPUStateSizeInWords * wordSize); -#ifndef _LP64 - fnsave(Address(rsp, 0)); - fwait(); -#else - fxsave(Address(rsp, 0)); -#endif // LP64 -} - -void MacroAssembler::push_IU_state() { - // Push flags first because pusha kills them - pushf(); - // Make sure rsp stays 16-byte aligned - LP64_ONLY(subq(rsp, 8)); - pusha(); -} - -void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { - // determine java_thread register - if (!java_thread->is_valid()) { - java_thread = rdi; - get_thread(java_thread); - } - // we must set sp to zero to clear frame - movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); - if (clear_fp) { - movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); - } - - if (clear_pc) - movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); - -} - -void MacroAssembler::restore_rax(Register tmp) { - if (tmp == noreg) pop(rax); - else if (tmp != rax) mov(rax, tmp); -} - -void MacroAssembler::round_to(Register reg, int modulus) { - addptr(reg, modulus - 1); - andptr(reg, -modulus); -} - -void MacroAssembler::save_rax(Register tmp) { - if (tmp == noreg) push(rax); - else if (tmp != rax) mov(tmp, rax); -} - -// Write serialization page so VM thread can do a pseudo remote membar. -// We use the current thread pointer to calculate a thread specific -// offset to write to within the page. This minimizes bus traffic -// due to cache line collision. -void MacroAssembler::serialize_memory(Register thread, Register tmp) { - movl(tmp, thread); - shrl(tmp, os::get_serialize_page_shift_count()); - andl(tmp, (os::vm_page_size() - sizeof(int))); - - Address index(noreg, tmp, Address::times_1); - ExternalAddress page(os::get_memory_serialize_page()); - - // Size of store must match masking code above - movl(as_Address(ArrayAddress(page, index)), tmp); -} - -// Calls to C land -// -// When entering C land, the rbp, & rsp of the last Java frame have to be recorded -// in the (thread-local) JavaThread object. When leaving C land, the last Java fp -// has to be reset to 0. This is required to allow proper stack traversal. -void MacroAssembler::set_last_Java_frame(Register java_thread, - Register last_java_sp, - Register last_java_fp, - address last_java_pc) { - // determine java_thread register - if (!java_thread->is_valid()) { - java_thread = rdi; - get_thread(java_thread); - } - // determine last_java_sp register - if (!last_java_sp->is_valid()) { - last_java_sp = rsp; - } - - // last_java_fp is optional - - if (last_java_fp->is_valid()) { - movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); - } - - // last_java_pc is optional - - if (last_java_pc != NULL) { - lea(Address(java_thread, - JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), - InternalAddress(last_java_pc)); - - } - movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); -} - -void MacroAssembler::shlptr(Register dst, int imm8) { - LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); -} - -void MacroAssembler::shrptr(Register dst, int imm8) { - LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); -} - -void MacroAssembler::sign_extend_byte(Register reg) { - if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { - movsbl(reg, reg); // movsxb - } else { - shll(reg, 24); - sarl(reg, 24); - } -} - -void MacroAssembler::sign_extend_short(Register reg) { - if (LP64_ONLY(true ||) VM_Version::is_P6()) { - movswl(reg, reg); // movsxw - } else { - shll(reg, 16); - sarl(reg, 16); - } -} - -void MacroAssembler::testl(Register dst, AddressLiteral src) { - assert(reachable(src), "Address should be reachable"); - testl(dst, as_Address(src)); -} - -void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::sqrtsd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::sqrtsd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::sqrtss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::sqrtss(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::subsd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::subsd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::subss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::subss(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::ucomisd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::ucomisd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { - if (reachable(src)) { - Assembler::ucomiss(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::ucomiss(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { - // Used in sign-bit flipping with aligned address. - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); - if (reachable(src)) { - Assembler::xorpd(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::xorpd(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { - // Used in sign-bit flipping with aligned address. - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); - if (reachable(src)) { - Assembler::xorps(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::xorps(dst, Address(rscratch1, 0)); - } -} - -void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { - // Used in sign-bit flipping with aligned address. - assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); - if (reachable(src)) { - Assembler::pshufb(dst, as_Address(src)); - } else { - lea(rscratch1, src); - Assembler::pshufb(dst, Address(rscratch1, 0)); - } -} - -// AVX 3-operands instructions - -void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { - if (reachable(src)) { - vaddsd(dst, nds, as_Address(src)); - } else { - lea(rscratch1, src); - vaddsd(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { - if (reachable(src)) { - vaddss(dst, nds, as_Address(src)); - } else { - lea(rscratch1, src); - vaddss(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { - if (reachable(src)) { - vandpd(dst, nds, as_Address(src), vector256); - } else { - lea(rscratch1, src); - vandpd(dst, nds, Address(rscratch1, 0), vector256); - } -} - -void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { - if (reachable(src)) { - vandps(dst, nds, as_Address(src), vector256); - } else { - lea(rscratch1, src); - vandps(dst, nds, Address(rscratch1, 0), vector256); - } -} - -void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { - if (reachable(src)) { - vdivsd(dst, nds, as_Address(src)); - } else { - lea(rscratch1, src); - vdivsd(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { - if (reachable(src)) { - vdivss(dst, nds, as_Address(src)); - } else { - lea(rscratch1, src); - vdivss(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { - if (reachable(src)) { - vmulsd(dst, nds, as_Address(src)); - } else { - lea(rscratch1, src); - vmulsd(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { - if (reachable(src)) { - vmulss(dst, nds, as_Address(src)); - } else { - lea(rscratch1, src); - vmulss(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { - if (reachable(src)) { - vsubsd(dst, nds, as_Address(src)); - } else { - lea(rscratch1, src); - vsubsd(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { - if (reachable(src)) { - vsubss(dst, nds, as_Address(src)); - } else { - lea(rscratch1, src); - vsubss(dst, nds, Address(rscratch1, 0)); - } -} - -void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { - if (reachable(src)) { - vxorpd(dst, nds, as_Address(src), vector256); - } else { - lea(rscratch1, src); - vxorpd(dst, nds, Address(rscratch1, 0), vector256); - } -} - -void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { - if (reachable(src)) { - vxorps(dst, nds, as_Address(src), vector256); - } else { - lea(rscratch1, src); - vxorps(dst, nds, Address(rscratch1, 0), vector256); - } -} - - -////////////////////////////////////////////////////////////////////////////////// -#ifndef SERIALGC - -void MacroAssembler::g1_write_barrier_pre(Register obj, - Register pre_val, - Register thread, - Register tmp, - bool tosca_live, - bool expand_call) { - - // If expand_call is true then we expand the call_VM_leaf macro - // directly to skip generating the check by - // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. - -#ifdef _LP64 - assert(thread == r15_thread, "must be"); -#endif // _LP64 - - Label done; - Label runtime; - - assert(pre_val != noreg, "check this code"); - - if (obj != noreg) { - assert_different_registers(obj, pre_val, tmp); - assert(pre_val != rax, "check this code"); - } - - Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_active())); - Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_index())); - Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + - PtrQueue::byte_offset_of_buf())); - - - // Is marking active? - if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { - cmpl(in_progress, 0); - } else { - assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); - cmpb(in_progress, 0); - } - jcc(Assembler::equal, done); - - // Do we need to load the previous value? - if (obj != noreg) { - load_heap_oop(pre_val, Address(obj, 0)); - } - - // Is the previous value null? - cmpptr(pre_val, (int32_t) NULL_WORD); - jcc(Assembler::equal, done); - - // Can we store original value in the thread's buffer? - // Is index == 0? - // (The index field is typed as size_t.) - - movptr(tmp, index); // tmp := *index_adr - cmpptr(tmp, 0); // tmp == 0? - jcc(Assembler::equal, runtime); // If yes, goto runtime - - subptr(tmp, wordSize); // tmp := tmp - wordSize - movptr(index, tmp); // *index_adr := tmp - addptr(tmp, buffer); // tmp := tmp + *buffer_adr - - // Record the previous value - movptr(Address(tmp, 0), pre_val); - jmp(done); - - bind(runtime); - // save the live input values - if(tosca_live) push(rax); - - if (obj != noreg && obj != rax) - push(obj); - - if (pre_val != rax) - push(pre_val); - - // Calling the runtime using the regular call_VM_leaf mechanism generates - // code (generated by InterpreterMacroAssember::call_VM_leaf_base) - // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. - // - // If we care generating the pre-barrier without a frame (e.g. in the - // intrinsified Reference.get() routine) then ebp might be pointing to - // the caller frame and so this check will most likely fail at runtime. - // - // Expanding the call directly bypasses the generation of the check. - // So when we do not have have a full interpreter frame on the stack - // expand_call should be passed true. - - NOT_LP64( push(thread); ) - - if (expand_call) { - LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) - pass_arg1(this, thread); - pass_arg0(this, pre_val); - MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); - } else { - call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); - } - - NOT_LP64( pop(thread); ) - - // save the live input values - if (pre_val != rax) - pop(pre_val); - - if (obj != noreg && obj != rax) - pop(obj); - - if(tosca_live) pop(rax); - - bind(done); -} - -void MacroAssembler::g1_write_barrier_post(Register store_addr, - Register new_val, - Register thread, - Register tmp, - Register tmp2) { -#ifdef _LP64 - assert(thread == r15_thread, "must be"); -#endif // _LP64 - - Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + - PtrQueue::byte_offset_of_index())); - Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + - PtrQueue::byte_offset_of_buf())); - - BarrierSet* bs = Universe::heap()->barrier_set(); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - Label done; - Label runtime; - - // Does store cross heap regions? - - movptr(tmp, store_addr); - xorptr(tmp, new_val); - shrptr(tmp, HeapRegion::LogOfHRGrainBytes); - jcc(Assembler::equal, done); - - // crosses regions, storing NULL? - - cmpptr(new_val, (int32_t) NULL_WORD); - jcc(Assembler::equal, done); - - // storing region crossing non-NULL, is card already dirty? - - ExternalAddress cardtable((address) ct->byte_map_base); - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); -#ifdef _LP64 - const Register card_addr = tmp; - - movq(card_addr, store_addr); - shrq(card_addr, CardTableModRefBS::card_shift); - - lea(tmp2, cardtable); - - // get the address of the card - addq(card_addr, tmp2); -#else - const Register card_index = tmp; - - movl(card_index, store_addr); - shrl(card_index, CardTableModRefBS::card_shift); - - Address index(noreg, card_index, Address::times_1); - const Register card_addr = tmp; - lea(card_addr, as_Address(ArrayAddress(cardtable, index))); -#endif - cmpb(Address(card_addr, 0), 0); - jcc(Assembler::equal, done); - - // storing a region crossing, non-NULL oop, card is clean. - // dirty card and log. - - movb(Address(card_addr, 0), 0); - - cmpl(queue_index, 0); - jcc(Assembler::equal, runtime); - subl(queue_index, wordSize); - movptr(tmp2, buffer); -#ifdef _LP64 - movslq(rscratch1, queue_index); - addq(tmp2, rscratch1); - movq(Address(tmp2, 0), card_addr); -#else - addl(tmp2, queue_index); - movl(Address(tmp2, 0), card_index); -#endif - jmp(done); - - bind(runtime); - // save the live input values - push(store_addr); - push(new_val); -#ifdef _LP64 - call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); -#else - push(thread); - call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); - pop(thread); -#endif - pop(new_val); - pop(store_addr); - - bind(done); -} - -#endif // SERIALGC -////////////////////////////////////////////////////////////////////////////////// - - -void MacroAssembler::store_check(Register obj) { - // Does a store check for the oop in register obj. The content of - // register obj is destroyed afterwards. - store_check_part_1(obj); - store_check_part_2(obj); -} - -void MacroAssembler::store_check(Register obj, Address dst) { - store_check(obj); -} - - -// split the store check operation so that other instructions can be scheduled inbetween -void MacroAssembler::store_check_part_1(Register obj) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - shrptr(obj, CardTableModRefBS::card_shift); -} - -void MacroAssembler::store_check_part_2(Register obj) { - BarrierSet* bs = Universe::heap()->barrier_set(); - assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); - CardTableModRefBS* ct = (CardTableModRefBS*)bs; - assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); - - // The calculation for byte_map_base is as follows: - // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); - // So this essentially converts an address to a displacement and - // it will never need to be relocated. On 64bit however the value may be too - // large for a 32bit displacement - - intptr_t disp = (intptr_t) ct->byte_map_base; - if (is_simm32(disp)) { - Address cardtable(noreg, obj, Address::times_1, disp); - movb(cardtable, 0); - } else { - // By doing it as an ExternalAddress disp could be converted to a rip-relative - // displacement and done in a single instruction given favorable mapping and - // a smarter version of as_Address. Worst case it is two instructions which - // is no worse off then loading disp into a register and doing as a simple - // Address() as above. - // We can't do as ExternalAddress as the only style since if disp == 0 we'll - // assert since NULL isn't acceptable in a reloci (see 6644928). In any case - // in some cases we'll get a single instruction version. - - ExternalAddress cardtable((address)disp); - Address index(noreg, obj, Address::times_1); - movb(as_Address(ArrayAddress(cardtable, index)), 0); - } -} - -void MacroAssembler::subptr(Register dst, int32_t imm32) { - LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); -} - -// Force generation of a 4 byte immediate value even if it fits into 8bit -void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { - LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); -} - -void MacroAssembler::subptr(Register dst, Register src) { - LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); -} - -// C++ bool manipulation -void MacroAssembler::testbool(Register dst) { - if(sizeof(bool) == 1) - testb(dst, 0xff); - else if(sizeof(bool) == 2) { - // testw implementation needed for two byte bools - ShouldNotReachHere(); - } else if(sizeof(bool) == 4) - testl(dst, dst); - else - // unsupported - ShouldNotReachHere(); -} - -void MacroAssembler::testptr(Register dst, Register src) { - LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); -} - -// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. -void MacroAssembler::tlab_allocate(Register obj, - Register var_size_in_bytes, - int con_size_in_bytes, - Register t1, - Register t2, - Label& slow_case) { - assert_different_registers(obj, t1, t2); - assert_different_registers(obj, var_size_in_bytes, t1); - Register end = t2; - Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); - - verify_tlab(); - - NOT_LP64(get_thread(thread)); - - movptr(obj, Address(thread, JavaThread::tlab_top_offset())); - if (var_size_in_bytes == noreg) { - lea(end, Address(obj, con_size_in_bytes)); - } else { - lea(end, Address(obj, var_size_in_bytes, Address::times_1)); - } - cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); - jcc(Assembler::above, slow_case); - - // update the tlab top pointer - movptr(Address(thread, JavaThread::tlab_top_offset()), end); - - // recover var_size_in_bytes if necessary - if (var_size_in_bytes == end) { - subptr(var_size_in_bytes, obj); - } - verify_tlab(); -} - -// Preserves rbx, and rdx. -Register MacroAssembler::tlab_refill(Label& retry, - Label& try_eden, - Label& slow_case) { - Register top = rax; - Register t1 = rcx; - Register t2 = rsi; - Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); - assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); - Label do_refill, discard_tlab; - - if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { - // No allocation in the shared eden. - jmp(slow_case); - } - - NOT_LP64(get_thread(thread_reg)); - - movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); - movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); - - // calculate amount of free space - subptr(t1, top); - shrptr(t1, LogHeapWordSize); - - // Retain tlab and allocate object in shared space if - // the amount free in the tlab is too large to discard. - cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); - jcc(Assembler::lessEqual, discard_tlab); - - // Retain - // %%% yuck as movptr... - movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); - addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); - if (TLABStats) { - // increment number of slow_allocations - addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); - } - jmp(try_eden); - - bind(discard_tlab); - if (TLABStats) { - // increment number of refills - addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); - // accumulate wastage -- t1 is amount free in tlab - addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); - } - - // if tlab is currently allocated (top or end != null) then - // fill [top, end + alignment_reserve) with array object - testptr(top, top); - jcc(Assembler::zero, do_refill); - - // set up the mark word - movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); - // set the length to the remaining space - subptr(t1, typeArrayOopDesc::header_size(T_INT)); - addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); - shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); - movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); - // set klass to intArrayKlass - // dubious reloc why not an oop reloc? - movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); - // store klass last. concurrent gcs assumes klass length is valid if - // klass field is not null. - store_klass(top, t1); - - movptr(t1, top); - subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); - incr_allocated_bytes(thread_reg, t1, 0); - - // refill the tlab with an eden allocation - bind(do_refill); - movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); - shlptr(t1, LogHeapWordSize); - // allocate new tlab, address returned in top - eden_allocate(top, t1, 0, t2, slow_case); - - // Check that t1 was preserved in eden_allocate. -#ifdef ASSERT - if (UseTLAB) { - Label ok; - Register tsize = rsi; - assert_different_registers(tsize, thread_reg, t1); - push(tsize); - movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); - shlptr(tsize, LogHeapWordSize); - cmpptr(t1, tsize); - jcc(Assembler::equal, ok); - STOP("assert(t1 != tlab size)"); - should_not_reach_here(); - - bind(ok); - pop(tsize); - } -#endif - movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); - movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); - addptr(top, t1); - subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); - movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); - verify_tlab(); - jmp(retry); - - return thread_reg; // for use by caller -} - -void MacroAssembler::incr_allocated_bytes(Register thread, - Register var_size_in_bytes, - int con_size_in_bytes, - Register t1) { - if (!thread->is_valid()) { -#ifdef _LP64 - thread = r15_thread; -#else - assert(t1->is_valid(), "need temp reg"); - thread = t1; - get_thread(thread); -#endif - } - -#ifdef _LP64 - if (var_size_in_bytes->is_valid()) { - addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); - } else { - addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); - } -#else - if (var_size_in_bytes->is_valid()) { - addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); - } else { - addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); - } - adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); -#endif -} - -void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { - pusha(); - - // if we are coming from c1, xmm registers may be live - int off = 0; - if (UseSSE == 1) { - subptr(rsp, sizeof(jdouble)*8); - movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); - movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); - movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); - movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); - movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); - movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); - movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); - movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); - } else if (UseSSE >= 2) { -#ifdef COMPILER2 - if (MaxVectorSize > 16) { - assert(UseAVX > 0, "256bit vectors are supported only with AVX"); - // Save upper half of YMM registes - subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); - vextractf128h(Address(rsp, 0),xmm0); - vextractf128h(Address(rsp, 16),xmm1); - vextractf128h(Address(rsp, 32),xmm2); - vextractf128h(Address(rsp, 48),xmm3); - vextractf128h(Address(rsp, 64),xmm4); - vextractf128h(Address(rsp, 80),xmm5); - vextractf128h(Address(rsp, 96),xmm6); - vextractf128h(Address(rsp,112),xmm7); -#ifdef _LP64 - vextractf128h(Address(rsp,128),xmm8); - vextractf128h(Address(rsp,144),xmm9); - vextractf128h(Address(rsp,160),xmm10); - vextractf128h(Address(rsp,176),xmm11); - vextractf128h(Address(rsp,192),xmm12); - vextractf128h(Address(rsp,208),xmm13); - vextractf128h(Address(rsp,224),xmm14); - vextractf128h(Address(rsp,240),xmm15); -#endif - } -#endif - // Save whole 128bit (16 bytes) XMM regiters - subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); - movdqu(Address(rsp,off++*16),xmm0); - movdqu(Address(rsp,off++*16),xmm1); - movdqu(Address(rsp,off++*16),xmm2); - movdqu(Address(rsp,off++*16),xmm3); - movdqu(Address(rsp,off++*16),xmm4); - movdqu(Address(rsp,off++*16),xmm5); - movdqu(Address(rsp,off++*16),xmm6); - movdqu(Address(rsp,off++*16),xmm7); -#ifdef _LP64 - movdqu(Address(rsp,off++*16),xmm8); - movdqu(Address(rsp,off++*16),xmm9); - movdqu(Address(rsp,off++*16),xmm10); - movdqu(Address(rsp,off++*16),xmm11); - movdqu(Address(rsp,off++*16),xmm12); - movdqu(Address(rsp,off++*16),xmm13); - movdqu(Address(rsp,off++*16),xmm14); - movdqu(Address(rsp,off++*16),xmm15); -#endif - } - - // Preserve registers across runtime call - int incoming_argument_and_return_value_offset = -1; - if (num_fpu_regs_in_use > 1) { - // Must preserve all other FPU regs (could alternatively convert - // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash - // FPU state, but can not trust C compiler) - NEEDS_CLEANUP; - // NOTE that in this case we also push the incoming argument(s) to - // the stack and restore it later; we also use this stack slot to - // hold the return value from dsin, dcos etc. - for (int i = 0; i < num_fpu_regs_in_use; i++) { - subptr(rsp, sizeof(jdouble)); - fstp_d(Address(rsp, 0)); - } - incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); - for (int i = nb_args-1; i >= 0; i--) { - fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); - } - } - - subptr(rsp, nb_args*sizeof(jdouble)); - for (int i = 0; i < nb_args; i++) { - fstp_d(Address(rsp, i*sizeof(jdouble))); - } - -#ifdef _LP64 - if (nb_args > 0) { - movdbl(xmm0, Address(rsp, 0)); - } - if (nb_args > 1) { - movdbl(xmm1, Address(rsp, sizeof(jdouble))); - } - assert(nb_args <= 2, "unsupported number of args"); -#endif // _LP64 - - // NOTE: we must not use call_VM_leaf here because that requires a - // complete interpreter frame in debug mode -- same bug as 4387334 - // MacroAssembler::call_VM_leaf_base is perfectly safe and will - // do proper 64bit abi - - NEEDS_CLEANUP; - // Need to add stack banging before this runtime call if it needs to - // be taken; however, there is no generic stack banging routine at - // the MacroAssembler level - - MacroAssembler::call_VM_leaf_base(runtime_entry, 0); - -#ifdef _LP64 - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); -#endif // _LP64 - addptr(rsp, sizeof(jdouble) * nb_args); - if (num_fpu_regs_in_use > 1) { - // Must save return value to stack and then restore entire FPU - // stack except incoming arguments - fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); - for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { - fld_d(Address(rsp, 0)); - addptr(rsp, sizeof(jdouble)); - } - fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); - addptr(rsp, sizeof(jdouble) * nb_args); - } - - off = 0; - if (UseSSE == 1) { - movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); - movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); - movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); - movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); - movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); - movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); - movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); - movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); - addptr(rsp, sizeof(jdouble)*8); - } else if (UseSSE >= 2) { - // Restore whole 128bit (16 bytes) XMM regiters - movdqu(xmm0, Address(rsp,off++*16)); - movdqu(xmm1, Address(rsp,off++*16)); - movdqu(xmm2, Address(rsp,off++*16)); - movdqu(xmm3, Address(rsp,off++*16)); - movdqu(xmm4, Address(rsp,off++*16)); - movdqu(xmm5, Address(rsp,off++*16)); - movdqu(xmm6, Address(rsp,off++*16)); - movdqu(xmm7, Address(rsp,off++*16)); -#ifdef _LP64 - movdqu(xmm8, Address(rsp,off++*16)); - movdqu(xmm9, Address(rsp,off++*16)); - movdqu(xmm10, Address(rsp,off++*16)); - movdqu(xmm11, Address(rsp,off++*16)); - movdqu(xmm12, Address(rsp,off++*16)); - movdqu(xmm13, Address(rsp,off++*16)); - movdqu(xmm14, Address(rsp,off++*16)); - movdqu(xmm15, Address(rsp,off++*16)); -#endif - addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); -#ifdef COMPILER2 - if (MaxVectorSize > 16) { - // Restore upper half of YMM registes. - vinsertf128h(xmm0, Address(rsp, 0)); - vinsertf128h(xmm1, Address(rsp, 16)); - vinsertf128h(xmm2, Address(rsp, 32)); - vinsertf128h(xmm3, Address(rsp, 48)); - vinsertf128h(xmm4, Address(rsp, 64)); - vinsertf128h(xmm5, Address(rsp, 80)); - vinsertf128h(xmm6, Address(rsp, 96)); - vinsertf128h(xmm7, Address(rsp,112)); -#ifdef _LP64 - vinsertf128h(xmm8, Address(rsp,128)); - vinsertf128h(xmm9, Address(rsp,144)); - vinsertf128h(xmm10, Address(rsp,160)); - vinsertf128h(xmm11, Address(rsp,176)); - vinsertf128h(xmm12, Address(rsp,192)); - vinsertf128h(xmm13, Address(rsp,208)); - vinsertf128h(xmm14, Address(rsp,224)); - vinsertf128h(xmm15, Address(rsp,240)); -#endif - addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); - } -#endif - } - popa(); -} - -static const double pi_4 = 0.7853981633974483; - -void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { - // A hand-coded argument reduction for values in fabs(pi/4, pi/2) - // was attempted in this code; unfortunately it appears that the - // switch to 80-bit precision and back causes this to be - // unprofitable compared with simply performing a runtime call if - // the argument is out of the (-pi/4, pi/4) range. - - Register tmp = noreg; - if (!VM_Version::supports_cmov()) { - // fcmp needs a temporary so preserve rbx, - tmp = rbx; - push(tmp); - } - - Label slow_case, done; - - ExternalAddress pi4_adr = (address)&pi_4; - if (reachable(pi4_adr)) { - // x ?<= pi/4 - fld_d(pi4_adr); - fld_s(1); // Stack: X PI/4 X - fabs(); // Stack: |X| PI/4 X - fcmp(tmp); - jcc(Assembler::above, slow_case); - - // fastest case: -pi/4 <= x <= pi/4 - switch(trig) { - case 's': - fsin(); - break; - case 'c': - fcos(); - break; - case 't': - ftan(); - break; - default: - assert(false, "bad intrinsic"); - break; - } - jmp(done); - } - - // slow case: runtime call - bind(slow_case); - - switch(trig) { - case 's': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); - } - break; - case 'c': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); - } - break; - case 't': - { - fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); - } - break; - default: - assert(false, "bad intrinsic"); - break; - } - - // Come here with result in F-TOS - bind(done); - - if (tmp != noreg) { - pop(tmp); - } -} - - -// Look up the method for a megamorphic invokeinterface call. -// The target method is determined by . -// The receiver klass is in recv_klass. -// On success, the result will be in method_result, and execution falls through. -// On failure, execution transfers to the given label. -void MacroAssembler::lookup_interface_method(Register recv_klass, - Register intf_klass, - RegisterOrConstant itable_index, - Register method_result, - Register scan_temp, - Label& L_no_such_interface) { - assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); - assert(itable_index.is_constant() || itable_index.as_register() == method_result, - "caller must use same register for non-constant itable index as for method"); - - // Compute start of first itableOffsetEntry (which is at the end of the vtable) - int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; - int itentry_off = itableMethodEntry::method_offset_in_bytes(); - int scan_step = itableOffsetEntry::size() * wordSize; - int vte_size = vtableEntry::size() * wordSize; - Address::ScaleFactor times_vte_scale = Address::times_ptr; - assert(vte_size == wordSize, "else adjust times_vte_scale"); - - movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); - - // %%% Could store the aligned, prescaled offset in the klassoop. - lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); - if (HeapWordsPerLong > 1) { - // Round up to align_object_offset boundary - // see code for InstanceKlass::start_of_itable! - round_to(scan_temp, BytesPerLong); - } - - // Adjust recv_klass by scaled itable_index, so we can free itable_index. - assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); - lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); - - // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { - // if (scan->interface() == intf) { - // result = (klass + scan->offset() + itable_index); - // } - // } - Label search, found_method; - - for (int peel = 1; peel >= 0; peel--) { - movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); - cmpptr(intf_klass, method_result); - - if (peel) { - jccb(Assembler::equal, found_method); - } else { - jccb(Assembler::notEqual, search); - // (invert the test to fall through to found_method...) - } - - if (!peel) break; - - bind(search); - - // Check that the previous entry is non-null. A null entry means that - // the receiver class doesn't implement the interface, and wasn't the - // same as when the caller was compiled. - testptr(method_result, method_result); - jcc(Assembler::zero, L_no_such_interface); - addptr(scan_temp, scan_step); - } - - bind(found_method); - - // Got a hit. - movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); - movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); -} - - -// virtual method calling -void MacroAssembler::lookup_virtual_method(Register recv_klass, - RegisterOrConstant vtable_index, - Register method_result) { - const int base = InstanceKlass::vtable_start_offset() * wordSize; - assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); - Address vtable_entry_addr(recv_klass, - vtable_index, Address::times_ptr, - base + vtableEntry::method_offset_in_bytes()); - movptr(method_result, vtable_entry_addr); -} - - -void MacroAssembler::check_klass_subtype(Register sub_klass, - Register super_klass, - Register temp_reg, - Label& L_success) { - Label L_failure; - check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); - check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); - bind(L_failure); -} - - -void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Label* L_success, - Label* L_failure, - Label* L_slow_path, - RegisterOrConstant super_check_offset) { - assert_different_registers(sub_klass, super_klass, temp_reg); - bool must_load_sco = (super_check_offset.constant_or_zero() == -1); - if (super_check_offset.is_register()) { - assert_different_registers(sub_klass, super_klass, - super_check_offset.as_register()); - } else if (must_load_sco) { - assert(temp_reg != noreg, "supply either a temp or a register offset"); - } - - Label L_fallthrough; - int label_nulls = 0; - if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } - if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } - if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } - assert(label_nulls <= 1, "at most one NULL in the batch"); - - int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); - int sco_offset = in_bytes(Klass::super_check_offset_offset()); - Address super_check_offset_addr(super_klass, sco_offset); - - // Hacked jcc, which "knows" that L_fallthrough, at least, is in - // range of a jccb. If this routine grows larger, reconsider at - // least some of these. -#define local_jcc(assembler_cond, label) \ - if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ - else jcc( assembler_cond, label) /*omit semi*/ - - // Hacked jmp, which may only be used just before L_fallthrough. -#define final_jmp(label) \ - if (&(label) == &L_fallthrough) { /*do nothing*/ } \ - else jmp(label) /*omit semi*/ - - // If the pointers are equal, we are done (e.g., String[] elements). - // This self-check enables sharing of secondary supertype arrays among - // non-primary types such as array-of-interface. Otherwise, each such - // type would need its own customized SSA. - // We move this check to the front of the fast path because many - // type checks are in fact trivially successful in this manner, - // so we get a nicely predicted branch right at the start of the check. - cmpptr(sub_klass, super_klass); - local_jcc(Assembler::equal, *L_success); - - // Check the supertype display: - if (must_load_sco) { - // Positive movl does right thing on LP64. - movl(temp_reg, super_check_offset_addr); - super_check_offset = RegisterOrConstant(temp_reg); - } - Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); - cmpptr(super_klass, super_check_addr); // load displayed supertype - - // This check has worked decisively for primary supers. - // Secondary supers are sought in the super_cache ('super_cache_addr'). - // (Secondary supers are interfaces and very deeply nested subtypes.) - // This works in the same check above because of a tricky aliasing - // between the super_cache and the primary super display elements. - // (The 'super_check_addr' can address either, as the case requires.) - // Note that the cache is updated below if it does not help us find - // what we need immediately. - // So if it was a primary super, we can just fail immediately. - // Otherwise, it's the slow path for us (no success at this point). - - if (super_check_offset.is_register()) { - local_jcc(Assembler::equal, *L_success); - cmpl(super_check_offset.as_register(), sc_offset); - if (L_failure == &L_fallthrough) { - local_jcc(Assembler::equal, *L_slow_path); - } else { - local_jcc(Assembler::notEqual, *L_failure); - final_jmp(*L_slow_path); - } - } else if (super_check_offset.as_constant() == sc_offset) { - // Need a slow path; fast failure is impossible. - if (L_slow_path == &L_fallthrough) { - local_jcc(Assembler::equal, *L_success); - } else { - local_jcc(Assembler::notEqual, *L_slow_path); - final_jmp(*L_success); - } - } else { - // No slow path; it's a fast decision. - if (L_failure == &L_fallthrough) { - local_jcc(Assembler::equal, *L_success); - } else { - local_jcc(Assembler::notEqual, *L_failure); - final_jmp(*L_success); - } - } - - bind(L_fallthrough); - -#undef local_jcc -#undef final_jmp -} - - -void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Label* L_success, - Label* L_failure, - bool set_cond_codes) { - assert_different_registers(sub_klass, super_klass, temp_reg); - if (temp2_reg != noreg) - assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); -#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) - - Label L_fallthrough; - int label_nulls = 0; - if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } - if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } - assert(label_nulls <= 1, "at most one NULL in the batch"); - - // a couple of useful fields in sub_klass: - int ss_offset = in_bytes(Klass::secondary_supers_offset()); - int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); - Address secondary_supers_addr(sub_klass, ss_offset); - Address super_cache_addr( sub_klass, sc_offset); - - // Do a linear scan of the secondary super-klass chain. - // This code is rarely used, so simplicity is a virtue here. - // The repne_scan instruction uses fixed registers, which we must spill. - // Don't worry too much about pre-existing connections with the input regs. - - assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) - assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) - - // Get super_klass value into rax (even if it was in rdi or rcx). - bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; - if (super_klass != rax || UseCompressedOops) { - if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } - mov(rax, super_klass); - } - if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } - if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } - -#ifndef PRODUCT - int* pst_counter = &SharedRuntime::_partial_subtype_ctr; - ExternalAddress pst_counter_addr((address) pst_counter); - NOT_LP64( incrementl(pst_counter_addr) ); - LP64_ONLY( lea(rcx, pst_counter_addr) ); - LP64_ONLY( incrementl(Address(rcx, 0)) ); -#endif //PRODUCT - - // We will consult the secondary-super array. - movptr(rdi, secondary_supers_addr); - // Load the array length. (Positive movl does right thing on LP64.) - movl(rcx, Address(rdi, Array::length_offset_in_bytes())); - // Skip to start of data. - addptr(rdi, Array::base_offset_in_bytes()); - - // Scan RCX words at [RDI] for an occurrence of RAX. - // Set NZ/Z based on last compare. - // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does - // not change flags (only scas instruction which is repeated sets flags). - // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. - - testptr(rax,rax); // Set Z = 0 - repne_scan(); - - // Unspill the temp. registers: - if (pushed_rdi) pop(rdi); - if (pushed_rcx) pop(rcx); - if (pushed_rax) pop(rax); - - if (set_cond_codes) { - // Special hack for the AD files: rdi is guaranteed non-zero. - assert(!pushed_rdi, "rdi must be left non-NULL"); - // Also, the condition codes are properly set Z/NZ on succeed/failure. - } - - if (L_failure == &L_fallthrough) - jccb(Assembler::notEqual, *L_failure); - else jcc(Assembler::notEqual, *L_failure); - - // Success. Cache the super we found and proceed in triumph. - movptr(super_cache_addr, super_klass); - - if (L_success != &L_fallthrough) { - jmp(*L_success); - } - -#undef IS_A_TEMP - - bind(L_fallthrough); -} - - -void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { - if (VM_Version::supports_cmov()) { - cmovl(cc, dst, src); - } else { - Label L; - jccb(negate_condition(cc), L); - movl(dst, src); - bind(L); - } -} - -void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { - if (VM_Version::supports_cmov()) { - cmovl(cc, dst, src); - } else { - Label L; - jccb(negate_condition(cc), L); - movl(dst, src); - bind(L); - } -} - -void MacroAssembler::verify_oop(Register reg, const char* s) { - if (!VerifyOops) return; - - // Pass register number to verify_oop_subroutine - char* b = new char[strlen(s) + 50]; - sprintf(b, "verify_oop: %s: %s", reg->name(), s); - BLOCK_COMMENT("verify_oop {"); -#ifdef _LP64 - push(rscratch1); // save r10, trashed by movptr() -#endif - push(rax); // save rax, - push(reg); // pass register argument - ExternalAddress buffer((address) b); - // avoid using pushptr, as it modifies scratch registers - // and our contract is not to modify anything - movptr(rax, buffer.addr()); - push(rax); - // call indirectly to solve generation ordering problem - movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); - call(rax); - // Caller pops the arguments (oop, message) and restores rax, r10 - BLOCK_COMMENT("} verify_oop"); -} - - -RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - intptr_t value = *delayed_value_addr; - if (value != 0) - return RegisterOrConstant(value + offset); - - // load indirectly to solve generation ordering problem - movptr(tmp, ExternalAddress((address) delayed_value_addr)); - -#ifdef ASSERT - { Label L; - testptr(tmp, tmp); - if (WizardMode) { - jcc(Assembler::notZero, L); - char* buf = new char[40]; - sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); - STOP(buf); - } else { - jccb(Assembler::notZero, L); - hlt(); - } - bind(L); - } -#endif - - if (offset != 0) - addptr(tmp, offset); - - return RegisterOrConstant(tmp); -} - - -Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, - int extra_slot_offset) { - // cf. TemplateTable::prepare_invoke(), if (load_receiver). - int stackElementSize = Interpreter::stackElementSize; - int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); -#ifdef ASSERT - int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); - assert(offset1 - offset == stackElementSize, "correct arithmetic"); -#endif - Register scale_reg = noreg; - Address::ScaleFactor scale_factor = Address::no_scale; - if (arg_slot.is_constant()) { - offset += arg_slot.as_constant() * stackElementSize; - } else { - scale_reg = arg_slot.as_register(); - scale_factor = Address::times(stackElementSize); - } - offset += wordSize; // return PC is on stack - return Address(rsp, scale_reg, scale_factor, offset); -} - - -void MacroAssembler::verify_oop_addr(Address addr, const char* s) { - if (!VerifyOops) return; - - // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); - // Pass register number to verify_oop_subroutine - char* b = new char[strlen(s) + 50]; - sprintf(b, "verify_oop_addr: %s", s); - -#ifdef _LP64 - push(rscratch1); // save r10, trashed by movptr() -#endif - push(rax); // save rax, - // addr may contain rsp so we will have to adjust it based on the push - // we just did (and on 64 bit we do two pushes) - // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which - // stores rax into addr which is backwards of what was intended. - if (addr.uses(rsp)) { - lea(rax, addr); - pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); - } else { - pushptr(addr); - } - - ExternalAddress buffer((address) b); - // pass msg argument - // avoid using pushptr, as it modifies scratch registers - // and our contract is not to modify anything - movptr(rax, buffer.addr()); - push(rax); - - // call indirectly to solve generation ordering problem - movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); - call(rax); - // Caller pops the arguments (addr, message) and restores rax, r10. -} - -void MacroAssembler::verify_tlab() { -#ifdef ASSERT - if (UseTLAB && VerifyOops) { - Label next, ok; - Register t1 = rsi; - Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); - - push(t1); - NOT_LP64(push(thread_reg)); - NOT_LP64(get_thread(thread_reg)); - - movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); - cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); - jcc(Assembler::aboveEqual, next); - STOP("assert(top >= start)"); - should_not_reach_here(); - - bind(next); - movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); - cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); - jcc(Assembler::aboveEqual, ok); - STOP("assert(top <= end)"); - should_not_reach_here(); - - bind(ok); - NOT_LP64(pop(thread_reg)); - pop(t1); - } -#endif -} - -class ControlWord { - public: - int32_t _value; - - int rounding_control() const { return (_value >> 10) & 3 ; } - int precision_control() const { return (_value >> 8) & 3 ; } - bool precision() const { return ((_value >> 5) & 1) != 0; } - bool underflow() const { return ((_value >> 4) & 1) != 0; } - bool overflow() const { return ((_value >> 3) & 1) != 0; } - bool zero_divide() const { return ((_value >> 2) & 1) != 0; } - bool denormalized() const { return ((_value >> 1) & 1) != 0; } - bool invalid() const { return ((_value >> 0) & 1) != 0; } - - void print() const { - // rounding control - const char* rc; - switch (rounding_control()) { - case 0: rc = "round near"; break; - case 1: rc = "round down"; break; - case 2: rc = "round up "; break; - case 3: rc = "chop "; break; - }; - // precision control - const char* pc; - switch (precision_control()) { - case 0: pc = "24 bits "; break; - case 1: pc = "reserved"; break; - case 2: pc = "53 bits "; break; - case 3: pc = "64 bits "; break; - }; - // flags - char f[9]; - f[0] = ' '; - f[1] = ' '; - f[2] = (precision ()) ? 'P' : 'p'; - f[3] = (underflow ()) ? 'U' : 'u'; - f[4] = (overflow ()) ? 'O' : 'o'; - f[5] = (zero_divide ()) ? 'Z' : 'z'; - f[6] = (denormalized()) ? 'D' : 'd'; - f[7] = (invalid ()) ? 'I' : 'i'; - f[8] = '\x0'; - // output - printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); - } - -}; - -class StatusWord { - public: - int32_t _value; - - bool busy() const { return ((_value >> 15) & 1) != 0; } - bool C3() const { return ((_value >> 14) & 1) != 0; } - bool C2() const { return ((_value >> 10) & 1) != 0; } - bool C1() const { return ((_value >> 9) & 1) != 0; } - bool C0() const { return ((_value >> 8) & 1) != 0; } - int top() const { return (_value >> 11) & 7 ; } - bool error_status() const { return ((_value >> 7) & 1) != 0; } - bool stack_fault() const { return ((_value >> 6) & 1) != 0; } - bool precision() const { return ((_value >> 5) & 1) != 0; } - bool underflow() const { return ((_value >> 4) & 1) != 0; } - bool overflow() const { return ((_value >> 3) & 1) != 0; } - bool zero_divide() const { return ((_value >> 2) & 1) != 0; } - bool denormalized() const { return ((_value >> 1) & 1) != 0; } - bool invalid() const { return ((_value >> 0) & 1) != 0; } - - void print() const { - // condition codes - char c[5]; - c[0] = (C3()) ? '3' : '-'; - c[1] = (C2()) ? '2' : '-'; - c[2] = (C1()) ? '1' : '-'; - c[3] = (C0()) ? '0' : '-'; - c[4] = '\x0'; - // flags - char f[9]; - f[0] = (error_status()) ? 'E' : '-'; - f[1] = (stack_fault ()) ? 'S' : '-'; - f[2] = (precision ()) ? 'P' : '-'; - f[3] = (underflow ()) ? 'U' : '-'; - f[4] = (overflow ()) ? 'O' : '-'; - f[5] = (zero_divide ()) ? 'Z' : '-'; - f[6] = (denormalized()) ? 'D' : '-'; - f[7] = (invalid ()) ? 'I' : '-'; - f[8] = '\x0'; - // output - printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); - } - -}; - -class TagWord { - public: - int32_t _value; - - int tag_at(int i) const { return (_value >> (i*2)) & 3; } - - void print() const { - printf("%04x", _value & 0xFFFF); - } - -}; - -class FPU_Register { - public: - int32_t _m0; - int32_t _m1; - int16_t _ex; - - bool is_indefinite() const { - return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; - } - - void print() const { - char sign = (_ex < 0) ? '-' : '+'; - const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; - printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); - }; - -}; - -class FPU_State { - public: - enum { - register_size = 10, - number_of_registers = 8, - register_mask = 7 - }; - - ControlWord _control_word; - StatusWord _status_word; - TagWord _tag_word; - int32_t _error_offset; - int32_t _error_selector; - int32_t _data_offset; - int32_t _data_selector; - int8_t _register[register_size * number_of_registers]; - - int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } - FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } - - const char* tag_as_string(int tag) const { - switch (tag) { - case 0: return "valid"; - case 1: return "zero"; - case 2: return "special"; - case 3: return "empty"; - } - ShouldNotReachHere(); - return NULL; - } - - void print() const { - // print computation registers - { int t = _status_word.top(); - for (int i = 0; i < number_of_registers; i++) { - int j = (i - t) & register_mask; - printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); - st(j)->print(); - printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); - } - } - printf("\n"); - // print control registers - printf("ctrl = "); _control_word.print(); printf("\n"); - printf("stat = "); _status_word .print(); printf("\n"); - printf("tags = "); _tag_word .print(); printf("\n"); - } - -}; - -class Flag_Register { - public: - int32_t _value; - - bool overflow() const { return ((_value >> 11) & 1) != 0; } - bool direction() const { return ((_value >> 10) & 1) != 0; } - bool sign() const { return ((_value >> 7) & 1) != 0; } - bool zero() const { return ((_value >> 6) & 1) != 0; } - bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } - bool parity() const { return ((_value >> 2) & 1) != 0; } - bool carry() const { return ((_value >> 0) & 1) != 0; } - - void print() const { - // flags - char f[8]; - f[0] = (overflow ()) ? 'O' : '-'; - f[1] = (direction ()) ? 'D' : '-'; - f[2] = (sign ()) ? 'S' : '-'; - f[3] = (zero ()) ? 'Z' : '-'; - f[4] = (auxiliary_carry()) ? 'A' : '-'; - f[5] = (parity ()) ? 'P' : '-'; - f[6] = (carry ()) ? 'C' : '-'; - f[7] = '\x0'; - // output - printf("%08x flags = %s", _value, f); - } - -}; - -class IU_Register { - public: - int32_t _value; - - void print() const { - printf("%08x %11d", _value, _value); - } - -}; - -class IU_State { - public: - Flag_Register _eflags; - IU_Register _rdi; - IU_Register _rsi; - IU_Register _rbp; - IU_Register _rsp; - IU_Register _rbx; - IU_Register _rdx; - IU_Register _rcx; - IU_Register _rax; - - void print() const { - // computation registers - printf("rax, = "); _rax.print(); printf("\n"); - printf("rbx, = "); _rbx.print(); printf("\n"); - printf("rcx = "); _rcx.print(); printf("\n"); - printf("rdx = "); _rdx.print(); printf("\n"); - printf("rdi = "); _rdi.print(); printf("\n"); - printf("rsi = "); _rsi.print(); printf("\n"); - printf("rbp, = "); _rbp.print(); printf("\n"); - printf("rsp = "); _rsp.print(); printf("\n"); - printf("\n"); - // control registers - printf("flgs = "); _eflags.print(); printf("\n"); - } -}; - - -class CPU_State { - public: - FPU_State _fpu_state; - IU_State _iu_state; - - void print() const { - printf("--------------------------------------------------\n"); - _iu_state .print(); - printf("\n"); - _fpu_state.print(); - printf("--------------------------------------------------\n"); - } - -}; - - -static void _print_CPU_state(CPU_State* state) { - state->print(); -}; - - -void MacroAssembler::print_CPU_state() { - push_CPU_state(); - push(rsp); // pass CPU state - call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); - addptr(rsp, wordSize); // discard argument - pop_CPU_state(); -} - - -static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { - static int counter = 0; - FPU_State* fs = &state->_fpu_state; - counter++; - // For leaf calls, only verify that the top few elements remain empty. - // We only need 1 empty at the top for C2 code. - if( stack_depth < 0 ) { - if( fs->tag_for_st(7) != 3 ) { - printf("FPR7 not empty\n"); - state->print(); - assert(false, "error"); - return false; - } - return true; // All other stack states do not matter - } - - assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, - "bad FPU control word"); - - // compute stack depth - int i = 0; - while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; - int d = i; - while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; - // verify findings - if (i != FPU_State::number_of_registers) { - // stack not contiguous - printf("%s: stack not contiguous at ST%d\n", s, i); - state->print(); - assert(false, "error"); - return false; - } - // check if computed stack depth corresponds to expected stack depth - if (stack_depth < 0) { - // expected stack depth is -stack_depth or less - if (d > -stack_depth) { - // too many elements on the stack - printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); - state->print(); - assert(false, "error"); - return false; - } - } else { - // expected stack depth is stack_depth - if (d != stack_depth) { - // wrong stack depth - printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); - state->print(); - assert(false, "error"); - return false; - } - } - // everything is cool - return true; -} - - -void MacroAssembler::verify_FPU(int stack_depth, const char* s) { - if (!VerifyFPU) return; - push_CPU_state(); - push(rsp); // pass CPU state - ExternalAddress msg((address) s); - // pass message string s - pushptr(msg.addr()); - push(stack_depth); // pass stack depth - call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); - addptr(rsp, 3 * wordSize); // discard arguments - // check for error - { Label L; - testl(rax, rax); - jcc(Assembler::notZero, L); - int3(); // break if error condition - bind(L); - } - pop_CPU_state(); -} - -void MacroAssembler::load_klass(Register dst, Register src) { -#ifdef _LP64 - if (UseCompressedKlassPointers) { - movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); - decode_klass_not_null(dst); - } else -#endif - movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); -} - -void MacroAssembler::load_prototype_header(Register dst, Register src) { -#ifdef _LP64 - if (UseCompressedKlassPointers) { - assert (Universe::heap() != NULL, "java heap should be initialized"); - movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); - if (Universe::narrow_klass_shift() != 0) { - assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); - movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); - } else { - movq(dst, Address(dst, Klass::prototype_header_offset())); - } - } else -#endif - { - movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); - movptr(dst, Address(dst, Klass::prototype_header_offset())); - } -} - -void MacroAssembler::store_klass(Register dst, Register src) { -#ifdef _LP64 - if (UseCompressedKlassPointers) { - encode_klass_not_null(src); - movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); - } else -#endif - movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); -} - -void MacroAssembler::load_heap_oop(Register dst, Address src) { -#ifdef _LP64 - // FIXME: Must change all places where we try to load the klass. - if (UseCompressedOops) { - movl(dst, src); - decode_heap_oop(dst); - } else -#endif - movptr(dst, src); -} - -// Doesn't do verfication, generates fixed size code -void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { -#ifdef _LP64 - if (UseCompressedOops) { - movl(dst, src); - decode_heap_oop_not_null(dst); - } else -#endif - movptr(dst, src); -} - -void MacroAssembler::store_heap_oop(Address dst, Register src) { -#ifdef _LP64 - if (UseCompressedOops) { - assert(!dst.uses(src), "not enough registers"); - encode_heap_oop(src); - movl(dst, src); - } else -#endif - movptr(dst, src); -} - -void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) { - assert_different_registers(src1, tmp); -#ifdef _LP64 - if (UseCompressedOops) { - bool did_push = false; - if (tmp == noreg) { - tmp = rax; - push(tmp); - did_push = true; - assert(!src2.uses(rsp), "can't push"); - } - load_heap_oop(tmp, src2); - cmpptr(src1, tmp); - if (did_push) pop(tmp); - } else -#endif - cmpptr(src1, src2); -} - -// Used for storing NULLs. -void MacroAssembler::store_heap_oop_null(Address dst) { -#ifdef _LP64 - if (UseCompressedOops) { - movl(dst, (int32_t)NULL_WORD); - } else { - movslq(dst, (int32_t)NULL_WORD); - } -#else - movl(dst, (int32_t)NULL_WORD); -#endif -} - -#ifdef _LP64 -void MacroAssembler::store_klass_gap(Register dst, Register src) { - if (UseCompressedKlassPointers) { - // Store to klass gap in destination - movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); - } -} - -#ifdef ASSERT -void MacroAssembler::verify_heapbase(const char* msg) { - assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - if (CheckCompressedOops) { - Label ok; - push(rscratch1); // cmpptr trashes rscratch1 - cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); - jcc(Assembler::equal, ok); - STOP(msg); - bind(ok); - pop(rscratch1); - } -} -#endif - -// Algorithm must match oop.inline.hpp encode_heap_oop. -void MacroAssembler::encode_heap_oop(Register r) { -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); -#endif - verify_oop(r, "broken oop in encode_heap_oop"); - if (Universe::narrow_oop_base() == NULL) { - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - shrq(r, LogMinObjAlignmentInBytes); - } - return; - } - testq(r, r); - cmovq(Assembler::equal, r, r12_heapbase); - subq(r, r12_heapbase); - shrq(r, LogMinObjAlignmentInBytes); -} - -void MacroAssembler::encode_heap_oop_not_null(Register r) { -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); - if (CheckCompressedOops) { - Label ok; - testq(r, r); - jcc(Assembler::notEqual, ok); - STOP("null oop passed to encode_heap_oop_not_null"); - bind(ok); - } -#endif - verify_oop(r, "broken oop in encode_heap_oop_not_null"); - if (Universe::narrow_oop_base() != NULL) { - subq(r, r12_heapbase); - } - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - shrq(r, LogMinObjAlignmentInBytes); - } -} - -void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); - if (CheckCompressedOops) { - Label ok; - testq(src, src); - jcc(Assembler::notEqual, ok); - STOP("null oop passed to encode_heap_oop_not_null2"); - bind(ok); - } -#endif - verify_oop(src, "broken oop in encode_heap_oop_not_null2"); - if (dst != src) { - movq(dst, src); - } - if (Universe::narrow_oop_base() != NULL) { - subq(dst, r12_heapbase); - } - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - shrq(dst, LogMinObjAlignmentInBytes); - } -} - -void MacroAssembler::decode_heap_oop(Register r) { -#ifdef ASSERT - verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); -#endif - if (Universe::narrow_oop_base() == NULL) { - if (Universe::narrow_oop_shift() != 0) { - assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - shlq(r, LogMinObjAlignmentInBytes); - } - } else { - Label done; - shlq(r, LogMinObjAlignmentInBytes); - jccb(Assembler::equal, done); - addq(r, r12_heapbase); - bind(done); - } - verify_oop(r, "broken oop in decode_heap_oop"); -} - -void MacroAssembler::decode_heap_oop_not_null(Register r) { - // Note: it will change flags - assert (UseCompressedOops, "should only be used for compressed headers"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - // Cannot assert, unverified entry point counts instructions (see .ad file) - // vtableStubs also counts instructions in pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. - if (Universe::narrow_oop_shift() != 0) { - assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - shlq(r, LogMinObjAlignmentInBytes); - if (Universe::narrow_oop_base() != NULL) { - addq(r, r12_heapbase); - } - } else { - assert (Universe::narrow_oop_base() == NULL, "sanity"); - } -} - -void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { - // Note: it will change flags - assert (UseCompressedOops, "should only be used for compressed headers"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - // Cannot assert, unverified entry point counts instructions (see .ad file) - // vtableStubs also counts instructions in pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. - if (Universe::narrow_oop_shift() != 0) { - assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); - if (LogMinObjAlignmentInBytes == Address::times_8) { - leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); - } else { - if (dst != src) { - movq(dst, src); - } - shlq(dst, LogMinObjAlignmentInBytes); - if (Universe::narrow_oop_base() != NULL) { - addq(dst, r12_heapbase); - } - } - } else { - assert (Universe::narrow_oop_base() == NULL, "sanity"); - if (dst != src) { - movq(dst, src); - } - } -} - -void MacroAssembler::encode_klass_not_null(Register r) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?"); -#endif - if (Universe::narrow_klass_base() != NULL) { - subq(r, r12_heapbase); - } - if (Universe::narrow_klass_shift() != 0) { - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - shrq(r, LogKlassAlignmentInBytes); - } -} - -void MacroAssembler::encode_klass_not_null(Register dst, Register src) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); -#ifdef ASSERT - verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); -#endif - if (dst != src) { - movq(dst, src); - } - if (Universe::narrow_klass_base() != NULL) { - subq(dst, r12_heapbase); - } - if (Universe::narrow_klass_shift() != 0) { - assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - shrq(dst, LogKlassAlignmentInBytes); - } -} - -void MacroAssembler::decode_klass_not_null(Register r) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); - // Note: it will change flags - assert (UseCompressedKlassPointers, "should only be used for compressed headers"); - // Cannot assert, unverified entry point counts instructions (see .ad file) - // vtableStubs also counts instructions in pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. - if (Universe::narrow_klass_shift() != 0) { - assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - shlq(r, LogKlassAlignmentInBytes); - if (Universe::narrow_klass_base() != NULL) { - addq(r, r12_heapbase); - } - } else { - assert (Universe::narrow_klass_base() == NULL, "sanity"); - } -} - -void MacroAssembler::decode_klass_not_null(Register dst, Register src) { - assert(Metaspace::is_initialized(), "metaspace should be initialized"); - // Note: it will change flags - assert (UseCompressedKlassPointers, "should only be used for compressed headers"); - // Cannot assert, unverified entry point counts instructions (see .ad file) - // vtableStubs also counts instructions in pd_code_size_limit. - // Also do not verify_oop as this is called by verify_oop. - if (Universe::narrow_klass_shift() != 0) { - assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); - assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); - leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); - } else { - assert (Universe::narrow_klass_base() == NULL, "sanity"); - if (dst != src) { - movq(dst, src); - } - } -} - -void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { - assert (UseCompressedOops, "should only be used for compressed headers"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int oop_index = oop_recorder()->find_index(obj); - RelocationHolder rspec = oop_Relocation::spec(oop_index); - mov_narrow_oop(dst, oop_index, rspec); -} - -void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { - assert (UseCompressedOops, "should only be used for compressed headers"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int oop_index = oop_recorder()->find_index(obj); - RelocationHolder rspec = oop_Relocation::spec(oop_index); - mov_narrow_oop(dst, oop_index, rspec); -} - -void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { - assert (UseCompressedKlassPointers, "should only be used for compressed headers"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int klass_index = oop_recorder()->find_index(k); - RelocationHolder rspec = metadata_Relocation::spec(klass_index); - mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); -} - -void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { - assert (UseCompressedKlassPointers, "should only be used for compressed headers"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int klass_index = oop_recorder()->find_index(k); - RelocationHolder rspec = metadata_Relocation::spec(klass_index); - mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); -} - -void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { - assert (UseCompressedOops, "should only be used for compressed headers"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int oop_index = oop_recorder()->find_index(obj); - RelocationHolder rspec = oop_Relocation::spec(oop_index); - Assembler::cmp_narrow_oop(dst, oop_index, rspec); -} - -void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { - assert (UseCompressedOops, "should only be used for compressed headers"); - assert (Universe::heap() != NULL, "java heap should be initialized"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int oop_index = oop_recorder()->find_index(obj); - RelocationHolder rspec = oop_Relocation::spec(oop_index); - Assembler::cmp_narrow_oop(dst, oop_index, rspec); -} - -void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { - assert (UseCompressedKlassPointers, "should only be used for compressed headers"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int klass_index = oop_recorder()->find_index(k); - RelocationHolder rspec = metadata_Relocation::spec(klass_index); - Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); -} - -void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { - assert (UseCompressedKlassPointers, "should only be used for compressed headers"); - assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); - int klass_index = oop_recorder()->find_index(k); - RelocationHolder rspec = metadata_Relocation::spec(klass_index); - Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); -} - -void MacroAssembler::reinit_heapbase() { - if (UseCompressedOops || UseCompressedKlassPointers) { - movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); - } -} -#endif // _LP64 - - -// C2 compiled method's prolog code. -void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { - - // WARNING: Initial instruction MUST be 5 bytes or longer so that - // NativeJump::patch_verified_entry will be able to patch out the entry - // code safely. The push to verify stack depth is ok at 5 bytes, - // the frame allocation can be either 3 or 6 bytes. So if we don't do - // stack bang then we must use the 6 byte frame allocation even if - // we have no frame. :-( - - assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - // Remove word for return addr - framesize -= wordSize; - - // Calls to C2R adapters often do not accept exceptional returns. - // We require that their callers must bang for them. But be careful, because - // some VM calls (such as call site linkage) can use several kilobytes of - // stack. But the stack safety zone should account for that. - // See bugs 4446381, 4468289, 4497237. - if (stack_bang) { - generate_stack_overflow_check(framesize); - - // We always push rbp, so that on return to interpreter rbp, will be - // restored correctly and we can correct the stack. - push(rbp); - // Remove word for ebp - framesize -= wordSize; - - // Create frame - if (framesize) { - subptr(rsp, framesize); - } - } else { - // Create frame (force generation of a 4 byte immediate value) - subptr_imm32(rsp, framesize); - - // Save RBP register now. - framesize -= wordSize; - movptr(Address(rsp, framesize), rbp); - } - - if (VerifyStackAtCalls) { // Majik cookie to verify stack depth - framesize -= wordSize; - movptr(Address(rsp, framesize), (int32_t)0xbadb100d); - } - -#ifndef _LP64 - // If method sets FPU control word do it now - if (fp_mode_24b) { - fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); - } - if (UseSSE >= 2 && VerifyFPU) { - verify_FPU(0, "FPU stack must be clean on entry"); - } -#endif - -#ifdef ASSERT - if (VerifyStackAtCalls) { - Label L; - push(rax); - mov(rax, rsp); - andptr(rax, StackAlignmentInBytes-1); - cmpptr(rax, StackAlignmentInBytes-wordSize); - pop(rax); - jcc(Assembler::equal, L); - STOP("Stack is not properly aligned!"); - bind(L); - } -#endif - -} - - -// IndexOf for constant substrings with size >= 8 chars -// which don't need to be loaded through stack. -void MacroAssembler::string_indexofC8(Register str1, Register str2, - Register cnt1, Register cnt2, - int int_cnt2, Register result, - XMMRegister vec, Register tmp) { - ShortBranchVerifier sbv(this); - assert(UseSSE42Intrinsics, "SSE4.2 is required"); - - // This method uses pcmpestri inxtruction with bound registers - // inputs: - // xmm - substring - // rax - substring length (elements count) - // mem - scanned string - // rdx - string length (elements count) - // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) - // outputs: - // rcx - matched index in string - assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); - - Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, - RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, - MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; - - // Note, inline_string_indexOf() generates checks: - // if (substr.count > string.count) return -1; - // if (substr.count == 0) return 0; - assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); - - // Load substring. - movdqu(vec, Address(str2, 0)); - movl(cnt2, int_cnt2); - movptr(result, str1); // string addr - - if (int_cnt2 > 8) { - jmpb(SCAN_TO_SUBSTR); - - // Reload substr for rescan, this code - // is executed only for large substrings (> 8 chars) - bind(RELOAD_SUBSTR); - movdqu(vec, Address(str2, 0)); - negptr(cnt2); // Jumped here with negative cnt2, convert to positive - - bind(RELOAD_STR); - // We came here after the beginning of the substring was - // matched but the rest of it was not so we need to search - // again. Start from the next element after the previous match. - - // cnt2 is number of substring reminding elements and - // cnt1 is number of string reminding elements when cmp failed. - // Restored cnt1 = cnt1 - cnt2 + int_cnt2 - subl(cnt1, cnt2); - addl(cnt1, int_cnt2); - movl(cnt2, int_cnt2); // Now restore cnt2 - - decrementl(cnt1); // Shift to next element - cmpl(cnt1, cnt2); - jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring - - addptr(result, 2); - - } // (int_cnt2 > 8) - - // Scan string for start of substr in 16-byte vectors - bind(SCAN_TO_SUBSTR); - pcmpestri(vec, Address(result, 0), 0x0d); - jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 - subl(cnt1, 8); - jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string - cmpl(cnt1, cnt2); - jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring - addptr(result, 16); - jmpb(SCAN_TO_SUBSTR); - - // Found a potential substr - bind(FOUND_CANDIDATE); - // Matched whole vector if first element matched (tmp(rcx) == 0). - if (int_cnt2 == 8) { - jccb(Assembler::overflow, RET_FOUND); // OF == 1 - } else { // int_cnt2 > 8 - jccb(Assembler::overflow, FOUND_SUBSTR); - } - // After pcmpestri tmp(rcx) contains matched element index - // Compute start addr of substr - lea(result, Address(result, tmp, Address::times_2)); - - // Make sure string is still long enough - subl(cnt1, tmp); - cmpl(cnt1, cnt2); - if (int_cnt2 == 8) { - jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); - } else { // int_cnt2 > 8 - jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); - } - // Left less then substring. - - bind(RET_NOT_FOUND); - movl(result, -1); - jmpb(EXIT); - - if (int_cnt2 > 8) { - // This code is optimized for the case when whole substring - // is matched if its head is matched. - bind(MATCH_SUBSTR_HEAD); - pcmpestri(vec, Address(result, 0), 0x0d); - // Reload only string if does not match - jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 - - Label CONT_SCAN_SUBSTR; - // Compare the rest of substring (> 8 chars). - bind(FOUND_SUBSTR); - // First 8 chars are already matched. - negptr(cnt2); - addptr(cnt2, 8); - - bind(SCAN_SUBSTR); - subl(cnt1, 8); - cmpl(cnt2, -8); // Do not read beyond substring - jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); - // Back-up strings to avoid reading beyond substring: - // cnt1 = cnt1 - cnt2 + 8 - addl(cnt1, cnt2); // cnt2 is negative - addl(cnt1, 8); - movl(cnt2, 8); negptr(cnt2); - bind(CONT_SCAN_SUBSTR); - if (int_cnt2 < (int)G) { - movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); - pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); - } else { - // calculate index in register to avoid integer overflow (int_cnt2*2) - movl(tmp, int_cnt2); - addptr(tmp, cnt2); - movdqu(vec, Address(str2, tmp, Address::times_2, 0)); - pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); - } - // Need to reload strings pointers if not matched whole vector - jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 - addptr(cnt2, 8); - jcc(Assembler::negative, SCAN_SUBSTR); - // Fall through if found full substring - - } // (int_cnt2 > 8) - - bind(RET_FOUND); - // Found result if we matched full small substring. - // Compute substr offset - subptr(result, str1); - shrl(result, 1); // index - bind(EXIT); - -} // string_indexofC8 - -// Small strings are loaded through stack if they cross page boundary. -void MacroAssembler::string_indexof(Register str1, Register str2, - Register cnt1, Register cnt2, - int int_cnt2, Register result, - XMMRegister vec, Register tmp) { - ShortBranchVerifier sbv(this); - assert(UseSSE42Intrinsics, "SSE4.2 is required"); - // - // int_cnt2 is length of small (< 8 chars) constant substring - // or (-1) for non constant substring in which case its length - // is in cnt2 register. - // - // Note, inline_string_indexOf() generates checks: - // if (substr.count > string.count) return -1; - // if (substr.count == 0) return 0; - // - assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); - - // This method uses pcmpestri inxtruction with bound registers - // inputs: - // xmm - substring - // rax - substring length (elements count) - // mem - scanned string - // rdx - string length (elements count) - // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) - // outputs: - // rcx - matched index in string - assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); - - Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, - RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, - FOUND_CANDIDATE; - - { //======================================================== - // We don't know where these strings are located - // and we can't read beyond them. Load them through stack. - Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; - - movptr(tmp, rsp); // save old SP - - if (int_cnt2 > 0) { // small (< 8 chars) constant substring - if (int_cnt2 == 1) { // One char - load_unsigned_short(result, Address(str2, 0)); - movdl(vec, result); // move 32 bits - } else if (int_cnt2 == 2) { // Two chars - movdl(vec, Address(str2, 0)); // move 32 bits - } else if (int_cnt2 == 4) { // Four chars - movq(vec, Address(str2, 0)); // move 64 bits - } else { // cnt2 = { 3, 5, 6, 7 } - // Array header size is 12 bytes in 32-bit VM - // + 6 bytes for 3 chars == 18 bytes, - // enough space to load vec and shift. - assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity"); - movdqu(vec, Address(str2, (int_cnt2*2)-16)); - psrldq(vec, 16-(int_cnt2*2)); - } - } else { // not constant substring - cmpl(cnt2, 8); - jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough - - // We can read beyond string if srt+16 does not cross page boundary - // since heaps are aligned and mapped by pages. - assert(os::vm_page_size() < (int)G, "default page should be small"); - movl(result, str2); // We need only low 32 bits - andl(result, (os::vm_page_size()-1)); - cmpl(result, (os::vm_page_size()-16)); - jccb(Assembler::belowEqual, CHECK_STR); - - // Move small strings to stack to allow load 16 bytes into vec. - subptr(rsp, 16); - int stk_offset = wordSize-2; - push(cnt2); - - bind(COPY_SUBSTR); - load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); - movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); - decrement(cnt2); - jccb(Assembler::notZero, COPY_SUBSTR); - - pop(cnt2); - movptr(str2, rsp); // New substring address - } // non constant - - bind(CHECK_STR); - cmpl(cnt1, 8); - jccb(Assembler::aboveEqual, BIG_STRINGS); - - // Check cross page boundary. - movl(result, str1); // We need only low 32 bits - andl(result, (os::vm_page_size()-1)); - cmpl(result, (os::vm_page_size()-16)); - jccb(Assembler::belowEqual, BIG_STRINGS); - - subptr(rsp, 16); - int stk_offset = -2; - if (int_cnt2 < 0) { // not constant - push(cnt2); - stk_offset += wordSize; - } - movl(cnt2, cnt1); - - bind(COPY_STR); - load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); - movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); - decrement(cnt2); - jccb(Assembler::notZero, COPY_STR); - - if (int_cnt2 < 0) { // not constant - pop(cnt2); - } - movptr(str1, rsp); // New string address - - bind(BIG_STRINGS); - // Load substring. - if (int_cnt2 < 0) { // -1 - movdqu(vec, Address(str2, 0)); - push(cnt2); // substr count - push(str2); // substr addr - push(str1); // string addr - } else { - // Small (< 8 chars) constant substrings are loaded already. - movl(cnt2, int_cnt2); - } - push(tmp); // original SP - - } // Finished loading - - //======================================================== - // Start search - // - - movptr(result, str1); // string addr - - if (int_cnt2 < 0) { // Only for non constant substring - jmpb(SCAN_TO_SUBSTR); - - // SP saved at sp+0 - // String saved at sp+1*wordSize - // Substr saved at sp+2*wordSize - // Substr count saved at sp+3*wordSize - - // Reload substr for rescan, this code - // is executed only for large substrings (> 8 chars) - bind(RELOAD_SUBSTR); - movptr(str2, Address(rsp, 2*wordSize)); - movl(cnt2, Address(rsp, 3*wordSize)); - movdqu(vec, Address(str2, 0)); - // We came here after the beginning of the substring was - // matched but the rest of it was not so we need to search - // again. Start from the next element after the previous match. - subptr(str1, result); // Restore counter - shrl(str1, 1); - addl(cnt1, str1); - decrementl(cnt1); // Shift to next element - cmpl(cnt1, cnt2); - jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring - - addptr(result, 2); - } // non constant - - // Scan string for start of substr in 16-byte vectors - bind(SCAN_TO_SUBSTR); - assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); - pcmpestri(vec, Address(result, 0), 0x0d); - jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 - subl(cnt1, 8); - jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string - cmpl(cnt1, cnt2); - jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring - addptr(result, 16); - - bind(ADJUST_STR); - cmpl(cnt1, 8); // Do not read beyond string - jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); - // Back-up string to avoid reading beyond string. - lea(result, Address(result, cnt1, Address::times_2, -16)); - movl(cnt1, 8); - jmpb(SCAN_TO_SUBSTR); - - // Found a potential substr - bind(FOUND_CANDIDATE); - // After pcmpestri tmp(rcx) contains matched element index - - // Make sure string is still long enough - subl(cnt1, tmp); - cmpl(cnt1, cnt2); - jccb(Assembler::greaterEqual, FOUND_SUBSTR); - // Left less then substring. - - bind(RET_NOT_FOUND); - movl(result, -1); - jmpb(CLEANUP); - - bind(FOUND_SUBSTR); - // Compute start addr of substr - lea(result, Address(result, tmp, Address::times_2)); - - if (int_cnt2 > 0) { // Constant substring - // Repeat search for small substring (< 8 chars) - // from new point without reloading substring. - // Have to check that we don't read beyond string. - cmpl(tmp, 8-int_cnt2); - jccb(Assembler::greater, ADJUST_STR); - // Fall through if matched whole substring. - } else { // non constant - assert(int_cnt2 == -1, "should be != 0"); - - addl(tmp, cnt2); - // Found result if we matched whole substring. - cmpl(tmp, 8); - jccb(Assembler::lessEqual, RET_FOUND); - - // Repeat search for small substring (<= 8 chars) - // from new point 'str1' without reloading substring. - cmpl(cnt2, 8); - // Have to check that we don't read beyond string. - jccb(Assembler::lessEqual, ADJUST_STR); - - Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; - // Compare the rest of substring (> 8 chars). - movptr(str1, result); - - cmpl(tmp, cnt2); - // First 8 chars are already matched. - jccb(Assembler::equal, CHECK_NEXT); - - bind(SCAN_SUBSTR); - pcmpestri(vec, Address(str1, 0), 0x0d); - // Need to reload strings pointers if not matched whole vector - jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 - - bind(CHECK_NEXT); - subl(cnt2, 8); - jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring - addptr(str1, 16); - addptr(str2, 16); - subl(cnt1, 8); - cmpl(cnt2, 8); // Do not read beyond substring - jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); - // Back-up strings to avoid reading beyond substring. - lea(str2, Address(str2, cnt2, Address::times_2, -16)); - lea(str1, Address(str1, cnt2, Address::times_2, -16)); - subl(cnt1, cnt2); - movl(cnt2, 8); - addl(cnt1, 8); - bind(CONT_SCAN_SUBSTR); - movdqu(vec, Address(str2, 0)); - jmpb(SCAN_SUBSTR); - - bind(RET_FOUND_LONG); - movptr(str1, Address(rsp, wordSize)); - } // non constant - - bind(RET_FOUND); - // Compute substr offset - subptr(result, str1); - shrl(result, 1); // index - - bind(CLEANUP); - pop(rsp); // restore SP - -} // string_indexof - -// Compare strings. -void MacroAssembler::string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, - XMMRegister vec1) { - ShortBranchVerifier sbv(this); - Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; - - // Compute the minimum of the string lengths and the - // difference of the string lengths (stack). - // Do the conditional move stuff - movl(result, cnt1); - subl(cnt1, cnt2); - push(cnt1); - cmov32(Assembler::lessEqual, cnt2, result); - - // Is the minimum length zero? - testl(cnt2, cnt2); - jcc(Assembler::zero, LENGTH_DIFF_LABEL); - - // Load first characters - load_unsigned_short(result, Address(str1, 0)); - load_unsigned_short(cnt1, Address(str2, 0)); - - // Compare first characters - subl(result, cnt1); - jcc(Assembler::notZero, POP_LABEL); - decrementl(cnt2); - jcc(Assembler::zero, LENGTH_DIFF_LABEL); - - { - // Check after comparing first character to see if strings are equivalent - Label LSkip2; - // Check if the strings start at same location - cmpptr(str1, str2); - jccb(Assembler::notEqual, LSkip2); - - // Check if the length difference is zero (from stack) - cmpl(Address(rsp, 0), 0x0); - jcc(Assembler::equal, LENGTH_DIFF_LABEL); - - // Strings might not be equivalent - bind(LSkip2); - } - - Address::ScaleFactor scale = Address::times_2; - int stride = 8; - - // Advance to next element - addptr(str1, 16/stride); - addptr(str2, 16/stride); - - if (UseSSE42Intrinsics) { - Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; - int pcmpmask = 0x19; - // Setup to compare 16-byte vectors - movl(result, cnt2); - andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count - jccb(Assembler::zero, COMPARE_TAIL); - - lea(str1, Address(str1, result, scale)); - lea(str2, Address(str2, result, scale)); - negptr(result); - - // pcmpestri - // inputs: - // vec1- substring - // rax - negative string length (elements count) - // mem - scaned string - // rdx - string length (elements count) - // pcmpmask - cmp mode: 11000 (string compare with negated result) - // + 00 (unsigned bytes) or + 01 (unsigned shorts) - // outputs: - // rcx - first mismatched element index - assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); - - bind(COMPARE_WIDE_VECTORS); - movdqu(vec1, Address(str1, result, scale)); - pcmpestri(vec1, Address(str2, result, scale), pcmpmask); - // After pcmpestri cnt1(rcx) contains mismatched element index - - jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 - addptr(result, stride); - subptr(cnt2, stride); - jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); - - // compare wide vectors tail - testl(result, result); - jccb(Assembler::zero, LENGTH_DIFF_LABEL); - - movl(cnt2, stride); - movl(result, stride); - negptr(result); - movdqu(vec1, Address(str1, result, scale)); - pcmpestri(vec1, Address(str2, result, scale), pcmpmask); - jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); - - // Mismatched characters in the vectors - bind(VECTOR_NOT_EQUAL); - addptr(result, cnt1); - movptr(cnt2, result); - load_unsigned_short(result, Address(str1, cnt2, scale)); - load_unsigned_short(cnt1, Address(str2, cnt2, scale)); - subl(result, cnt1); - jmpb(POP_LABEL); - - bind(COMPARE_TAIL); // limit is zero - movl(cnt2, result); - // Fallthru to tail compare - } - - // Shift str2 and str1 to the end of the arrays, negate min - lea(str1, Address(str1, cnt2, scale, 0)); - lea(str2, Address(str2, cnt2, scale, 0)); - negptr(cnt2); - - // Compare the rest of the elements - bind(WHILE_HEAD_LABEL); - load_unsigned_short(result, Address(str1, cnt2, scale, 0)); - load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); - subl(result, cnt1); - jccb(Assembler::notZero, POP_LABEL); - increment(cnt2); - jccb(Assembler::notZero, WHILE_HEAD_LABEL); - - // Strings are equal up to min length. Return the length difference. - bind(LENGTH_DIFF_LABEL); - pop(result); - jmpb(DONE_LABEL); - - // Discard the stored length difference - bind(POP_LABEL); - pop(cnt1); - - // That's it - bind(DONE_LABEL); -} - -// Compare char[] arrays aligned to 4 bytes or substrings. -void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, - Register limit, Register result, Register chr, - XMMRegister vec1, XMMRegister vec2) { - ShortBranchVerifier sbv(this); - Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; - - int length_offset = arrayOopDesc::length_offset_in_bytes(); - int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); - - // Check the input args - cmpptr(ary1, ary2); - jcc(Assembler::equal, TRUE_LABEL); - - if (is_array_equ) { - // Need additional checks for arrays_equals. - testptr(ary1, ary1); - jcc(Assembler::zero, FALSE_LABEL); - testptr(ary2, ary2); - jcc(Assembler::zero, FALSE_LABEL); - - // Check the lengths - movl(limit, Address(ary1, length_offset)); - cmpl(limit, Address(ary2, length_offset)); - jcc(Assembler::notEqual, FALSE_LABEL); - } - - // count == 0 - testl(limit, limit); - jcc(Assembler::zero, TRUE_LABEL); - - if (is_array_equ) { - // Load array address - lea(ary1, Address(ary1, base_offset)); - lea(ary2, Address(ary2, base_offset)); - } - - shll(limit, 1); // byte count != 0 - movl(result, limit); // copy - - if (UseSSE42Intrinsics) { - // With SSE4.2, use double quad vector compare - Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; - - // Compare 16-byte vectors - andl(result, 0x0000000e); // tail count (in bytes) - andl(limit, 0xfffffff0); // vector count (in bytes) - jccb(Assembler::zero, COMPARE_TAIL); - - lea(ary1, Address(ary1, limit, Address::times_1)); - lea(ary2, Address(ary2, limit, Address::times_1)); - negptr(limit); - - bind(COMPARE_WIDE_VECTORS); - movdqu(vec1, Address(ary1, limit, Address::times_1)); - movdqu(vec2, Address(ary2, limit, Address::times_1)); - pxor(vec1, vec2); - - ptest(vec1, vec1); - jccb(Assembler::notZero, FALSE_LABEL); - addptr(limit, 16); - jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); - - testl(result, result); - jccb(Assembler::zero, TRUE_LABEL); - - movdqu(vec1, Address(ary1, result, Address::times_1, -16)); - movdqu(vec2, Address(ary2, result, Address::times_1, -16)); - pxor(vec1, vec2); - - ptest(vec1, vec1); - jccb(Assembler::notZero, FALSE_LABEL); - jmpb(TRUE_LABEL); - - bind(COMPARE_TAIL); // limit is zero - movl(limit, result); - // Fallthru to tail compare - } - - // Compare 4-byte vectors - andl(limit, 0xfffffffc); // vector count (in bytes) - jccb(Assembler::zero, COMPARE_CHAR); - - lea(ary1, Address(ary1, limit, Address::times_1)); - lea(ary2, Address(ary2, limit, Address::times_1)); - negptr(limit); - - bind(COMPARE_VECTORS); - movl(chr, Address(ary1, limit, Address::times_1)); - cmpl(chr, Address(ary2, limit, Address::times_1)); - jccb(Assembler::notEqual, FALSE_LABEL); - addptr(limit, 4); - jcc(Assembler::notZero, COMPARE_VECTORS); - - // Compare trailing char (final 2 bytes), if any - bind(COMPARE_CHAR); - testl(result, 0x2); // tail char - jccb(Assembler::zero, TRUE_LABEL); - load_unsigned_short(chr, Address(ary1, 0)); - load_unsigned_short(limit, Address(ary2, 0)); - cmpl(chr, limit); - jccb(Assembler::notEqual, FALSE_LABEL); - - bind(TRUE_LABEL); - movl(result, 1); // return true - jmpb(DONE); - - bind(FALSE_LABEL); - xorl(result, result); // return false - - // That's it - bind(DONE); -} - -void MacroAssembler::generate_fill(BasicType t, bool aligned, - Register to, Register value, Register count, - Register rtmp, XMMRegister xtmp) { - ShortBranchVerifier sbv(this); - assert_different_registers(to, value, count, rtmp); - Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; - Label L_fill_2_bytes, L_fill_4_bytes; - - int shift = -1; - switch (t) { - case T_BYTE: - shift = 2; - break; - case T_SHORT: - shift = 1; - break; - case T_INT: - shift = 0; - break; - default: ShouldNotReachHere(); - } - - if (t == T_BYTE) { - andl(value, 0xff); - movl(rtmp, value); - shll(rtmp, 8); - orl(value, rtmp); - } - if (t == T_SHORT) { - andl(value, 0xffff); - } - if (t == T_BYTE || t == T_SHORT) { - movl(rtmp, value); - shll(rtmp, 16); - orl(value, rtmp); - } - - cmpl(count, 2<= 2, "supported cpu only" ); - Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; - // Fill 32-byte chunks - movdl(xtmp, value); - pshufd(xtmp, xtmp, 0); - - subl(count, 8 << shift); - jcc(Assembler::less, L_check_fill_8_bytes); - align(16); - - BIND(L_fill_32_bytes_loop); - - if (UseUnalignedLoadStores) { - movdqu(Address(to, 0), xtmp); - movdqu(Address(to, 16), xtmp); - } else { - movq(Address(to, 0), xtmp); - movq(Address(to, 8), xtmp); - movq(Address(to, 16), xtmp); - movq(Address(to, 24), xtmp); - } - - addptr(to, 32); - subl(count, 8 << shift); - jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); - BIND(L_check_fill_8_bytes); - addl(count, 8 << shift); - jccb(Assembler::zero, L_exit); - jmpb(L_fill_8_bytes); - - // - // length is too short, just fill qwords - // - BIND(L_fill_8_bytes_loop); - movq(Address(to, 0), xtmp); - addptr(to, 8); - BIND(L_fill_8_bytes); - subl(count, 1 << (shift + 1)); - jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); - } - } - // fill trailing 4 bytes - BIND(L_fill_4_bytes); - testl(count, 1<cmp8(ExternalAddress((address)flag_addr), value); - _masm->jcc(Assembler::equal, _label); -} - -SkipIfEqual::~SkipIfEqual() { - _masm->bind(_label); -} diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index 8a9bbaf424e..a48aeda8dd3 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -25,6 +25,8 @@ #ifndef CPU_X86_VM_ASSEMBLER_X86_HPP #define CPU_X86_VM_ASSEMBLER_X86_HPP +#include "asm/register.hpp" + class BiasedLockingCounters; // Contains all the definitions needed for x86 assembly code generation. @@ -706,8 +708,6 @@ private: void check_relocation(RelocationHolder const& rspec, int format); #endif - inline void emit_long64(jlong x); - void emit_data(jint data, relocInfo::relocType rtype, int format); void emit_data(jint data, RelocationHolder const& rspec, int format); void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); @@ -916,7 +916,7 @@ private: void cdqq(); - void cld() { emit_byte(0xfc); } + void cld(); void clflush(Address adr); @@ -963,10 +963,7 @@ private: void comiss(XMMRegister dst, XMMRegister src); // Identify processor type and features - void cpuid() { - emit_byte(0x0F); - emit_byte(0xA2); - } + void cpuid(); // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value void cvtsd2ss(XMMRegister dst, XMMRegister src); @@ -1211,11 +1208,7 @@ private: void leaq(Register dst, Address src); - void lfence() { - emit_byte(0x0F); - emit_byte(0xAE); - emit_byte(0xE8); - } + void lfence(); void lock(); @@ -1523,7 +1516,7 @@ private: void sqrtss(XMMRegister dst, Address src); void sqrtss(XMMRegister dst, XMMRegister src); - void std() { emit_byte(0xfd); } + void std(); void stmxcsr( Address dst ); @@ -1580,11 +1573,7 @@ private: void xchgq(Register dst, Register src); // Get Value of Extended Control Register - void xgetbv() { - emit_byte(0x0F); - emit_byte(0x01); - emit_byte(0xD0); - } + void xgetbv(); void xorl(Register dst, int32_t imm32); void xorl(Register dst, Address src); @@ -1781,1114 +1770,4 @@ private: }; - -// MacroAssembler extends Assembler by frequently used macros. -// -// Instructions for which a 'better' code sequence exists depending -// on arguments should also go in here. - -class MacroAssembler: public Assembler { - friend class LIR_Assembler; - friend class Runtime1; // as_Address() - - protected: - - Address as_Address(AddressLiteral adr); - Address as_Address(ArrayAddress adr); - - // Support for VM calls - // - // This is the base routine called by the different versions of call_VM_leaf. The interpreter - // may customize this version by overriding it for its purposes (e.g., to save/restore - // additional registers when doing a VM call). -#ifdef CC_INTERP - // c++ interpreter never wants to use interp_masm version of call_VM - #define VIRTUAL -#else - #define VIRTUAL virtual -#endif - - VIRTUAL void call_VM_leaf_base( - address entry_point, // the entry point - int number_of_arguments // the number of arguments to pop after the call - ); - - // This is the base routine called by the different versions of call_VM. The interpreter - // may customize this version by overriding it for its purposes (e.g., to save/restore - // additional registers when doing a VM call). - // - // If no java_thread register is specified (noreg) than rdi will be used instead. call_VM_base - // returns the register which contains the thread upon return. If a thread register has been - // specified, the return value will correspond to that register. If no last_java_sp is specified - // (noreg) than rsp will be used instead. - VIRTUAL void call_VM_base( // returns the register containing the thread upon return - Register oop_result, // where an oop-result ends up if any; use noreg otherwise - Register java_thread, // the thread if computed before ; use noreg otherwise - Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise - address entry_point, // the entry point - int number_of_arguments, // the number of arguments (w/o thread) to pop after the call - bool check_exceptions // whether to check for pending exceptions after return - ); - - // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. - // The implementation is only non-empty for the InterpreterMacroAssembler, - // as only the interpreter handles PopFrame and ForceEarlyReturn requests. - virtual void check_and_handle_popframe(Register java_thread); - virtual void check_and_handle_earlyret(Register java_thread); - - void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); - - // helpers for FPU flag access - // tmp is a temporary register, if none is available use noreg - void save_rax (Register tmp); - void restore_rax(Register tmp); - - public: - MacroAssembler(CodeBuffer* code) : Assembler(code) {} - - // Support for NULL-checks - // - // Generates code that causes a NULL OS exception if the content of reg is NULL. - // If the accessed location is M[reg + offset] and the offset is known, provide the - // offset. No explicit code generation is needed if the offset is within a certain - // range (0 <= offset <= page_size). - - void null_check(Register reg, int offset = -1); - static bool needs_explicit_null_check(intptr_t offset); - - // Required platform-specific helpers for Label::patch_instructions. - // They _shadow_ the declarations in AbstractAssembler, which are undefined. - void pd_patch_instruction(address branch, address target); -#ifndef PRODUCT - static void pd_print_patched_instruction(address branch); -#endif - - // The following 4 methods return the offset of the appropriate move instruction - - // Support for fast byte/short loading with zero extension (depending on particular CPU) - int load_unsigned_byte(Register dst, Address src); - int load_unsigned_short(Register dst, Address src); - - // Support for fast byte/short loading with sign extension (depending on particular CPU) - int load_signed_byte(Register dst, Address src); - int load_signed_short(Register dst, Address src); - - // Support for sign-extension (hi:lo = extend_sign(lo)) - void extend_sign(Register hi, Register lo); - - // Load and store values by size and signed-ness - void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); - void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); - - // Support for inc/dec with optimal instruction selection depending on value - - void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } - void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } - - void decrementl(Address dst, int value = 1); - void decrementl(Register reg, int value = 1); - - void decrementq(Register reg, int value = 1); - void decrementq(Address dst, int value = 1); - - void incrementl(Address dst, int value = 1); - void incrementl(Register reg, int value = 1); - - void incrementq(Register reg, int value = 1); - void incrementq(Address dst, int value = 1); - - - // Support optimal SSE move instructions. - void movflt(XMMRegister dst, XMMRegister src) { - if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } - else { movss (dst, src); return; } - } - void movflt(XMMRegister dst, Address src) { movss(dst, src); } - void movflt(XMMRegister dst, AddressLiteral src); - void movflt(Address dst, XMMRegister src) { movss(dst, src); } - - void movdbl(XMMRegister dst, XMMRegister src) { - if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } - else { movsd (dst, src); return; } - } - - void movdbl(XMMRegister dst, AddressLiteral src); - - void movdbl(XMMRegister dst, Address src) { - if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } - else { movlpd(dst, src); return; } - } - void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } - - void incrementl(AddressLiteral dst); - void incrementl(ArrayAddress dst); - - // Alignment - void align(int modulus); - - // A 5 byte nop that is safe for patching (see patch_verified_entry) - void fat_nop(); - - // Stack frame creation/removal - void enter(); - void leave(); - - // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) - // The pointer will be loaded into the thread register. - void get_thread(Register thread); - - - // Support for VM calls - // - // It is imperative that all calls into the VM are handled via the call_VM macros. - // They make sure that the stack linkage is setup correctly. call_VM's correspond - // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. - - - void call_VM(Register oop_result, - address entry_point, - bool check_exceptions = true); - void call_VM(Register oop_result, - address entry_point, - Register arg_1, - bool check_exceptions = true); - void call_VM(Register oop_result, - address entry_point, - Register arg_1, Register arg_2, - bool check_exceptions = true); - void call_VM(Register oop_result, - address entry_point, - Register arg_1, Register arg_2, Register arg_3, - bool check_exceptions = true); - - // Overloadings with last_Java_sp - void call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - int number_of_arguments = 0, - bool check_exceptions = true); - void call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, bool - check_exceptions = true); - void call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, Register arg_2, - bool check_exceptions = true); - void call_VM(Register oop_result, - Register last_java_sp, - address entry_point, - Register arg_1, Register arg_2, Register arg_3, - bool check_exceptions = true); - - void get_vm_result (Register oop_result, Register thread); - void get_vm_result_2(Register metadata_result, Register thread); - - // These always tightly bind to MacroAssembler::call_VM_base - // bypassing the virtual implementation - void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); - void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); - void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); - void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); - void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); - - void call_VM_leaf(address entry_point, - int number_of_arguments = 0); - void call_VM_leaf(address entry_point, - Register arg_1); - void call_VM_leaf(address entry_point, - Register arg_1, Register arg_2); - void call_VM_leaf(address entry_point, - Register arg_1, Register arg_2, Register arg_3); - - // These always tightly bind to MacroAssembler::call_VM_leaf_base - // bypassing the virtual implementation - void super_call_VM_leaf(address entry_point); - void super_call_VM_leaf(address entry_point, Register arg_1); - void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); - void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); - void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); - - // last Java Frame (fills frame anchor) - void set_last_Java_frame(Register thread, - Register last_java_sp, - Register last_java_fp, - address last_java_pc); - - // thread in the default location (r15_thread on 64bit) - void set_last_Java_frame(Register last_java_sp, - Register last_java_fp, - address last_java_pc); - - void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc); - - // thread in the default location (r15_thread on 64bit) - void reset_last_Java_frame(bool clear_fp, bool clear_pc); - - // Stores - void store_check(Register obj); // store check for obj - register is destroyed afterwards - void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) - -#ifndef SERIALGC - - void g1_write_barrier_pre(Register obj, - Register pre_val, - Register thread, - Register tmp, - bool tosca_live, - bool expand_call); - - void g1_write_barrier_post(Register store_addr, - Register new_val, - Register thread, - Register tmp, - Register tmp2); - -#endif // SERIALGC - - // split store_check(Register obj) to enhance instruction interleaving - void store_check_part_1(Register obj); - void store_check_part_2(Register obj); - - // C 'boolean' to Java boolean: x == 0 ? 0 : 1 - void c2bool(Register x); - - // C++ bool manipulation - - void movbool(Register dst, Address src); - void movbool(Address dst, bool boolconst); - void movbool(Address dst, Register src); - void testbool(Register dst); - - // oop manipulations - void load_klass(Register dst, Register src); - void store_klass(Register dst, Register src); - - void load_heap_oop(Register dst, Address src); - void load_heap_oop_not_null(Register dst, Address src); - void store_heap_oop(Address dst, Register src); - void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg); - - // Used for storing NULL. All other oop constants should be - // stored using routines that take a jobject. - void store_heap_oop_null(Address dst); - - void load_prototype_header(Register dst, Register src); - -#ifdef _LP64 - void store_klass_gap(Register dst, Register src); - - // This dummy is to prevent a call to store_heap_oop from - // converting a zero (like NULL) into a Register by giving - // the compiler two choices it can't resolve - - void store_heap_oop(Address dst, void* dummy); - - void encode_heap_oop(Register r); - void decode_heap_oop(Register r); - void encode_heap_oop_not_null(Register r); - void decode_heap_oop_not_null(Register r); - void encode_heap_oop_not_null(Register dst, Register src); - void decode_heap_oop_not_null(Register dst, Register src); - - void set_narrow_oop(Register dst, jobject obj); - void set_narrow_oop(Address dst, jobject obj); - void cmp_narrow_oop(Register dst, jobject obj); - void cmp_narrow_oop(Address dst, jobject obj); - - void encode_klass_not_null(Register r); - void decode_klass_not_null(Register r); - void encode_klass_not_null(Register dst, Register src); - void decode_klass_not_null(Register dst, Register src); - void set_narrow_klass(Register dst, Klass* k); - void set_narrow_klass(Address dst, Klass* k); - void cmp_narrow_klass(Register dst, Klass* k); - void cmp_narrow_klass(Address dst, Klass* k); - - // if heap base register is used - reinit it with the correct value - void reinit_heapbase(); - - DEBUG_ONLY(void verify_heapbase(const char* msg);) - -#endif // _LP64 - - // Int division/remainder for Java - // (as idivl, but checks for special case as described in JVM spec.) - // returns idivl instruction offset for implicit exception handling - int corrected_idivl(Register reg); - - // Long division/remainder for Java - // (as idivq, but checks for special case as described in JVM spec.) - // returns idivq instruction offset for implicit exception handling - int corrected_idivq(Register reg); - - void int3(); - - // Long operation macros for a 32bit cpu - // Long negation for Java - void lneg(Register hi, Register lo); - - // Long multiplication for Java - // (destroys contents of eax, ebx, ecx and edx) - void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y - - // Long shifts for Java - // (semantics as described in JVM spec.) - void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) - void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) - - // Long compare for Java - // (semantics as described in JVM spec.) - void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) - - - // misc - - // Sign extension - void sign_extend_short(Register reg); - void sign_extend_byte(Register reg); - - // Division by power of 2, rounding towards 0 - void division_with_shift(Register reg, int shift_value); - - // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: - // - // CF (corresponds to C0) if x < y - // PF (corresponds to C2) if unordered - // ZF (corresponds to C3) if x = y - // - // The arguments are in reversed order on the stack (i.e., top of stack is first argument). - // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) - void fcmp(Register tmp); - // Variant of the above which allows y to be further down the stack - // and which only pops x and y if specified. If pop_right is - // specified then pop_left must also be specified. - void fcmp(Register tmp, int index, bool pop_left, bool pop_right); - - // Floating-point comparison for Java - // Compares the top-most stack entries on the FPU stack and stores the result in dst. - // The arguments are in reversed order on the stack (i.e., top of stack is first argument). - // (semantics as described in JVM spec.) - void fcmp2int(Register dst, bool unordered_is_less); - // Variant of the above which allows y to be further down the stack - // and which only pops x and y if specified. If pop_right is - // specified then pop_left must also be specified. - void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); - - // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) - // tmp is a temporary register, if none is available use noreg - void fremr(Register tmp); - - - // same as fcmp2int, but using SSE2 - void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); - void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); - - // Inlined sin/cos generator for Java; must not use CPU instruction - // directly on Intel as it does not have high enough precision - // outside of the range [-pi/4, pi/4]. Extra argument indicate the - // number of FPU stack slots in use; all but the topmost will - // require saving if a slow case is necessary. Assumes argument is - // on FP TOS; result is on FP TOS. No cpu registers are changed by - // this code. - void trigfunc(char trig, int num_fpu_regs_in_use = 1); - - // branch to L if FPU flag C2 is set/not set - // tmp is a temporary register, if none is available use noreg - void jC2 (Register tmp, Label& L); - void jnC2(Register tmp, Label& L); - - // Pop ST (ffree & fincstp combined) - void fpop(); - - // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack - void push_fTOS(); - - // pops double TOS element from CPU stack and pushes on FPU stack - void pop_fTOS(); - - void empty_FPU_stack(); - - void push_IU_state(); - void pop_IU_state(); - - void push_FPU_state(); - void pop_FPU_state(); - - void push_CPU_state(); - void pop_CPU_state(); - - // Round up to a power of two - void round_to(Register reg, int modulus); - - // Callee saved registers handling - void push_callee_saved_registers(); - void pop_callee_saved_registers(); - - // allocation - void eden_allocate( - Register obj, // result: pointer to object after successful allocation - Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise - int con_size_in_bytes, // object size in bytes if known at compile time - Register t1, // temp register - Label& slow_case // continuation point if fast allocation fails - ); - void tlab_allocate( - Register obj, // result: pointer to object after successful allocation - Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise - int con_size_in_bytes, // object size in bytes if known at compile time - Register t1, // temp register - Register t2, // temp register - Label& slow_case // continuation point if fast allocation fails - ); - Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address - void incr_allocated_bytes(Register thread, - Register var_size_in_bytes, int con_size_in_bytes, - Register t1 = noreg); - - // interface method calling - void lookup_interface_method(Register recv_klass, - Register intf_klass, - RegisterOrConstant itable_index, - Register method_result, - Register scan_temp, - Label& no_such_interface); - - // virtual method calling - void lookup_virtual_method(Register recv_klass, - RegisterOrConstant vtable_index, - Register method_result); - - // Test sub_klass against super_klass, with fast and slow paths. - - // The fast path produces a tri-state answer: yes / no / maybe-slow. - // One of the three labels can be NULL, meaning take the fall-through. - // If super_check_offset is -1, the value is loaded up from super_klass. - // No registers are killed, except temp_reg. - void check_klass_subtype_fast_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Label* L_success, - Label* L_failure, - Label* L_slow_path, - RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); - - // The rest of the type check; must be wired to a corresponding fast path. - // It does not repeat the fast path logic, so don't use it standalone. - // The temp_reg and temp2_reg can be noreg, if no temps are available. - // Updates the sub's secondary super cache as necessary. - // If set_cond_codes, condition codes will be Z on success, NZ on failure. - void check_klass_subtype_slow_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Label* L_success, - Label* L_failure, - bool set_cond_codes = false); - - // Simplified, combined version, good for typical uses. - // Falls through on failure. - void check_klass_subtype(Register sub_klass, - Register super_klass, - Register temp_reg, - Label& L_success); - - // method handles (JSR 292) - Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); - - //---- - void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 - - // Debugging - - // only if +VerifyOops - // TODO: Make these macros with file and line like sparc version! - void verify_oop(Register reg, const char* s = "broken oop"); - void verify_oop_addr(Address addr, const char * s = "broken oop addr"); - - // TODO: verify method and klass metadata (compare against vptr?) - void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} - void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} - -#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) -#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) - - // only if +VerifyFPU - void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); - - // prints msg, dumps registers and stops execution - void stop(const char* msg); - - // prints msg and continues - void warn(const char* msg); - - // dumps registers and other state - void print_state(); - - static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); - static void debug64(char* msg, int64_t pc, int64_t regs[]); - static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); - static void print_state64(int64_t pc, int64_t regs[]); - - void os_breakpoint(); - - void untested() { stop("untested"); } - - void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, 1024, "unimplemented: %s", what); stop(b); } - - void should_not_reach_here() { stop("should not reach here"); } - - void print_CPU_state(); - - // Stack overflow checking - void bang_stack_with_offset(int offset) { - // stack grows down, caller passes positive offset - assert(offset > 0, "must bang with negative offset"); - movl(Address(rsp, (-offset)), rax); - } - - // Writes to stack successive pages until offset reached to check for - // stack overflow + shadow pages. Also, clobbers tmp - void bang_stack_size(Register size, Register tmp); - - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset); - - // Support for serializing memory accesses between threads - void serialize_memory(Register thread, Register tmp); - - void verify_tlab(); - - // Biased locking support - // lock_reg and obj_reg must be loaded up with the appropriate values. - // swap_reg must be rax, and is killed. - // tmp_reg is optional. If it is supplied (i.e., != noreg) it will - // be killed; if not supplied, push/pop will be used internally to - // allocate a temporary (inefficient, avoid if possible). - // Optional slow case is for implementations (interpreter and C1) which branch to - // slow case directly. Leaves condition codes set for C2's Fast_Lock node. - // Returns offset of first potentially-faulting instruction for null - // check info (currently consumed only by C1). If - // swap_reg_contains_mark is true then returns -1 as it is assumed - // the calling code has already passed any potential faults. - int biased_locking_enter(Register lock_reg, Register obj_reg, - Register swap_reg, Register tmp_reg, - bool swap_reg_contains_mark, - Label& done, Label* slow_case = NULL, - BiasedLockingCounters* counters = NULL); - void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); - - - Condition negate_condition(Condition cond); - - // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit - // operands. In general the names are modified to avoid hiding the instruction in Assembler - // so that we don't need to implement all the varieties in the Assembler with trivial wrappers - // here in MacroAssembler. The major exception to this rule is call - - // Arithmetics - - - void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; } - void addptr(Address dst, Register src); - - void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } - void addptr(Register dst, int32_t src); - void addptr(Register dst, Register src); - void addptr(Register dst, RegisterOrConstant src) { - if (src.is_constant()) addptr(dst, (int) src.as_constant()); - else addptr(dst, src.as_register()); - } - - void andptr(Register dst, int32_t src); - void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; } - - void cmp8(AddressLiteral src1, int imm); - - // renamed to drag out the casting of address to int32_t/intptr_t - void cmp32(Register src1, int32_t imm); - - void cmp32(AddressLiteral src1, int32_t imm); - // compare reg - mem, or reg - &mem - void cmp32(Register src1, AddressLiteral src2); - - void cmp32(Register src1, Address src2); - -#ifndef _LP64 - void cmpklass(Address dst, Metadata* obj); - void cmpklass(Register dst, Metadata* obj); - void cmpoop(Address dst, jobject obj); - void cmpoop(Register dst, jobject obj); -#endif // _LP64 - - // NOTE src2 must be the lval. This is NOT an mem-mem compare - void cmpptr(Address src1, AddressLiteral src2); - - void cmpptr(Register src1, AddressLiteral src2); - - void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } - void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } - // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } - - void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } - void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } - - // cmp64 to avoild hiding cmpq - void cmp64(Register src1, AddressLiteral src); - - void cmpxchgptr(Register reg, Address adr); - - void locked_cmpxchgptr(Register reg, AddressLiteral adr); - - - void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); } - - - void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); } - - void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); } - - void shlptr(Register dst, int32_t shift); - void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); } - - void shrptr(Register dst, int32_t shift); - void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); } - - void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); } - void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); } - - void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } - - void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } - void subptr(Register dst, int32_t src); - // Force generation of a 4 byte immediate value even if it fits into 8bit - void subptr_imm32(Register dst, int32_t src); - void subptr(Register dst, Register src); - void subptr(Register dst, RegisterOrConstant src) { - if (src.is_constant()) subptr(dst, (int) src.as_constant()); - else subptr(dst, src.as_register()); - } - - void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } - void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } - - void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } - void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } - - void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; } - - - - // Helper functions for statistics gathering. - // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. - void cond_inc32(Condition cond, AddressLiteral counter_addr); - // Unconditional atomic increment. - void atomic_incl(AddressLiteral counter_addr); - - void lea(Register dst, AddressLiteral adr); - void lea(Address dst, AddressLiteral adr); - void lea(Register dst, Address adr) { Assembler::lea(dst, adr); } - - void leal32(Register dst, Address src) { leal(dst, src); } - - // Import other testl() methods from the parent class or else - // they will be hidden by the following overriding declaration. - using Assembler::testl; - void testl(Register dst, AddressLiteral src); - - void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } - void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } - void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } - - void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); } - void testptr(Register src1, Register src2); - - void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } - void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } - - // Calls - - void call(Label& L, relocInfo::relocType rtype); - void call(Register entry); - - // NOTE: this call tranfers to the effective address of entry NOT - // the address contained by entry. This is because this is more natural - // for jumps/calls. - void call(AddressLiteral entry); - - // Emit the CompiledIC call idiom - void ic_call(address entry); - - // Jumps - - // NOTE: these jumps tranfer to the effective address of dst NOT - // the address contained by dst. This is because this is more natural - // for jumps/calls. - void jump(AddressLiteral dst); - void jump_cc(Condition cc, AddressLiteral dst); - - // 32bit can do a case table jump in one instruction but we no longer allow the base - // to be installed in the Address class. This jump will tranfers to the address - // contained in the location described by entry (not the address of entry) - void jump(ArrayAddress entry); - - // Floating - - void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } - void andpd(XMMRegister dst, AddressLiteral src); - - void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } - void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } - void andps(XMMRegister dst, AddressLiteral src); - - void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } - void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } - void comiss(XMMRegister dst, AddressLiteral src); - - void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } - void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } - void comisd(XMMRegister dst, AddressLiteral src); - - void fadd_s(Address src) { Assembler::fadd_s(src); } - void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); } - - void fldcw(Address src) { Assembler::fldcw(src); } - void fldcw(AddressLiteral src); - - void fld_s(int index) { Assembler::fld_s(index); } - void fld_s(Address src) { Assembler::fld_s(src); } - void fld_s(AddressLiteral src); - - void fld_d(Address src) { Assembler::fld_d(src); } - void fld_d(AddressLiteral src); - - void fld_x(Address src) { Assembler::fld_x(src); } - void fld_x(AddressLiteral src); - - void fmul_s(Address src) { Assembler::fmul_s(src); } - void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); } - - void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } - void ldmxcsr(AddressLiteral src); - - // compute pow(x,y) and exp(x) with x86 instructions. Don't cover - // all corner cases and may result in NaN and require fallback to a - // runtime call. - void fast_pow(); - void fast_exp(); - void increase_precision(); - void restore_precision(); - - // computes exp(x). Fallback to runtime call included. - void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); } - // computes pow(x,y). Fallback to runtime call included. - void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); } - -private: - - // call runtime as a fallback for trig functions and pow/exp. - void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use); - - // computes 2^(Ylog2X); Ylog2X in ST(0) - void pow_exp_core_encoding(); - - // computes pow(x,y) or exp(x). Fallback to runtime call included. - void pow_or_exp(bool is_exp, int num_fpu_regs_in_use); - - // these are private because users should be doing movflt/movdbl - - void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } - void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } - void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } - void movss(XMMRegister dst, AddressLiteral src); - - void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } - void movlpd(XMMRegister dst, AddressLiteral src); - -public: - - void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } - void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } - void addsd(XMMRegister dst, AddressLiteral src); - - void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } - void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } - void addss(XMMRegister dst, AddressLiteral src); - - void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } - void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } - void divsd(XMMRegister dst, AddressLiteral src); - - void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } - void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } - void divss(XMMRegister dst, AddressLiteral src); - - // Move Unaligned Double Quadword - void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); } - void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); } - void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); } - void movdqu(XMMRegister dst, AddressLiteral src); - - void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } - void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } - void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } - void movsd(XMMRegister dst, AddressLiteral src); - - void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } - void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } - void mulsd(XMMRegister dst, AddressLiteral src); - - void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } - void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } - void mulss(XMMRegister dst, AddressLiteral src); - - void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } - void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } - void sqrtsd(XMMRegister dst, AddressLiteral src); - - void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } - void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } - void sqrtss(XMMRegister dst, AddressLiteral src); - - void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } - void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } - void subsd(XMMRegister dst, AddressLiteral src); - - void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } - void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } - void subss(XMMRegister dst, AddressLiteral src); - - void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } - void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } - void ucomiss(XMMRegister dst, AddressLiteral src); - - void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } - void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } - void ucomisd(XMMRegister dst, AddressLiteral src); - - // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values - void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); } - void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } - void xorpd(XMMRegister dst, AddressLiteral src); - - // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values - void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); } - void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } - void xorps(XMMRegister dst, AddressLiteral src); - - // Shuffle Bytes - void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } - void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } - void pshufb(XMMRegister dst, AddressLiteral src); - // AVX 3-operands instructions - - void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } - void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } - void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } - void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } - void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); } - void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); } - void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); - - void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); } - void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); } - void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); - - void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } - void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } - void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } - void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } - void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } - void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } - void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } - void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } - void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } - void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } - void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } - void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } - void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); - - // AVX Vector instructions - - void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } - void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } - void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); - - void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } - void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } - void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); - - void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { - if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2 - Assembler::vpxor(dst, nds, src, vector256); - else - Assembler::vxorpd(dst, nds, src, vector256); - } - void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { - if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2 - Assembler::vpxor(dst, nds, src, vector256); - else - Assembler::vxorpd(dst, nds, src, vector256); - } - - // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector. - void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { - if (UseAVX > 1) // vinserti128h is available only in AVX2 - Assembler::vinserti128h(dst, nds, src); - else - Assembler::vinsertf128h(dst, nds, src); - } - - // Data - - void cmov32( Condition cc, Register dst, Address src); - void cmov32( Condition cc, Register dst, Register src); - - void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } - - void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } - void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } - - void movoop(Register dst, jobject obj); - void movoop(Address dst, jobject obj); - - void mov_metadata(Register dst, Metadata* obj); - void mov_metadata(Address dst, Metadata* obj); - - void movptr(ArrayAddress dst, Register src); - // can this do an lea? - void movptr(Register dst, ArrayAddress src); - - void movptr(Register dst, Address src); - - void movptr(Register dst, AddressLiteral src); - - void movptr(Register dst, intptr_t src); - void movptr(Register dst, Register src); - void movptr(Address dst, intptr_t src); - - void movptr(Address dst, Register src); - - void movptr(Register dst, RegisterOrConstant src) { - if (src.is_constant()) movptr(dst, src.as_constant()); - else movptr(dst, src.as_register()); - } - -#ifdef _LP64 - // Generally the next two are only used for moving NULL - // Although there are situations in initializing the mark word where - // they could be used. They are dangerous. - - // They only exist on LP64 so that int32_t and intptr_t are not the same - // and we have ambiguous declarations. - - void movptr(Address dst, int32_t imm32); - void movptr(Register dst, int32_t imm32); -#endif // _LP64 - - // to avoid hiding movl - void mov32(AddressLiteral dst, Register src); - void mov32(Register dst, AddressLiteral src); - - // to avoid hiding movb - void movbyte(ArrayAddress dst, int src); - - // Import other mov() methods from the parent class or else - // they will be hidden by the following overriding declaration. - using Assembler::movdl; - using Assembler::movq; - void movdl(XMMRegister dst, AddressLiteral src); - void movq(XMMRegister dst, AddressLiteral src); - - // Can push value or effective address - void pushptr(AddressLiteral src); - - void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); } - void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); } - - void pushoop(jobject obj); - void pushklass(Metadata* obj); - - // sign extend as need a l to ptr sized element - void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); } - void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } - - // C2 compiled method's prolog code. - void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b); - - // IndexOf strings. - // Small strings are loaded through stack if they cross page boundary. - void string_indexof(Register str1, Register str2, - Register cnt1, Register cnt2, - int int_cnt2, Register result, - XMMRegister vec, Register tmp); - - // IndexOf for constant substrings with size >= 8 elements - // which don't need to be loaded through stack. - void string_indexofC8(Register str1, Register str2, - Register cnt1, Register cnt2, - int int_cnt2, Register result, - XMMRegister vec, Register tmp); - - // Smallest code: we don't need to load through stack, - // check string tail. - - // Compare strings. - void string_compare(Register str1, Register str2, - Register cnt1, Register cnt2, Register result, - XMMRegister vec1); - - // Compare char[] arrays. - void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, - Register limit, Register result, Register chr, - XMMRegister vec1, XMMRegister vec2); - - // Fill primitive arrays - void generate_fill(BasicType t, bool aligned, - Register to, Register value, Register count, - Register rtmp, XMMRegister xtmp); - -#undef VIRTUAL - -}; - -/** - * class SkipIfEqual: - * - * Instantiating this class will result in assembly code being output that will - * jump around any code emitted between the creation of the instance and it's - * automatic destruction at the end of a scope block, depending on the value of - * the flag passed to the constructor, which will be checked at run-time. - */ -class SkipIfEqual { - private: - MacroAssembler* _masm; - Label _label; - - public: - SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); - ~SkipIfEqual(); -}; - -#ifdef ASSERT -inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } -#endif - #endif // CPU_X86_VM_ASSEMBLER_X86_HPP diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp index bf299c6da8c..7dbb0950712 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.inline.hpp @@ -28,48 +28,6 @@ #include "asm/assembler.inline.hpp" #include "asm/codeBuffer.hpp" #include "code/codeCache.hpp" -#include "runtime/handles.inline.hpp" - -inline void MacroAssembler::pd_patch_instruction(address branch, address target) { - unsigned char op = branch[0]; - assert(op == 0xE8 /* call */ || - op == 0xE9 /* jmp */ || - op == 0xEB /* short jmp */ || - (op & 0xF0) == 0x70 /* short jcc */ || - op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */, - "Invalid opcode at patch point"); - - if (op == 0xEB || (op & 0xF0) == 0x70) { - // short offset operators (jmp and jcc) - char* disp = (char*) &branch[1]; - int imm8 = target - (address) &disp[1]; - guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset"); - *disp = imm8; - } else { - int* disp = (int*) &branch[(op == 0x0F)? 2: 1]; - int imm32 = target - (address) &disp[1]; - *disp = imm32; - } -} - -#ifndef PRODUCT -inline void MacroAssembler::pd_print_patched_instruction(address branch) { - const char* s; - unsigned char op = branch[0]; - if (op == 0xE8) { - s = "call"; - } else if (op == 0xE9 || op == 0xEB) { - s = "jmp"; - } else if ((op & 0xF0) == 0x70) { - s = "jcc"; - } else if (op == 0x0F) { - s = "jcc"; - } else { - s = "????"; - } - tty->print("%s (unresolved)", s); -} -#endif // ndef PRODUCT #ifndef _LP64 inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { return reg_enc; } @@ -87,12 +45,6 @@ inline void Assembler::prefixq(Address adr, Register reg) {} inline void Assembler::prefix(Address adr, XMMRegister reg) {} inline void Assembler::prefixq(Address adr, XMMRegister reg) {} -#else -inline void Assembler::emit_long64(jlong x) { - *(jlong*) _code_pos = x; - _code_pos += sizeof(jlong); - code_section()->set_end(_code_pos); -} #endif // _LP64 #endif // CPU_X86_VM_ASSEMBLER_X86_INLINE_HPP diff --git a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp index 684ae866e9c..83146761cd2 100644 --- a/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp @@ -23,7 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "c1/c1_Compilation.hpp" #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" diff --git a/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp b/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp index 64a9463f868..946c400e590 100644 --- a/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp +++ b/hotspot/src/cpu/x86/vm/cppInterpreter_x86.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/cppInterpreter.hpp" #include "interpreter/interpreter.hpp" diff --git a/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp b/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp index 55459c09d54..ea8111b9fd7 100644 --- a/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp +++ b/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp @@ -25,6 +25,8 @@ #ifndef CPU_X86_VM_FRAME_X86_INLINE_HPP #define CPU_X86_VM_FRAME_X86_INLINE_HPP +#include "code/codeCache.hpp" + // Inline functions for Intel frames: // Constructors: diff --git a/hotspot/src/cpu/x86/vm/icBuffer_x86.cpp b/hotspot/src/cpu/x86/vm/icBuffer_x86.cpp index ede401d4d9e..62ecc447a13 100644 --- a/hotspot/src/cpu/x86/vm/icBuffer_x86.cpp +++ b/hotspot/src/cpu/x86/vm/icBuffer_x86.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "code/icBuffer.hpp" #include "gc_interface/collectedHeap.inline.hpp" #include "interpreter/bytecodes.hpp" diff --git a/hotspot/src/cpu/x86/vm/icache_x86.cpp b/hotspot/src/cpu/x86/vm/icache_x86.cpp index 91d4f4da1c1..b9ec2f6d186 100644 --- a/hotspot/src/cpu/x86/vm/icache_x86.cpp +++ b/hotspot/src/cpu/x86/vm/icache_x86.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "runtime/icache.hpp" #define __ _masm-> diff --git a/hotspot/src/cpu/x86/vm/interp_masm_x86_32.hpp b/hotspot/src/cpu/x86/vm/interp_masm_x86_32.hpp index 7f8463a9a32..6dada56de0d 100644 --- a/hotspot/src/cpu/x86/vm/interp_masm_x86_32.hpp +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_32.hpp @@ -25,8 +25,10 @@ #ifndef CPU_X86_VM_INTERP_MASM_X86_32_HPP #define CPU_X86_VM_INTERP_MASM_X86_32_HPP -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" // This file specializes the assember with interpreter-specific macros diff --git a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.hpp b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.hpp index eb8c4f0a453..66a001366ff 100644 --- a/hotspot/src/cpu/x86/vm/interp_masm_x86_64.hpp +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86_64.hpp @@ -25,8 +25,10 @@ #ifndef CPU_X86_VM_INTERP_MASM_X86_64_HPP #define CPU_X86_VM_INTERP_MASM_X86_64_HPP -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "interpreter/invocationCounter.hpp" +#include "runtime/frame.hpp" // This file specializes the assember with interpreter-specific macros diff --git a/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp b/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp index bed8137e4d5..865801e3de7 100644 --- a/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterGenerator.hpp" diff --git a/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp b/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp index 3b0a6b445ea..bc5229b997d 100644 --- a/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterGenerator.hpp" diff --git a/hotspot/src/cpu/x86/vm/jniFastGetField_x86_32.cpp b/hotspot/src/cpu/x86/vm/jniFastGetField_x86_32.cpp index 57edd9b195b..e4fb943a925 100644 --- a/hotspot/src/cpu/x86/vm/jniFastGetField_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/jniFastGetField_x86_32.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "memory/resourceArea.hpp" #include "prims/jniFastGetField.hpp" #include "prims/jvm_misc.hpp" diff --git a/hotspot/src/cpu/x86/vm/jniFastGetField_x86_64.cpp b/hotspot/src/cpu/x86/vm/jniFastGetField_x86_64.cpp index 8b0d2e6fa6f..1f523c7621d 100644 --- a/hotspot/src/cpu/x86/vm/jniFastGetField_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/jniFastGetField_x86_64.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "memory/resourceArea.hpp" #include "prims/jniFastGetField.hpp" #include "prims/jvm_misc.hpp" diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp new file mode 100644 index 00000000000..fa2d7fa4376 --- /dev/null +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -0,0 +1,6099 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "compiler/disassembler.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#ifndef SERIALGC +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#define STOP(error) stop(error) +#else +#define BLOCK_COMMENT(str) block_comment(str) +#define STOP(error) block_comment(error); stop(error) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + + +#ifdef ASSERT +bool AbstractAssembler::pd_check_instruction_mark() { return true; } +#endif + +static Assembler::Condition reverse[] = { + Assembler::noOverflow /* overflow = 0x0 */ , + Assembler::overflow /* noOverflow = 0x1 */ , + Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , + Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , + Assembler::notZero /* zero = 0x4, equal = 0x4 */ , + Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , + Assembler::above /* belowEqual = 0x6 */ , + Assembler::belowEqual /* above = 0x7 */ , + Assembler::positive /* negative = 0x8 */ , + Assembler::negative /* positive = 0x9 */ , + Assembler::noParity /* parity = 0xa */ , + Assembler::parity /* noParity = 0xb */ , + Assembler::greaterEqual /* less = 0xc */ , + Assembler::less /* greaterEqual = 0xd */ , + Assembler::greater /* lessEqual = 0xe */ , + Assembler::lessEqual /* greater = 0xf, */ + +}; + + +// Implementation of MacroAssembler + +// First all the versions that have distinct versions depending on 32/64 bit +// Unless the difference is trivial (1 line or so). + +#ifndef _LP64 + +// 32bit versions + +Address MacroAssembler::as_Address(AddressLiteral adr) { + return Address(adr.target(), adr.rspec()); +} + +Address MacroAssembler::as_Address(ArrayAddress adr) { + return Address::make_array(adr); +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert(swap_reg == rax, "swap_reg must be rax, for cmpxchg"); + assert_different_registers(lock_reg, obj_reg, swap_reg); + + if (PrintBiasedLockingStatistics && counters == NULL) + counters = BiasedLocking::counters(); + + bool need_tmp_reg = false; + if (tmp_reg == noreg) { + need_tmp_reg = true; + tmp_reg = lock_reg; + } else { + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); + } + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address klass_addr (obj_reg, oopDesc::klass_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + movl(swap_reg, mark_addr); + } + if (need_tmp_reg) { + push(tmp_reg); + } + movl(tmp_reg, swap_reg); + andl(tmp_reg, markOopDesc::biased_lock_mask_in_place); + cmpl(tmp_reg, markOopDesc::biased_lock_pattern); + if (need_tmp_reg) { + pop(tmp_reg); + } + jcc(Assembler::notEqual, cas_label); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + // Note that because there is no current thread register on x86 we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and + // simpler. + movl(saved_mark_addr, swap_reg); + if (need_tmp_reg) { + push(tmp_reg); + } + get_thread(tmp_reg); + xorl(swap_reg, tmp_reg); + if (swap_reg_contains_mark) { + null_check_offset = offset(); + } + movl(tmp_reg, klass_addr); + xorl(swap_reg, Address(tmp_reg, Klass::prototype_header_offset())); + andl(swap_reg, ~((int) markOopDesc::age_mask_in_place)); + if (need_tmp_reg) { + pop(tmp_reg); + } + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address)counters->biased_lock_entry_count_addr())); + } + jcc(Assembler::equal, done); + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + testl(swap_reg, markOopDesc::biased_lock_mask_in_place); + jcc(Assembler::notZero, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + testl(swap_reg, markOopDesc::epoch_mask_in_place); + jcc(Assembler::notZero, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + movl(swap_reg, saved_mark_addr); + andl(swap_reg, + markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + if (need_tmp_reg) { + push(tmp_reg); + } + get_thread(tmp_reg); + orl(tmp_reg, swap_reg); + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, Address(obj_reg, 0)); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address)counters->anonymously_biased_lock_entry_count_addr())); + } + if (slow_case != NULL) { + jcc(Assembler::notZero, *slow_case); + } + jmp(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + if (need_tmp_reg) { + push(tmp_reg); + } + get_thread(tmp_reg); + movl(swap_reg, klass_addr); + orl(tmp_reg, Address(swap_reg, Klass::prototype_header_offset())); + movl(swap_reg, saved_mark_addr); + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, Address(obj_reg, 0)); + if (need_tmp_reg) { + pop(tmp_reg); + } + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address)counters->rebiased_lock_entry_count_addr())); + } + if (slow_case != NULL) { + jcc(Assembler::notZero, *slow_case); + } + jmp(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + movl(swap_reg, saved_mark_addr); + if (need_tmp_reg) { + push(tmp_reg); + } + movl(tmp_reg, klass_addr); + movl(tmp_reg, Address(tmp_reg, Klass::prototype_header_offset())); + if (os::is_MP()) { + lock(); + } + cmpxchgptr(tmp_reg, Address(obj_reg, 0)); + if (need_tmp_reg) { + pop(tmp_reg); + } + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address)counters->revoked_lock_entry_count_addr())); + } + + bind(cas_label); + + return null_check_offset; +} +void MacroAssembler::call_VM_leaf_base(address entry_point, + int number_of_arguments) { + call(RuntimeAddress(entry_point)); + increment(rsp, number_of_arguments * wordSize); +} + +void MacroAssembler::cmpklass(Address src1, Metadata* obj) { + cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); +} + +void MacroAssembler::cmpklass(Register src1, Metadata* obj) { + cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); +} + +void MacroAssembler::cmpoop(Address src1, jobject obj) { + cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); +} + +void MacroAssembler::cmpoop(Register src1, jobject obj) { + cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); +} + +void MacroAssembler::extend_sign(Register hi, Register lo) { + // According to Intel Doc. AP-526, "Integer Divide", p.18. + if (VM_Version::is_P6() && hi == rdx && lo == rax) { + cdql(); + } else { + movl(hi, lo); + sarl(hi, 31); + } +} + +void MacroAssembler::jC2(Register tmp, Label& L) { + // set parity bit if FPU flag C2 is set (via rax) + save_rax(tmp); + fwait(); fnstsw_ax(); + sahf(); + restore_rax(tmp); + // branch + jcc(Assembler::parity, L); +} + +void MacroAssembler::jnC2(Register tmp, Label& L) { + // set parity bit if FPU flag C2 is set (via rax) + save_rax(tmp); + fwait(); fnstsw_ax(); + sahf(); + restore_rax(tmp); + // branch + jcc(Assembler::noParity, L); +} + +// 32bit can do a case table jump in one instruction but we no longer allow the base +// to be installed in the Address class +void MacroAssembler::jump(ArrayAddress entry) { + jmp(as_Address(entry)); +} + +// Note: y_lo will be destroyed +void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { + // Long compare for Java (semantics as described in JVM spec.) + Label high, low, done; + + cmpl(x_hi, y_hi); + jcc(Assembler::less, low); + jcc(Assembler::greater, high); + // x_hi is the return register + xorl(x_hi, x_hi); + cmpl(x_lo, y_lo); + jcc(Assembler::below, low); + jcc(Assembler::equal, done); + + bind(high); + xorl(x_hi, x_hi); + increment(x_hi); + jmp(done); + + bind(low); + xorl(x_hi, x_hi); + decrementl(x_hi); + + bind(done); +} + +void MacroAssembler::lea(Register dst, AddressLiteral src) { + mov_literal32(dst, (int32_t)src.target(), src.rspec()); +} + +void MacroAssembler::lea(Address dst, AddressLiteral adr) { + // leal(dst, as_Address(adr)); + // see note in movl as to why we must use a move + mov_literal32(dst, (int32_t) adr.target(), adr.rspec()); +} + +void MacroAssembler::leave() { + mov(rsp, rbp); + pop(rbp); +} + +void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { + // Multiplication of two Java long values stored on the stack + // as illustrated below. Result is in rdx:rax. + // + // rsp ---> [ ?? ] \ \ + // .... | y_rsp_offset | + // [ y_lo ] / (in bytes) | x_rsp_offset + // [ y_hi ] | (in bytes) + // .... | + // [ x_lo ] / + // [ x_hi ] + // .... + // + // Basic idea: lo(result) = lo(x_lo * y_lo) + // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) + Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); + Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); + Label quick; + // load x_hi, y_hi and check if quick + // multiplication is possible + movl(rbx, x_hi); + movl(rcx, y_hi); + movl(rax, rbx); + orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 + jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply + // do full multiplication + // 1st step + mull(y_lo); // x_hi * y_lo + movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, + // 2nd step + movl(rax, x_lo); + mull(rcx); // x_lo * y_hi + addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, + // 3rd step + bind(quick); // note: rbx, = 0 if quick multiply! + movl(rax, x_lo); + mull(y_lo); // x_lo * y_lo + addl(rdx, rbx); // correct hi(x_lo * y_lo) +} + +void MacroAssembler::lneg(Register hi, Register lo) { + negl(lo); + adcl(hi, 0); + negl(hi); +} + +void MacroAssembler::lshl(Register hi, Register lo) { + // Java shift left long support (semantics as described in JVM spec., p.305) + // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) + // shift value is in rcx ! + assert(hi != rcx, "must not use rcx"); + assert(lo != rcx, "must not use rcx"); + const Register s = rcx; // shift count + const int n = BitsPerWord; + Label L; + andl(s, 0x3f); // s := s & 0x3f (s < 0x40) + cmpl(s, n); // if (s < n) + jcc(Assembler::less, L); // else (s >= n) + movl(hi, lo); // x := x << n + xorl(lo, lo); + // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! + bind(L); // s (mod n) < n + shldl(hi, lo); // x := x << s + shll(lo); +} + + +void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { + // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) + // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) + assert(hi != rcx, "must not use rcx"); + assert(lo != rcx, "must not use rcx"); + const Register s = rcx; // shift count + const int n = BitsPerWord; + Label L; + andl(s, 0x3f); // s := s & 0x3f (s < 0x40) + cmpl(s, n); // if (s < n) + jcc(Assembler::less, L); // else (s >= n) + movl(lo, hi); // x := x >> n + if (sign_extension) sarl(hi, 31); + else xorl(hi, hi); + // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! + bind(L); // s (mod n) < n + shrdl(lo, hi); // x := x >> s + if (sign_extension) sarl(hi); + else shrl(hi); +} + +void MacroAssembler::movoop(Register dst, jobject obj) { + mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); +} + +void MacroAssembler::movoop(Address dst, jobject obj) { + mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); +} + +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); +} + +void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { + mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); +} + +void MacroAssembler::movptr(Register dst, AddressLiteral src) { + if (src.is_lval()) { + mov_literal32(dst, (intptr_t)src.target(), src.rspec()); + } else { + movl(dst, as_Address(src)); + } +} + +void MacroAssembler::movptr(ArrayAddress dst, Register src) { + movl(as_Address(dst), src); +} + +void MacroAssembler::movptr(Register dst, ArrayAddress src) { + movl(dst, as_Address(src)); +} + +// src should NEVER be a real pointer. Use AddressLiteral for true pointers +void MacroAssembler::movptr(Address dst, intptr_t src) { + movl(dst, src); +} + + +void MacroAssembler::pop_callee_saved_registers() { + pop(rcx); + pop(rdx); + pop(rdi); + pop(rsi); +} + +void MacroAssembler::pop_fTOS() { + fld_d(Address(rsp, 0)); + addl(rsp, 2 * wordSize); +} + +void MacroAssembler::push_callee_saved_registers() { + push(rsi); + push(rdi); + push(rdx); + push(rcx); +} + +void MacroAssembler::push_fTOS() { + subl(rsp, 2 * wordSize); + fstp_d(Address(rsp, 0)); +} + + +void MacroAssembler::pushoop(jobject obj) { + push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); +} + +void MacroAssembler::pushklass(Metadata* obj) { + push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); +} + +void MacroAssembler::pushptr(AddressLiteral src) { + if (src.is_lval()) { + push_literal32((int32_t)src.target(), src.rspec()); + } else { + pushl(as_Address(src)); + } +} + +void MacroAssembler::set_word_if_not_zero(Register dst) { + xorl(dst, dst); + set_byte_if_not_zero(dst); +} + +static void pass_arg0(MacroAssembler* masm, Register arg) { + masm->push(arg); +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + masm->push(arg); +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + masm->push(arg); +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + masm->push(arg); +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { + // In order to get locks to work, we need to fake a in_VM state + JavaThread* thread = JavaThread::current(); + JavaThreadState saved_state = thread->thread_state(); + thread->set_thread_state(_thread_in_vm); + if (ShowMessageBoxOnError) { + JavaThread* thread = JavaThread::current(); + JavaThreadState saved_state = thread->thread_state(); + thread->set_thread_state(_thread_in_vm); + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + ttyLocker ttyl; + BytecodeCounter::print(); + } + // To see where a verify_oop failed, get $ebx+40/X for this frame. + // This is the value of eip which points to where verify_oop will return. + if (os::message_box(msg, "Execution stopped, print registers?")) { + print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); + BREAKPOINT; + } + } else { + ttyLocker ttyl; + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); + } + // Don't assert holding the ttyLock + assert(false, err_msg("DEBUG MESSAGE: %s", msg)); + ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); +} + +void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { + ttyLocker ttyl; + FlagSetting fs(Debugging, true); + tty->print_cr("eip = 0x%08x", eip); +#ifndef PRODUCT + if ((WizardMode || Verbose) && PrintMiscellaneous) { + tty->cr(); + findpc(eip); + tty->cr(); + } +#endif +#define PRINT_REG(rax) \ + { tty->print("%s = ", #rax); os::print_location(tty, rax); } + PRINT_REG(rax); + PRINT_REG(rbx); + PRINT_REG(rcx); + PRINT_REG(rdx); + PRINT_REG(rdi); + PRINT_REG(rsi); + PRINT_REG(rbp); + PRINT_REG(rsp); +#undef PRINT_REG + // Print some words near top of staack. + int* dump_sp = (int*) rsp; + for (int col1 = 0; col1 < 8; col1++) { + tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); + os::print_location(tty, *dump_sp++); + } + for (int row = 0; row < 16; row++) { + tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); + for (int col = 0; col < 8; col++) { + tty->print(" 0x%08x", *dump_sp++); + } + tty->cr(); + } + // Print some instructions around pc: + Disassembler::decode((address)eip-64, (address)eip); + tty->print_cr("--------"); + Disassembler::decode((address)eip, (address)eip+32); +} + +void MacroAssembler::stop(const char* msg) { + ExternalAddress message((address)msg); + // push address of message + pushptr(message.addr()); + { Label L; call(L, relocInfo::none); bind(L); } // push eip + pusha(); // push registers + call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); + hlt(); +} + +void MacroAssembler::warn(const char* msg) { + push_CPU_state(); + + ExternalAddress message((address) msg); + // push address of message + pushptr(message.addr()); + + call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); + addl(rsp, wordSize); // discard argument + pop_CPU_state(); +} + +void MacroAssembler::print_state() { + { Label L; call(L, relocInfo::none); bind(L); } // push eip + pusha(); // push registers + + push_CPU_state(); + call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); + pop_CPU_state(); + + popa(); + addl(rsp, wordSize); +} + +#else // _LP64 + +// 64 bit versions + +Address MacroAssembler::as_Address(AddressLiteral adr) { + // amd64 always does this as a pc-rel + // we can be absolute or disp based on the instruction type + // jmp/call are displacements others are absolute + assert(!adr.is_lval(), "must be rval"); + assert(reachable(adr), "must be"); + return Address((int32_t)(intptr_t)(adr.target() - pc()), adr.target(), adr.reloc()); + +} + +Address MacroAssembler::as_Address(ArrayAddress adr) { + AddressLiteral base = adr.base(); + lea(rscratch1, base); + Address index = adr.index(); + assert(index._disp == 0, "must not have disp"); // maybe it can? + Address array(rscratch1, index._index, index._scale, index._disp); + return array; +} + +int MacroAssembler::biased_locking_enter(Register lock_reg, + Register obj_reg, + Register swap_reg, + Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, + Label* slow_case, + BiasedLockingCounters* counters) { + assert(UseBiasedLocking, "why call this otherwise?"); + assert(swap_reg == rax, "swap_reg must be rax for cmpxchgq"); + assert(tmp_reg != noreg, "tmp_reg must be supplied"); + assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); + assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); + Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); + Address saved_mark_addr(lock_reg, 0); + + if (PrintBiasedLockingStatistics && counters == NULL) + counters = BiasedLocking::counters(); + + // Biased locking + // See whether the lock is currently biased toward our thread and + // whether the epoch is still valid + // Note that the runtime guarantees sufficient alignment of JavaThread + // pointers to allow age to be placed into low bits + // First check to see whether biasing is even enabled for this object + Label cas_label; + int null_check_offset = -1; + if (!swap_reg_contains_mark) { + null_check_offset = offset(); + movq(swap_reg, mark_addr); + } + movq(tmp_reg, swap_reg); + andq(tmp_reg, markOopDesc::biased_lock_mask_in_place); + cmpq(tmp_reg, markOopDesc::biased_lock_pattern); + jcc(Assembler::notEqual, cas_label); + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. + load_prototype_header(tmp_reg, obj_reg); + orq(tmp_reg, r15_thread); + xorq(tmp_reg, swap_reg); + andq(tmp_reg, ~((int) markOopDesc::age_mask_in_place)); + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); + } + jcc(Assembler::equal, done); + + Label try_revoke_bias; + Label try_rebias; + + // At this point we know that the header has the bias pattern and + // that we are not the bias owner in the current epoch. We need to + // figure out more details about the state of the header in order to + // know what operations can be legally performed on the object's + // header. + + // If the low three bits in the xor result aren't clear, that means + // the prototype header is no longer biased and we have to revoke + // the bias on this object. + testq(tmp_reg, markOopDesc::biased_lock_mask_in_place); + jcc(Assembler::notZero, try_revoke_bias); + + // Biasing is still enabled for this data type. See whether the + // epoch of the current bias is still valid, meaning that the epoch + // bits of the mark word are equal to the epoch bits of the + // prototype header. (Note that the prototype header's epoch bits + // only change at a safepoint.) If not, attempt to rebias the object + // toward the current thread. Note that we must be absolutely sure + // that the current epoch is invalid in order to do this because + // otherwise the manipulations it performs on the mark word are + // illegal. + testq(tmp_reg, markOopDesc::epoch_mask_in_place); + jcc(Assembler::notZero, try_rebias); + + // The epoch of the current bias is still valid but we know nothing + // about the owner; it might be set or it might be clear. Try to + // acquire the bias of the object using an atomic operation. If this + // fails we will go in to the runtime to revoke the object's bias. + // Note that we first construct the presumed unbiased header so we + // don't accidentally blow away another thread's valid bias. + andq(swap_reg, + markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); + movq(tmp_reg, swap_reg); + orq(tmp_reg, r15_thread); + if (os::is_MP()) { + lock(); + } + cmpxchgq(tmp_reg, Address(obj_reg, 0)); + // If the biasing toward our thread failed, this means that + // another thread succeeded in biasing it toward itself and we + // need to revoke that bias. The revocation will occur in the + // interpreter runtime in the slow case. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->anonymously_biased_lock_entry_count_addr())); + } + if (slow_case != NULL) { + jcc(Assembler::notZero, *slow_case); + } + jmp(done); + + bind(try_rebias); + // At this point we know the epoch has expired, meaning that the + // current "bias owner", if any, is actually invalid. Under these + // circumstances _only_, we are allowed to use the current header's + // value as the comparison value when doing the cas to acquire the + // bias in the current epoch. In other words, we allow transfer of + // the bias from one thread to another directly in this situation. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + load_prototype_header(tmp_reg, obj_reg); + orq(tmp_reg, r15_thread); + if (os::is_MP()) { + lock(); + } + cmpxchgq(tmp_reg, Address(obj_reg, 0)); + // If the biasing toward our thread failed, then another thread + // succeeded in biasing it toward itself and we need to revoke that + // bias. The revocation will occur in the runtime in the slow case. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->rebiased_lock_entry_count_addr())); + } + if (slow_case != NULL) { + jcc(Assembler::notZero, *slow_case); + } + jmp(done); + + bind(try_revoke_bias); + // The prototype mark in the klass doesn't have the bias bit set any + // more, indicating that objects of this data type are not supposed + // to be biased any more. We are going to try to reset the mark of + // this object to the prototype value and fall through to the + // CAS-based locking scheme. Note that if our CAS fails, it means + // that another thread raced us for the privilege of revoking the + // bias of this particular object, so it's okay to continue in the + // normal locking code. + // + // FIXME: due to a lack of registers we currently blow away the age + // bits in this situation. Should attempt to preserve them. + load_prototype_header(tmp_reg, obj_reg); + if (os::is_MP()) { + lock(); + } + cmpxchgq(tmp_reg, Address(obj_reg, 0)); + // Fall through to the normal CAS-based lock, because no matter what + // the result of the above CAS, some thread must have succeeded in + // removing the bias bit from the object's header. + if (counters != NULL) { + cond_inc32(Assembler::zero, + ExternalAddress((address) counters->revoked_lock_entry_count_addr())); + } + + bind(cas_label); + + return null_check_offset; +} + +void MacroAssembler::call_VM_leaf_base(address entry_point, int num_args) { + Label L, E; + +#ifdef _WIN64 + // Windows always allocates space for it's register args + assert(num_args <= 4, "only register arguments supported"); + subq(rsp, frame::arg_reg_save_area_bytes); +#endif + + // Align stack if necessary + testl(rsp, 15); + jcc(Assembler::zero, L); + + subq(rsp, 8); + { + call(RuntimeAddress(entry_point)); + } + addq(rsp, 8); + jmp(E); + + bind(L); + { + call(RuntimeAddress(entry_point)); + } + + bind(E); + +#ifdef _WIN64 + // restore stack pointer + addq(rsp, frame::arg_reg_save_area_bytes); +#endif + +} + +void MacroAssembler::cmp64(Register src1, AddressLiteral src2) { + assert(!src2.is_lval(), "should use cmpptr"); + + if (reachable(src2)) { + cmpq(src1, as_Address(src2)); + } else { + lea(rscratch1, src2); + Assembler::cmpq(src1, Address(rscratch1, 0)); + } +} + +int MacroAssembler::corrected_idivq(Register reg) { + // Full implementation of Java ldiv and lrem; checks for special + // case as described in JVM spec., p.243 & p.271. The function + // returns the (pc) offset of the idivl instruction - may be needed + // for implicit exceptions. + // + // normal case special case + // + // input : rax: dividend min_long + // reg: divisor (may not be eax/edx) -1 + // + // output: rax: quotient (= rax idiv reg) min_long + // rdx: remainder (= rax irem reg) 0 + assert(reg != rax && reg != rdx, "reg cannot be rax or rdx register"); + static const int64_t min_long = 0x8000000000000000; + Label normal_case, special_case; + + // check for special case + cmp64(rax, ExternalAddress((address) &min_long)); + jcc(Assembler::notEqual, normal_case); + xorl(rdx, rdx); // prepare rdx for possible special case (where + // remainder = 0) + cmpq(reg, -1); + jcc(Assembler::equal, special_case); + + // handle normal case + bind(normal_case); + cdqq(); + int idivq_offset = offset(); + idivq(reg); + + // normal and special case exit + bind(special_case); + + return idivq_offset; +} + +void MacroAssembler::decrementq(Register reg, int value) { + if (value == min_jint) { subq(reg, value); return; } + if (value < 0) { incrementq(reg, -value); return; } + if (value == 0) { ; return; } + if (value == 1 && UseIncDec) { decq(reg) ; return; } + /* else */ { subq(reg, value) ; return; } +} + +void MacroAssembler::decrementq(Address dst, int value) { + if (value == min_jint) { subq(dst, value); return; } + if (value < 0) { incrementq(dst, -value); return; } + if (value == 0) { ; return; } + if (value == 1 && UseIncDec) { decq(dst) ; return; } + /* else */ { subq(dst, value) ; return; } +} + +void MacroAssembler::incrementq(Register reg, int value) { + if (value == min_jint) { addq(reg, value); return; } + if (value < 0) { decrementq(reg, -value); return; } + if (value == 0) { ; return; } + if (value == 1 && UseIncDec) { incq(reg) ; return; } + /* else */ { addq(reg, value) ; return; } +} + +void MacroAssembler::incrementq(Address dst, int value) { + if (value == min_jint) { addq(dst, value); return; } + if (value < 0) { decrementq(dst, -value); return; } + if (value == 0) { ; return; } + if (value == 1 && UseIncDec) { incq(dst) ; return; } + /* else */ { addq(dst, value) ; return; } +} + +// 32bit can do a case table jump in one instruction but we no longer allow the base +// to be installed in the Address class +void MacroAssembler::jump(ArrayAddress entry) { + lea(rscratch1, entry.base()); + Address dispatch = entry.index(); + assert(dispatch._base == noreg, "must be"); + dispatch._base = rscratch1; + jmp(dispatch); +} + +void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { + ShouldNotReachHere(); // 64bit doesn't use two regs + cmpq(x_lo, y_lo); +} + +void MacroAssembler::lea(Register dst, AddressLiteral src) { + mov_literal64(dst, (intptr_t)src.target(), src.rspec()); +} + +void MacroAssembler::lea(Address dst, AddressLiteral adr) { + mov_literal64(rscratch1, (intptr_t)adr.target(), adr.rspec()); + movptr(dst, rscratch1); +} + +void MacroAssembler::leave() { + // %%% is this really better? Why not on 32bit too? + emit_byte(0xC9); // LEAVE +} + +void MacroAssembler::lneg(Register hi, Register lo) { + ShouldNotReachHere(); // 64bit doesn't use two regs + negq(lo); +} + +void MacroAssembler::movoop(Register dst, jobject obj) { + mov_literal64(dst, (intptr_t)obj, oop_Relocation::spec_for_immediate()); +} + +void MacroAssembler::movoop(Address dst, jobject obj) { + mov_literal64(rscratch1, (intptr_t)obj, oop_Relocation::spec_for_immediate()); + movq(dst, rscratch1); +} + +void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { + mov_literal64(dst, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); +} + +void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { + mov_literal64(rscratch1, (intptr_t)obj, metadata_Relocation::spec_for_immediate()); + movq(dst, rscratch1); +} + +void MacroAssembler::movptr(Register dst, AddressLiteral src) { + if (src.is_lval()) { + mov_literal64(dst, (intptr_t)src.target(), src.rspec()); + } else { + if (reachable(src)) { + movq(dst, as_Address(src)); + } else { + lea(rscratch1, src); + movq(dst, Address(rscratch1,0)); + } + } +} + +void MacroAssembler::movptr(ArrayAddress dst, Register src) { + movq(as_Address(dst), src); +} + +void MacroAssembler::movptr(Register dst, ArrayAddress src) { + movq(dst, as_Address(src)); +} + +// src should NEVER be a real pointer. Use AddressLiteral for true pointers +void MacroAssembler::movptr(Address dst, intptr_t src) { + mov64(rscratch1, src); + movq(dst, rscratch1); +} + +// These are mostly for initializing NULL +void MacroAssembler::movptr(Address dst, int32_t src) { + movslq(dst, src); +} + +void MacroAssembler::movptr(Register dst, int32_t src) { + mov64(dst, (intptr_t)src); +} + +void MacroAssembler::pushoop(jobject obj) { + movoop(rscratch1, obj); + push(rscratch1); +} + +void MacroAssembler::pushklass(Metadata* obj) { + mov_metadata(rscratch1, obj); + push(rscratch1); +} + +void MacroAssembler::pushptr(AddressLiteral src) { + lea(rscratch1, src); + if (src.is_lval()) { + push(rscratch1); + } else { + pushq(Address(rscratch1, 0)); + } +} + +void MacroAssembler::reset_last_Java_frame(bool clear_fp, + bool clear_pc) { + // we must set sp to zero to clear frame + movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); + // must clear fp, so that compiled frames are not confused; it is + // possible that we need it only for debugging + if (clear_fp) { + movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); + } + + if (clear_pc) { + movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); + } +} + +void MacroAssembler::set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc) { + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = rsp; + } + + // last_java_fp is optional + if (last_java_fp->is_valid()) { + movptr(Address(r15_thread, JavaThread::last_Java_fp_offset()), + last_java_fp); + } + + // last_java_pc is optional + if (last_java_pc != NULL) { + Address java_pc(r15_thread, + JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()); + lea(rscratch1, InternalAddress(last_java_pc)); + movptr(java_pc, rscratch1); + } + + movptr(Address(r15_thread, JavaThread::last_Java_sp_offset()), last_java_sp); +} + +static void pass_arg0(MacroAssembler* masm, Register arg) { + if (c_rarg0 != arg ) { + masm->mov(c_rarg0, arg); + } +} + +static void pass_arg1(MacroAssembler* masm, Register arg) { + if (c_rarg1 != arg ) { + masm->mov(c_rarg1, arg); + } +} + +static void pass_arg2(MacroAssembler* masm, Register arg) { + if (c_rarg2 != arg ) { + masm->mov(c_rarg2, arg); + } +} + +static void pass_arg3(MacroAssembler* masm, Register arg) { + if (c_rarg3 != arg ) { + masm->mov(c_rarg3, arg); + } +} + +void MacroAssembler::stop(const char* msg) { + address rip = pc(); + pusha(); // get regs on stack + lea(c_rarg0, ExternalAddress((address) msg)); + lea(c_rarg1, InternalAddress(rip)); + movq(c_rarg2, rsp); // pass pointer to regs array + andq(rsp, -16); // align stack as required by ABI + call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); + hlt(); +} + +void MacroAssembler::warn(const char* msg) { + push(rbp); + movq(rbp, rsp); + andq(rsp, -16); // align stack as required by push_CPU_state and call + push_CPU_state(); // keeps alignment at 16 bytes + lea(c_rarg0, ExternalAddress((address) msg)); + call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0); + pop_CPU_state(); + mov(rsp, rbp); + pop(rbp); +} + +void MacroAssembler::print_state() { + address rip = pc(); + pusha(); // get regs on stack + push(rbp); + movq(rbp, rsp); + andq(rsp, -16); // align stack as required by push_CPU_state and call + push_CPU_state(); // keeps alignment at 16 bytes + + lea(c_rarg0, InternalAddress(rip)); + lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array + call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1); + + pop_CPU_state(); + mov(rsp, rbp); + pop(rbp); + popa(); +} + +#ifndef PRODUCT +extern "C" void findpc(intptr_t x); +#endif + +void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) { + // In order to get locks to work, we need to fake a in_VM state + if (ShowMessageBoxOnError) { + JavaThread* thread = JavaThread::current(); + JavaThreadState saved_state = thread->thread_state(); + thread->set_thread_state(_thread_in_vm); +#ifndef PRODUCT + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { + ttyLocker ttyl; + BytecodeCounter::print(); + } +#endif + // To see where a verify_oop failed, get $ebx+40/X for this frame. + // XXX correct this offset for amd64 + // This is the value of eip which points to where verify_oop will return. + if (os::message_box(msg, "Execution stopped, print registers?")) { + print_state64(pc, regs); + BREAKPOINT; + assert(false, "start up GDB"); + } + ThreadStateTransition::transition(thread, _thread_in_vm, saved_state); + } else { + ttyLocker ttyl; + ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", + msg); + assert(false, err_msg("DEBUG MESSAGE: %s", msg)); + } +} + +void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) { + ttyLocker ttyl; + FlagSetting fs(Debugging, true); + tty->print_cr("rip = 0x%016lx", pc); +#ifndef PRODUCT + tty->cr(); + findpc(pc); + tty->cr(); +#endif +#define PRINT_REG(rax, value) \ + { tty->print("%s = ", #rax); os::print_location(tty, value); } + PRINT_REG(rax, regs[15]); + PRINT_REG(rbx, regs[12]); + PRINT_REG(rcx, regs[14]); + PRINT_REG(rdx, regs[13]); + PRINT_REG(rdi, regs[8]); + PRINT_REG(rsi, regs[9]); + PRINT_REG(rbp, regs[10]); + PRINT_REG(rsp, regs[11]); + PRINT_REG(r8 , regs[7]); + PRINT_REG(r9 , regs[6]); + PRINT_REG(r10, regs[5]); + PRINT_REG(r11, regs[4]); + PRINT_REG(r12, regs[3]); + PRINT_REG(r13, regs[2]); + PRINT_REG(r14, regs[1]); + PRINT_REG(r15, regs[0]); +#undef PRINT_REG + // Print some words near top of staack. + int64_t* rsp = (int64_t*) regs[11]; + int64_t* dump_sp = rsp; + for (int col1 = 0; col1 < 8; col1++) { + tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); + os::print_location(tty, *dump_sp++); + } + for (int row = 0; row < 25; row++) { + tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp); + for (int col = 0; col < 4; col++) { + tty->print(" 0x%016lx", *dump_sp++); + } + tty->cr(); + } + // Print some instructions around pc: + Disassembler::decode((address)pc-64, (address)pc); + tty->print_cr("--------"); + Disassembler::decode((address)pc, (address)pc+32); +} + +#endif // _LP64 + +// Now versions that are common to 32/64 bit + +void MacroAssembler::addptr(Register dst, int32_t imm32) { + LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); +} + +void MacroAssembler::addptr(Register dst, Register src) { + LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); +} + +void MacroAssembler::addptr(Address dst, Register src) { + LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); +} + +void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::addsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::addsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::addss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + addss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + addss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::align(int modulus) { + if (offset() % modulus != 0) { + nop(modulus - (offset() % modulus)); + } +} + +void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) { + // Used in sign-masking with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::andpd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::andpd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) { + // Used in sign-masking with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::andps(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::andps(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::andptr(Register dst, int32_t imm32) { + LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); +} + +void MacroAssembler::atomic_incl(AddressLiteral counter_addr) { + pushf(); + if (os::is_MP()) + lock(); + incrementl(counter_addr); + popf(); +} + +// Writes to stack successive pages until offset reached to check for +// stack overflow + shadow pages. This clobbers tmp. +void MacroAssembler::bang_stack_size(Register size, Register tmp) { + movptr(tmp, rsp); + // Bang stack for total size given plus shadow page size. + // Bang one page at a time because large size can bang beyond yellow and + // red zones. + Label loop; + bind(loop); + movl(Address(tmp, (-os::vm_page_size())), size ); + subptr(tmp, os::vm_page_size()); + subl(size, os::vm_page_size()); + jcc(Assembler::greater, loop); + + // Bang down shadow pages too. + // The -1 because we already subtracted 1 page. + for (int i = 0; i< StackShadowPages-1; i++) { + // this could be any sized move but this is can be a debugging crumb + // so the bigger the better. + movptr(Address(tmp, (-i*os::vm_page_size())), size ); + } +} + +void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { + assert(UseBiasedLocking, "why call this otherwise?"); + + // Check for biased locking unlock case, which is a no-op + // Note: we do not have to check the thread ID for two reasons. + // First, the interpreter checks for IllegalMonitorStateException at + // a higher level. Second, if the bias was revoked while we held the + // lock, the object could not be rebiased toward another thread, so + // the bias bit would be clear. + movptr(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); + andptr(temp_reg, markOopDesc::biased_lock_mask_in_place); + cmpptr(temp_reg, markOopDesc::biased_lock_pattern); + jcc(Assembler::equal, done); +} + +void MacroAssembler::c2bool(Register x) { + // implements x == 0 ? 0 : 1 + // note: must only look at least-significant byte of x + // since C-style booleans are stored in one byte + // only! (was bug) + andl(x, 0xFF); + setb(Assembler::notZero, x); +} + +// Wouldn't need if AddressLiteral version had new name +void MacroAssembler::call(Label& L, relocInfo::relocType rtype) { + Assembler::call(L, rtype); +} + +void MacroAssembler::call(Register entry) { + Assembler::call(entry); +} + +void MacroAssembler::call(AddressLiteral entry) { + if (reachable(entry)) { + Assembler::call_literal(entry.target(), entry.rspec()); + } else { + lea(rscratch1, entry); + Assembler::call(rscratch1); + } +} + +void MacroAssembler::ic_call(address entry) { + RelocationHolder rh = virtual_call_Relocation::spec(pc()); + movptr(rax, (intptr_t)Universe::non_oop_word()); + call(AddressLiteral(entry, rh)); +} + +// Implementation of call_VM versions + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + bool check_exceptions) { + Label C, E; + call(C, relocInfo::none); + jmp(E); + + bind(C); + call_VM_helper(oop_result, entry_point, 0, check_exceptions); + ret(0); + + bind(E); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions) { + Label C, E; + call(C, relocInfo::none); + jmp(E); + + bind(C); + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 1, check_exceptions); + ret(0); + + bind(E); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + Label C, E; + call(C, relocInfo::none); + jmp(E); + + bind(C); + + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 2, check_exceptions); + ret(0); + + bind(E); +} + +void MacroAssembler::call_VM(Register oop_result, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + Label C, E; + call(C, relocInfo::none); + jmp(E); + + bind(C); + + LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); + LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); + pass_arg3(this, arg_3); + + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + + pass_arg1(this, arg_1); + call_VM_helper(oop_result, entry_point, 3, check_exceptions); + ret(0); + + bind(E); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); + call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); + LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); + pass_arg3(this, arg_3); + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +void MacroAssembler::super_call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); + MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); +} + +void MacroAssembler::super_call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + bool check_exceptions) { + pass_arg1(this, arg_1); + super_call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); +} + +void MacroAssembler::super_call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + bool check_exceptions) { + + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); +} + +void MacroAssembler::super_call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, + Register arg_2, + Register arg_3, + bool check_exceptions) { + LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); + LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); + pass_arg3(this, arg_3); + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + pass_arg1(this, arg_1); + super_call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); +} + +void MacroAssembler::call_VM_base(Register oop_result, + Register java_thread, + Register last_java_sp, + address entry_point, + int number_of_arguments, + bool check_exceptions) { + // determine java_thread register + if (!java_thread->is_valid()) { +#ifdef _LP64 + java_thread = r15_thread; +#else + java_thread = rdi; + get_thread(java_thread); +#endif // LP64 + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = rsp; + } + // debugging support + assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); + LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); +#ifdef ASSERT + // TraceBytecodes does not use r12 but saves it over the call, so don't verify + // r12 is the heapbase. + LP64_ONLY(if ((UseCompressedOops || UseCompressedKlassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");) +#endif // ASSERT + + assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); + assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); + + // push java thread (becomes first argument of C function) + + NOT_LP64(push(java_thread); number_of_arguments++); + LP64_ONLY(mov(c_rarg0, r15_thread)); + + // set last Java frame before call + assert(last_java_sp != rbp, "can't use ebp/rbp"); + + // Only interpreter should have to set fp + set_last_Java_frame(java_thread, last_java_sp, rbp, NULL); + + // do the call, remove parameters + MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); + + // restore the thread (cannot use the pushed argument since arguments + // may be overwritten by C code generated by an optimizing compiler); + // however can use the register value directly if it is callee saved. + if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { + // rdi & rsi (also r15) are callee saved -> nothing to do +#ifdef ASSERT + guarantee(java_thread != rax, "change this code"); + push(rax); + { Label L; + get_thread(rax); + cmpptr(java_thread, rax); + jcc(Assembler::equal, L); + STOP("MacroAssembler::call_VM_base: rdi not callee saved?"); + bind(L); + } + pop(rax); +#endif + } else { + get_thread(java_thread); + } + // reset last Java frame + // Only interpreter should have to clear fp + reset_last_Java_frame(java_thread, true, false); + +#ifndef CC_INTERP + // C++ interp handles this in the interpreter + check_and_handle_popframe(java_thread); + check_and_handle_earlyret(java_thread); +#endif /* CC_INTERP */ + + if (check_exceptions) { + // check for pending exceptions (java_thread is set upon return) + cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); +#ifndef _LP64 + jump_cc(Assembler::notEqual, + RuntimeAddress(StubRoutines::forward_exception_entry())); +#else + // This used to conditionally jump to forward_exception however it is + // possible if we relocate that the branch will not reach. So we must jump + // around so we can always reach + + Label ok; + jcc(Assembler::equal, ok); + jump(RuntimeAddress(StubRoutines::forward_exception_entry())); + bind(ok); +#endif // LP64 + } + + // get oop result if there is one and reset the value in the thread + if (oop_result->is_valid()) { + get_vm_result(oop_result, java_thread); + } +} + +void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { + + // Calculate the value for last_Java_sp + // somewhat subtle. call_VM does an intermediate call + // which places a return address on the stack just under the + // stack pointer as the user finsihed with it. This allows + // use to retrieve last_Java_pc from last_Java_sp[-1]. + // On 32bit we then have to push additional args on the stack to accomplish + // the actual requested call. On 64bit call_VM only can use register args + // so the only extra space is the return address that call_VM created. + // This hopefully explains the calculations here. + +#ifdef _LP64 + // We've pushed one address, correct last_Java_sp + lea(rax, Address(rsp, wordSize)); +#else + lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); +#endif // LP64 + + call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); + +} + +void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { + call_VM_leaf_base(entry_point, number_of_arguments); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + call_VM_leaf(entry_point, 1); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + + LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + call_VM_leaf(entry_point, 2); +} + +void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + call_VM_leaf(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 1); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { + + LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 2); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { + LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 3); +} + +void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { + LP64_ONLY(assert(arg_0 != c_rarg3, "smashed arg")); + LP64_ONLY(assert(arg_1 != c_rarg3, "smashed arg")); + LP64_ONLY(assert(arg_2 != c_rarg3, "smashed arg")); + pass_arg3(this, arg_3); + LP64_ONLY(assert(arg_0 != c_rarg2, "smashed arg")); + LP64_ONLY(assert(arg_1 != c_rarg2, "smashed arg")); + pass_arg2(this, arg_2); + LP64_ONLY(assert(arg_0 != c_rarg1, "smashed arg")); + pass_arg1(this, arg_1); + pass_arg0(this, arg_0); + MacroAssembler::call_VM_leaf_base(entry_point, 4); +} + +void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { + movptr(oop_result, Address(java_thread, JavaThread::vm_result_offset())); + movptr(Address(java_thread, JavaThread::vm_result_offset()), NULL_WORD); + verify_oop(oop_result, "broken oop in call_VM_base"); +} + +void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { + movptr(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); + movptr(Address(java_thread, JavaThread::vm_result_2_offset()), NULL_WORD); +} + +void MacroAssembler::check_and_handle_earlyret(Register java_thread) { +} + +void MacroAssembler::check_and_handle_popframe(Register java_thread) { +} + +void MacroAssembler::cmp32(AddressLiteral src1, int32_t imm) { + if (reachable(src1)) { + cmpl(as_Address(src1), imm); + } else { + lea(rscratch1, src1); + cmpl(Address(rscratch1, 0), imm); + } +} + +void MacroAssembler::cmp32(Register src1, AddressLiteral src2) { + assert(!src2.is_lval(), "use cmpptr"); + if (reachable(src2)) { + cmpl(src1, as_Address(src2)); + } else { + lea(rscratch1, src2); + cmpl(src1, Address(rscratch1, 0)); + } +} + +void MacroAssembler::cmp32(Register src1, int32_t imm) { + Assembler::cmpl(src1, imm); +} + +void MacroAssembler::cmp32(Register src1, Address src2) { + Assembler::cmpl(src1, src2); +} + +void MacroAssembler::cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { + ucomisd(opr1, opr2); + + Label L; + if (unordered_is_less) { + movl(dst, -1); + jcc(Assembler::parity, L); + jcc(Assembler::below , L); + movl(dst, 0); + jcc(Assembler::equal , L); + increment(dst); + } else { // unordered is greater + movl(dst, 1); + jcc(Assembler::parity, L); + jcc(Assembler::above , L); + movl(dst, 0); + jcc(Assembler::equal , L); + decrementl(dst); + } + bind(L); +} + +void MacroAssembler::cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less) { + ucomiss(opr1, opr2); + + Label L; + if (unordered_is_less) { + movl(dst, -1); + jcc(Assembler::parity, L); + jcc(Assembler::below , L); + movl(dst, 0); + jcc(Assembler::equal , L); + increment(dst); + } else { // unordered is greater + movl(dst, 1); + jcc(Assembler::parity, L); + jcc(Assembler::above , L); + movl(dst, 0); + jcc(Assembler::equal , L); + decrementl(dst); + } + bind(L); +} + + +void MacroAssembler::cmp8(AddressLiteral src1, int imm) { + if (reachable(src1)) { + cmpb(as_Address(src1), imm); + } else { + lea(rscratch1, src1); + cmpb(Address(rscratch1, 0), imm); + } +} + +void MacroAssembler::cmpptr(Register src1, AddressLiteral src2) { +#ifdef _LP64 + if (src2.is_lval()) { + movptr(rscratch1, src2); + Assembler::cmpq(src1, rscratch1); + } else if (reachable(src2)) { + cmpq(src1, as_Address(src2)); + } else { + lea(rscratch1, src2); + Assembler::cmpq(src1, Address(rscratch1, 0)); + } +#else + if (src2.is_lval()) { + cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); + } else { + cmpl(src1, as_Address(src2)); + } +#endif // _LP64 +} + +void MacroAssembler::cmpptr(Address src1, AddressLiteral src2) { + assert(src2.is_lval(), "not a mem-mem compare"); +#ifdef _LP64 + // moves src2's literal address + movptr(rscratch1, src2); + Assembler::cmpq(src1, rscratch1); +#else + cmp_literal32(src1, (int32_t) src2.target(), src2.rspec()); +#endif // _LP64 +} + +void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr) { + if (reachable(adr)) { + if (os::is_MP()) + lock(); + cmpxchgptr(reg, as_Address(adr)); + } else { + lea(rscratch1, adr); + if (os::is_MP()) + lock(); + cmpxchgptr(reg, Address(rscratch1, 0)); + } +} + +void MacroAssembler::cmpxchgptr(Register reg, Address adr) { + LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); +} + +void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::comisd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::comisd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::comiss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::comiss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::comiss(dst, Address(rscratch1, 0)); + } +} + + +void MacroAssembler::cond_inc32(Condition cond, AddressLiteral counter_addr) { + Condition negated_cond = negate_condition(cond); + Label L; + jcc(negated_cond, L); + atomic_incl(counter_addr); + bind(L); +} + +int MacroAssembler::corrected_idivl(Register reg) { + // Full implementation of Java idiv and irem; checks for + // special case as described in JVM spec., p.243 & p.271. + // The function returns the (pc) offset of the idivl + // instruction - may be needed for implicit exceptions. + // + // normal case special case + // + // input : rax,: dividend min_int + // reg: divisor (may not be rax,/rdx) -1 + // + // output: rax,: quotient (= rax, idiv reg) min_int + // rdx: remainder (= rax, irem reg) 0 + assert(reg != rax && reg != rdx, "reg cannot be rax, or rdx register"); + const int min_int = 0x80000000; + Label normal_case, special_case; + + // check for special case + cmpl(rax, min_int); + jcc(Assembler::notEqual, normal_case); + xorl(rdx, rdx); // prepare rdx for possible special case (where remainder = 0) + cmpl(reg, -1); + jcc(Assembler::equal, special_case); + + // handle normal case + bind(normal_case); + cdql(); + int idivl_offset = offset(); + idivl(reg); + + // normal and special case exit + bind(special_case); + + return idivl_offset; +} + + + +void MacroAssembler::decrementl(Register reg, int value) { + if (value == min_jint) {subl(reg, value) ; return; } + if (value < 0) { incrementl(reg, -value); return; } + if (value == 0) { ; return; } + if (value == 1 && UseIncDec) { decl(reg) ; return; } + /* else */ { subl(reg, value) ; return; } +} + +void MacroAssembler::decrementl(Address dst, int value) { + if (value == min_jint) {subl(dst, value) ; return; } + if (value < 0) { incrementl(dst, -value); return; } + if (value == 0) { ; return; } + if (value == 1 && UseIncDec) { decl(dst) ; return; } + /* else */ { subl(dst, value) ; return; } +} + +void MacroAssembler::division_with_shift (Register reg, int shift_value) { + assert (shift_value > 0, "illegal shift value"); + Label _is_positive; + testl (reg, reg); + jcc (Assembler::positive, _is_positive); + int offset = (1 << shift_value) - 1 ; + + if (offset == 1) { + incrementl(reg); + } else { + addl(reg, offset); + } + + bind (_is_positive); + sarl(reg, shift_value); +} + +void MacroAssembler::divsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::divsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::divsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::divss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::divss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::divss(dst, Address(rscratch1, 0)); + } +} + +// !defined(COMPILER2) is because of stupid core builds +#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) +void MacroAssembler::empty_FPU_stack() { + if (VM_Version::supports_mmx()) { + emms(); + } else { + for (int i = 8; i-- > 0; ) ffree(i); + } +} +#endif // !LP64 || C1 || !C2 + + +// Defines obj, preserves var_size_in_bytes +void MacroAssembler::eden_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Label& slow_case) { + assert(obj == rax, "obj must be in rax, for cmpxchg"); + assert_different_registers(obj, var_size_in_bytes, t1); + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + jmp(slow_case); + } else { + Register end = t1; + Label retry; + bind(retry); + ExternalAddress heap_top((address) Universe::heap()->top_addr()); + movptr(obj, heap_top); + if (var_size_in_bytes == noreg) { + lea(end, Address(obj, con_size_in_bytes)); + } else { + lea(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + // if end < obj then we wrapped around => object too long => slow case + cmpptr(end, obj); + jcc(Assembler::below, slow_case); + cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); + jcc(Assembler::above, slow_case); + // Compare obj with the top addr, and if still equal, store the new top addr in + // end at the address of the top addr pointer. Sets ZF if was equal, and clears + // it otherwise. Use lock prefix for atomicity on MPs. + locked_cmpxchgptr(end, heap_top); + jcc(Assembler::notEqual, retry); + } +} + +void MacroAssembler::enter() { + push(rbp); + mov(rbp, rsp); +} + +// A 5 byte nop that is safe for patching (see patch_verified_entry) +void MacroAssembler::fat_nop() { + if (UseAddressNop) { + addr_nop_5(); + } else { + emit_byte(0x26); // es: + emit_byte(0x2e); // cs: + emit_byte(0x64); // fs: + emit_byte(0x65); // gs: + emit_byte(0x90); + } +} + +void MacroAssembler::fcmp(Register tmp) { + fcmp(tmp, 1, true, true); +} + +void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { + assert(!pop_right || pop_left, "usage error"); + if (VM_Version::supports_cmov()) { + assert(tmp == noreg, "unneeded temp"); + if (pop_left) { + fucomip(index); + } else { + fucomi(index); + } + if (pop_right) { + fpop(); + } + } else { + assert(tmp != noreg, "need temp"); + if (pop_left) { + if (pop_right) { + fcompp(); + } else { + fcomp(index); + } + } else { + fcom(index); + } + // convert FPU condition into eflags condition via rax, + save_rax(tmp); + fwait(); fnstsw_ax(); + sahf(); + restore_rax(tmp); + } + // condition codes set as follows: + // + // CF (corresponds to C0) if x < y + // PF (corresponds to C2) if unordered + // ZF (corresponds to C3) if x = y +} + +void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { + fcmp2int(dst, unordered_is_less, 1, true, true); +} + +void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { + fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right); + Label L; + if (unordered_is_less) { + movl(dst, -1); + jcc(Assembler::parity, L); + jcc(Assembler::below , L); + movl(dst, 0); + jcc(Assembler::equal , L); + increment(dst); + } else { // unordered is greater + movl(dst, 1); + jcc(Assembler::parity, L); + jcc(Assembler::above , L); + movl(dst, 0); + jcc(Assembler::equal , L); + decrementl(dst); + } + bind(L); +} + +void MacroAssembler::fld_d(AddressLiteral src) { + fld_d(as_Address(src)); +} + +void MacroAssembler::fld_s(AddressLiteral src) { + fld_s(as_Address(src)); +} + +void MacroAssembler::fld_x(AddressLiteral src) { + Assembler::fld_x(as_Address(src)); +} + +void MacroAssembler::fldcw(AddressLiteral src) { + Assembler::fldcw(as_Address(src)); +} + +void MacroAssembler::pow_exp_core_encoding() { + // kills rax, rcx, rdx + subptr(rsp,sizeof(jdouble)); + // computes 2^X. Stack: X ... + // f2xm1 computes 2^X-1 but only operates on -1<=X<=1. Get int(X) and + // keep it on the thread's stack to compute 2^int(X) later + // then compute 2^(X-int(X)) as (2^(X-int(X)-1+1) + // final result is obtained with: 2^X = 2^int(X) * 2^(X-int(X)) + fld_s(0); // Stack: X X ... + frndint(); // Stack: int(X) X ... + fsuba(1); // Stack: int(X) X-int(X) ... + fistp_s(Address(rsp,0)); // move int(X) as integer to thread's stack. Stack: X-int(X) ... + f2xm1(); // Stack: 2^(X-int(X))-1 ... + fld1(); // Stack: 1 2^(X-int(X))-1 ... + faddp(1); // Stack: 2^(X-int(X)) + // computes 2^(int(X)): add exponent bias (1023) to int(X), then + // shift int(X)+1023 to exponent position. + // Exponent is limited to 11 bits if int(X)+1023 does not fit in 11 + // bits, set result to NaN. 0x000 and 0x7FF are reserved exponent + // values so detect them and set result to NaN. + movl(rax,Address(rsp,0)); + movl(rcx, -2048); // 11 bit mask and valid NaN binary encoding + addl(rax, 1023); + movl(rdx,rax); + shll(rax,20); + // Check that 0 < int(X)+1023 < 2047. Otherwise set rax to NaN. + addl(rdx,1); + // Check that 1 < int(X)+1023+1 < 2048 + // in 3 steps: + // 1- (int(X)+1023+1)&-2048 == 0 => 0 <= int(X)+1023+1 < 2048 + // 2- (int(X)+1023+1)&-2048 != 0 + // 3- (int(X)+1023+1)&-2048 != 1 + // Do 2- first because addl just updated the flags. + cmov32(Assembler::equal,rax,rcx); + cmpl(rdx,1); + cmov32(Assembler::equal,rax,rcx); + testl(rdx,rcx); + cmov32(Assembler::notEqual,rax,rcx); + movl(Address(rsp,4),rax); + movl(Address(rsp,0),0); + fmul_d(Address(rsp,0)); // Stack: 2^X ... + addptr(rsp,sizeof(jdouble)); +} + +void MacroAssembler::increase_precision() { + subptr(rsp, BytesPerWord); + fnstcw(Address(rsp, 0)); + movl(rax, Address(rsp, 0)); + orl(rax, 0x300); + push(rax); + fldcw(Address(rsp, 0)); + pop(rax); +} + +void MacroAssembler::restore_precision() { + fldcw(Address(rsp, 0)); + addptr(rsp, BytesPerWord); +} + +void MacroAssembler::fast_pow() { + // computes X^Y = 2^(Y * log2(X)) + // if fast computation is not possible, result is NaN. Requires + // fallback from user of this macro. + // increase precision for intermediate steps of the computation + increase_precision(); + fyl2x(); // Stack: (Y*log2(X)) ... + pow_exp_core_encoding(); // Stack: exp(X) ... + restore_precision(); +} + +void MacroAssembler::fast_exp() { + // computes exp(X) = 2^(X * log2(e)) + // if fast computation is not possible, result is NaN. Requires + // fallback from user of this macro. + // increase precision for intermediate steps of the computation + increase_precision(); + fldl2e(); // Stack: log2(e) X ... + fmulp(1); // Stack: (X*log2(e)) ... + pow_exp_core_encoding(); // Stack: exp(X) ... + restore_precision(); +} + +void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) { + // kills rax, rcx, rdx + // pow and exp needs 2 extra registers on the fpu stack. + Label slow_case, done; + Register tmp = noreg; + if (!VM_Version::supports_cmov()) { + // fcmp needs a temporary so preserve rdx, + tmp = rdx; + } + Register tmp2 = rax; + Register tmp3 = rcx; + + if (is_exp) { + // Stack: X + fld_s(0); // duplicate argument for runtime call. Stack: X X + fast_exp(); // Stack: exp(X) X + fcmp(tmp, 0, false, false); // Stack: exp(X) X + // exp(X) not equal to itself: exp(X) is NaN go to slow case. + jcc(Assembler::parity, slow_case); + // get rid of duplicate argument. Stack: exp(X) + if (num_fpu_regs_in_use > 0) { + fxch(); + fpop(); + } else { + ffree(1); + } + jmp(done); + } else { + // Stack: X Y + Label x_negative, y_odd; + + fldz(); // Stack: 0 X Y + fcmp(tmp, 1, true, false); // Stack: X Y + jcc(Assembler::above, x_negative); + + // X >= 0 + + fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y + fld_s(1); // Stack: X Y X Y + fast_pow(); // Stack: X^Y X Y + fcmp(tmp, 0, false, false); // Stack: X^Y X Y + // X^Y not equal to itself: X^Y is NaN go to slow case. + jcc(Assembler::parity, slow_case); + // get rid of duplicate arguments. Stack: X^Y + if (num_fpu_regs_in_use > 0) { + fxch(); fpop(); + fxch(); fpop(); + } else { + ffree(2); + ffree(1); + } + jmp(done); + + // X <= 0 + bind(x_negative); + + fld_s(1); // Stack: Y X Y + frndint(); // Stack: int(Y) X Y + fcmp(tmp, 2, false, false); // Stack: int(Y) X Y + jcc(Assembler::notEqual, slow_case); + + subptr(rsp, 8); + + // For X^Y, when X < 0, Y has to be an integer and the final + // result depends on whether it's odd or even. We just checked + // that int(Y) == Y. We move int(Y) to gp registers as a 64 bit + // integer to test its parity. If int(Y) is huge and doesn't fit + // in the 64 bit integer range, the integer indefinite value will + // end up in the gp registers. Huge numbers are all even, the + // integer indefinite number is even so it's fine. + +#ifdef ASSERT + // Let's check we don't end up with an integer indefinite number + // when not expected. First test for huge numbers: check whether + // int(Y)+1 == int(Y) which is true for very large numbers and + // those are all even. A 64 bit integer is guaranteed to not + // overflow for numbers where y+1 != y (when precision is set to + // double precision). + Label y_not_huge; + + fld1(); // Stack: 1 int(Y) X Y + fadd(1); // Stack: 1+int(Y) int(Y) X Y + +#ifdef _LP64 + // trip to memory to force the precision down from double extended + // precision + fstp_d(Address(rsp, 0)); + fld_d(Address(rsp, 0)); +#endif + + fcmp(tmp, 1, true, false); // Stack: int(Y) X Y +#endif + + // move int(Y) as 64 bit integer to thread's stack + fistp_d(Address(rsp,0)); // Stack: X Y + +#ifdef ASSERT + jcc(Assembler::notEqual, y_not_huge); + + // Y is huge so we know it's even. It may not fit in a 64 bit + // integer and we don't want the debug code below to see the + // integer indefinite value so overwrite int(Y) on the thread's + // stack with 0. + movl(Address(rsp, 0), 0); + movl(Address(rsp, 4), 0); + + bind(y_not_huge); +#endif + + fld_s(1); // duplicate arguments for runtime call. Stack: Y X Y + fld_s(1); // Stack: X Y X Y + fabs(); // Stack: abs(X) Y X Y + fast_pow(); // Stack: abs(X)^Y X Y + fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y + // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case. + + pop(tmp2); + NOT_LP64(pop(tmp3)); + jcc(Assembler::parity, slow_case); + +#ifdef ASSERT + // Check that int(Y) is not integer indefinite value (int + // overflow). Shouldn't happen because for values that would + // overflow, 1+int(Y)==Y which was tested earlier. +#ifndef _LP64 + { + Label integer; + testl(tmp2, tmp2); + jcc(Assembler::notZero, integer); + cmpl(tmp3, 0x80000000); + jcc(Assembler::notZero, integer); + STOP("integer indefinite value shouldn't be seen here"); + bind(integer); + } +#else + { + Label integer; + mov(tmp3, tmp2); // preserve tmp2 for parity check below + shlq(tmp3, 1); + jcc(Assembler::carryClear, integer); + jcc(Assembler::notZero, integer); + STOP("integer indefinite value shouldn't be seen here"); + bind(integer); + } +#endif +#endif + + // get rid of duplicate arguments. Stack: X^Y + if (num_fpu_regs_in_use > 0) { + fxch(); fpop(); + fxch(); fpop(); + } else { + ffree(2); + ffree(1); + } + + testl(tmp2, 1); + jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y + // X <= 0, Y even: X^Y = -abs(X)^Y + + fchs(); // Stack: -abs(X)^Y Y + jmp(done); + } + + // slow case: runtime call + bind(slow_case); + + fpop(); // pop incorrect result or int(Y) + + fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow), + is_exp ? 1 : 2, num_fpu_regs_in_use); + + // Come here with result in F-TOS + bind(done); +} + +void MacroAssembler::fpop() { + ffree(); + fincstp(); +} + +void MacroAssembler::fremr(Register tmp) { + save_rax(tmp); + { Label L; + bind(L); + fprem(); + fwait(); fnstsw_ax(); +#ifdef _LP64 + testl(rax, 0x400); + jcc(Assembler::notEqual, L); +#else + sahf(); + jcc(Assembler::parity, L); +#endif // _LP64 + } + restore_rax(tmp); + // Result is in ST0. + // Note: fxch & fpop to get rid of ST1 + // (otherwise FPU stack could overflow eventually) + fxch(1); + fpop(); +} + + +void MacroAssembler::incrementl(AddressLiteral dst) { + if (reachable(dst)) { + incrementl(as_Address(dst)); + } else { + lea(rscratch1, dst); + incrementl(Address(rscratch1, 0)); + } +} + +void MacroAssembler::incrementl(ArrayAddress dst) { + incrementl(as_Address(dst)); +} + +void MacroAssembler::incrementl(Register reg, int value) { + if (value == min_jint) {addl(reg, value) ; return; } + if (value < 0) { decrementl(reg, -value); return; } + if (value == 0) { ; return; } + if (value == 1 && UseIncDec) { incl(reg) ; return; } + /* else */ { addl(reg, value) ; return; } +} + +void MacroAssembler::incrementl(Address dst, int value) { + if (value == min_jint) {addl(dst, value) ; return; } + if (value < 0) { decrementl(dst, -value); return; } + if (value == 0) { ; return; } + if (value == 1 && UseIncDec) { incl(dst) ; return; } + /* else */ { addl(dst, value) ; return; } +} + +void MacroAssembler::jump(AddressLiteral dst) { + if (reachable(dst)) { + jmp_literal(dst.target(), dst.rspec()); + } else { + lea(rscratch1, dst); + jmp(rscratch1); + } +} + +void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst) { + if (reachable(dst)) { + InstructionMark im(this); + relocate(dst.reloc()); + const int short_size = 2; + const int long_size = 6; + int offs = (intptr_t)dst.target() - ((intptr_t)pc()); + if (dst.reloc() == relocInfo::none && is8bit(offs - short_size)) { + // 0111 tttn #8-bit disp + emit_byte(0x70 | cc); + emit_byte((offs - short_size) & 0xFF); + } else { + // 0000 1111 1000 tttn #32-bit disp + emit_byte(0x0F); + emit_byte(0x80 | cc); + emit_long(offs - long_size); + } + } else { +#ifdef ASSERT + warning("reversing conditional branch"); +#endif /* ASSERT */ + Label skip; + jccb(reverse[cc], skip); + lea(rscratch1, dst); + Assembler::jmp(rscratch1); + bind(skip); + } +} + +void MacroAssembler::ldmxcsr(AddressLiteral src) { + if (reachable(src)) { + Assembler::ldmxcsr(as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ldmxcsr(Address(rscratch1, 0)); + } +} + +int MacroAssembler::load_signed_byte(Register dst, Address src) { + int off; + if (LP64_ONLY(true ||) VM_Version::is_P6()) { + off = offset(); + movsbl(dst, src); // movsxb + } else { + off = load_unsigned_byte(dst, src); + shll(dst, 24); + sarl(dst, 24); + } + return off; +} + +// Note: load_signed_short used to be called load_signed_word. +// Although the 'w' in x86 opcodes refers to the term "word" in the assembler +// manual, which means 16 bits, that usage is found nowhere in HotSpot code. +// The term "word" in HotSpot means a 32- or 64-bit machine word. +int MacroAssembler::load_signed_short(Register dst, Address src) { + int off; + if (LP64_ONLY(true ||) VM_Version::is_P6()) { + // This is dubious to me since it seems safe to do a signed 16 => 64 bit + // version but this is what 64bit has always done. This seems to imply + // that users are only using 32bits worth. + off = offset(); + movswl(dst, src); // movsxw + } else { + off = load_unsigned_short(dst, src); + shll(dst, 16); + sarl(dst, 16); + } + return off; +} + +int MacroAssembler::load_unsigned_byte(Register dst, Address src) { + // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, + // and "3.9 Partial Register Penalties", p. 22). + int off; + if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { + off = offset(); + movzbl(dst, src); // movzxb + } else { + xorl(dst, dst); + off = offset(); + movb(dst, src); + } + return off; +} + +// Note: load_unsigned_short used to be called load_unsigned_word. +int MacroAssembler::load_unsigned_short(Register dst, Address src) { + // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, + // and "3.9 Partial Register Penalties", p. 22). + int off; + if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { + off = offset(); + movzwl(dst, src); // movzxw + } else { + xorl(dst, dst); + off = offset(); + movw(dst, src); + } + return off; +} + +void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { + switch (size_in_bytes) { +#ifndef _LP64 + case 8: + assert(dst2 != noreg, "second dest register required"); + movl(dst, src); + movl(dst2, src.plus_disp(BytesPerInt)); + break; +#else + case 8: movq(dst, src); break; +#endif + case 4: movl(dst, src); break; + case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; + case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { + switch (size_in_bytes) { +#ifndef _LP64 + case 8: + assert(src2 != noreg, "second source register required"); + movl(dst, src); + movl(dst.plus_disp(BytesPerInt), src2); + break; +#else + case 8: movq(dst, src); break; +#endif + case 4: movl(dst, src); break; + case 2: movw(dst, src); break; + case 1: movb(dst, src); break; + default: ShouldNotReachHere(); + } +} + +void MacroAssembler::mov32(AddressLiteral dst, Register src) { + if (reachable(dst)) { + movl(as_Address(dst), src); + } else { + lea(rscratch1, dst); + movl(Address(rscratch1, 0), src); + } +} + +void MacroAssembler::mov32(Register dst, AddressLiteral src) { + if (reachable(src)) { + movl(dst, as_Address(src)); + } else { + lea(rscratch1, src); + movl(dst, Address(rscratch1, 0)); + } +} + +// C++ bool manipulation + +void MacroAssembler::movbool(Register dst, Address src) { + if(sizeof(bool) == 1) + movb(dst, src); + else if(sizeof(bool) == 2) + movw(dst, src); + else if(sizeof(bool) == 4) + movl(dst, src); + else + // unsupported + ShouldNotReachHere(); +} + +void MacroAssembler::movbool(Address dst, bool boolconst) { + if(sizeof(bool) == 1) + movb(dst, (int) boolconst); + else if(sizeof(bool) == 2) + movw(dst, (int) boolconst); + else if(sizeof(bool) == 4) + movl(dst, (int) boolconst); + else + // unsupported + ShouldNotReachHere(); +} + +void MacroAssembler::movbool(Address dst, Register src) { + if(sizeof(bool) == 1) + movb(dst, src); + else if(sizeof(bool) == 2) + movw(dst, src); + else if(sizeof(bool) == 4) + movl(dst, src); + else + // unsupported + ShouldNotReachHere(); +} + +void MacroAssembler::movbyte(ArrayAddress dst, int src) { + movb(as_Address(dst), src); +} + +void MacroAssembler::movdl(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + movdl(dst, as_Address(src)); + } else { + lea(rscratch1, src); + movdl(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + movq(dst, as_Address(src)); + } else { + lea(rscratch1, src); + movq(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::movdbl(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + if (UseXmmLoadAndClearUpper) { + movsd (dst, as_Address(src)); + } else { + movlpd(dst, as_Address(src)); + } + } else { + lea(rscratch1, src); + if (UseXmmLoadAndClearUpper) { + movsd (dst, Address(rscratch1, 0)); + } else { + movlpd(dst, Address(rscratch1, 0)); + } + } +} + +void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + movss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + movss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::movptr(Register dst, Register src) { + LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); +} + +void MacroAssembler::movptr(Register dst, Address src) { + LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); +} + +// src should NEVER be a real pointer. Use AddressLiteral for true pointers +void MacroAssembler::movptr(Register dst, intptr_t src) { + LP64_ONLY(mov64(dst, src)) NOT_LP64(movl(dst, src)); +} + +void MacroAssembler::movptr(Address dst, Register src) { + LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); +} + +void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::movdqu(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::movdqu(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::movsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::movsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::movss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::movss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::movss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::mulsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::mulsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::mulsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::mulss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::mulss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::mulss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::null_check(Register reg, int offset) { + if (needs_explicit_null_check(offset)) { + // provoke OS NULL exception if reg = NULL by + // accessing M[reg] w/o changing any (non-CC) registers + // NOTE: cmpl is plenty here to provoke a segv + cmpptr(rax, Address(reg, 0)); + // Note: should probably use testl(rax, Address(reg, 0)); + // may be shorter code (however, this version of + // testl needs to be implemented first) + } else { + // nothing to do, (later) access of M[reg + offset] + // will provoke OS NULL exception if reg = NULL + } +} + +void MacroAssembler::os_breakpoint() { + // instead of directly emitting a breakpoint, call os:breakpoint for better debugability + // (e.g., MSVC can't call ps() otherwise) + call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); +} + +void MacroAssembler::pop_CPU_state() { + pop_FPU_state(); + pop_IU_state(); +} + +void MacroAssembler::pop_FPU_state() { + NOT_LP64(frstor(Address(rsp, 0));) + LP64_ONLY(fxrstor(Address(rsp, 0));) + addptr(rsp, FPUStateSizeInWords * wordSize); +} + +void MacroAssembler::pop_IU_state() { + popa(); + LP64_ONLY(addq(rsp, 8)); + popf(); +} + +// Save Integer and Float state +// Warning: Stack must be 16 byte aligned (64bit) +void MacroAssembler::push_CPU_state() { + push_IU_state(); + push_FPU_state(); +} + +void MacroAssembler::push_FPU_state() { + subptr(rsp, FPUStateSizeInWords * wordSize); +#ifndef _LP64 + fnsave(Address(rsp, 0)); + fwait(); +#else + fxsave(Address(rsp, 0)); +#endif // LP64 +} + +void MacroAssembler::push_IU_state() { + // Push flags first because pusha kills them + pushf(); + // Make sure rsp stays 16-byte aligned + LP64_ONLY(subq(rsp, 8)); + pusha(); +} + +void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp, bool clear_pc) { + // determine java_thread register + if (!java_thread->is_valid()) { + java_thread = rdi; + get_thread(java_thread); + } + // we must set sp to zero to clear frame + movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), NULL_WORD); + if (clear_fp) { + movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), NULL_WORD); + } + + if (clear_pc) + movptr(Address(java_thread, JavaThread::last_Java_pc_offset()), NULL_WORD); + +} + +void MacroAssembler::restore_rax(Register tmp) { + if (tmp == noreg) pop(rax); + else if (tmp != rax) mov(rax, tmp); +} + +void MacroAssembler::round_to(Register reg, int modulus) { + addptr(reg, modulus - 1); + andptr(reg, -modulus); +} + +void MacroAssembler::save_rax(Register tmp) { + if (tmp == noreg) push(rax); + else if (tmp != rax) mov(tmp, rax); +} + +// Write serialization page so VM thread can do a pseudo remote membar. +// We use the current thread pointer to calculate a thread specific +// offset to write to within the page. This minimizes bus traffic +// due to cache line collision. +void MacroAssembler::serialize_memory(Register thread, Register tmp) { + movl(tmp, thread); + shrl(tmp, os::get_serialize_page_shift_count()); + andl(tmp, (os::vm_page_size() - sizeof(int))); + + Address index(noreg, tmp, Address::times_1); + ExternalAddress page(os::get_memory_serialize_page()); + + // Size of store must match masking code above + movl(as_Address(ArrayAddress(page, index)), tmp); +} + +// Calls to C land +// +// When entering C land, the rbp, & rsp of the last Java frame have to be recorded +// in the (thread-local) JavaThread object. When leaving C land, the last Java fp +// has to be reset to 0. This is required to allow proper stack traversal. +void MacroAssembler::set_last_Java_frame(Register java_thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc) { + // determine java_thread register + if (!java_thread->is_valid()) { + java_thread = rdi; + get_thread(java_thread); + } + // determine last_java_sp register + if (!last_java_sp->is_valid()) { + last_java_sp = rsp; + } + + // last_java_fp is optional + + if (last_java_fp->is_valid()) { + movptr(Address(java_thread, JavaThread::last_Java_fp_offset()), last_java_fp); + } + + // last_java_pc is optional + + if (last_java_pc != NULL) { + lea(Address(java_thread, + JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), + InternalAddress(last_java_pc)); + + } + movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); +} + +void MacroAssembler::shlptr(Register dst, int imm8) { + LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); +} + +void MacroAssembler::shrptr(Register dst, int imm8) { + LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); +} + +void MacroAssembler::sign_extend_byte(Register reg) { + if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { + movsbl(reg, reg); // movsxb + } else { + shll(reg, 24); + sarl(reg, 24); + } +} + +void MacroAssembler::sign_extend_short(Register reg) { + if (LP64_ONLY(true ||) VM_Version::is_P6()) { + movswl(reg, reg); // movsxw + } else { + shll(reg, 16); + sarl(reg, 16); + } +} + +void MacroAssembler::testl(Register dst, AddressLiteral src) { + assert(reachable(src), "Address should be reachable"); + testl(dst, as_Address(src)); +} + +void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::sqrtsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::sqrtsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::sqrtss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::sqrtss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::sqrtss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::subsd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::subsd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::subsd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::subss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::subss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::subss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::ucomisd(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::ucomisd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ucomisd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) { + if (reachable(src)) { + Assembler::ucomiss(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::ucomiss(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::xorpd(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::xorpd(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::xorps(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::xorps(dst, Address(rscratch1, 0)); + } +} + +void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) { + // Used in sign-bit flipping with aligned address. + assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes"); + if (reachable(src)) { + Assembler::pshufb(dst, as_Address(src)); + } else { + lea(rscratch1, src); + Assembler::pshufb(dst, Address(rscratch1, 0)); + } +} + +// AVX 3-operands instructions + +void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vaddsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vaddsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vaddss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vaddss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { + if (reachable(src)) { + vandpd(dst, nds, as_Address(src), vector256); + } else { + lea(rscratch1, src); + vandpd(dst, nds, Address(rscratch1, 0), vector256); + } +} + +void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { + if (reachable(src)) { + vandps(dst, nds, as_Address(src), vector256); + } else { + lea(rscratch1, src); + vandps(dst, nds, Address(rscratch1, 0), vector256); + } +} + +void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vdivsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vdivsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vdivss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vdivss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vmulsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vmulsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vmulss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vmulss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vsubsd(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vsubsd(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src) { + if (reachable(src)) { + vsubss(dst, nds, as_Address(src)); + } else { + lea(rscratch1, src); + vsubss(dst, nds, Address(rscratch1, 0)); + } +} + +void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { + if (reachable(src)) { + vxorpd(dst, nds, as_Address(src), vector256); + } else { + lea(rscratch1, src); + vxorpd(dst, nds, Address(rscratch1, 0), vector256); + } +} + +void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) { + if (reachable(src)) { + vxorps(dst, nds, as_Address(src), vector256); + } else { + lea(rscratch1, src); + vxorps(dst, nds, Address(rscratch1, 0), vector256); + } +} + + +////////////////////////////////////////////////////////////////////////////////// +#ifndef SERIALGC + +void MacroAssembler::g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call) { + + // If expand_call is true then we expand the call_VM_leaf macro + // directly to skip generating the check by + // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. + +#ifdef _LP64 + assert(thread == r15_thread, "must be"); +#endif // _LP64 + + Label done; + Label runtime; + + assert(pre_val != noreg, "check this code"); + + if (obj != noreg) { + assert_different_registers(obj, pre_val, tmp); + assert(pre_val != rax, "check this code"); + } + + Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_active())); + Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + + // Is marking active? + if (in_bytes(PtrQueue::byte_width_of_active()) == 4) { + cmpl(in_progress, 0); + } else { + assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption"); + cmpb(in_progress, 0); + } + jcc(Assembler::equal, done); + + // Do we need to load the previous value? + if (obj != noreg) { + load_heap_oop(pre_val, Address(obj, 0)); + } + + // Is the previous value null? + cmpptr(pre_val, (int32_t) NULL_WORD); + jcc(Assembler::equal, done); + + // Can we store original value in the thread's buffer? + // Is index == 0? + // (The index field is typed as size_t.) + + movptr(tmp, index); // tmp := *index_adr + cmpptr(tmp, 0); // tmp == 0? + jcc(Assembler::equal, runtime); // If yes, goto runtime + + subptr(tmp, wordSize); // tmp := tmp - wordSize + movptr(index, tmp); // *index_adr := tmp + addptr(tmp, buffer); // tmp := tmp + *buffer_adr + + // Record the previous value + movptr(Address(tmp, 0), pre_val); + jmp(done); + + bind(runtime); + // save the live input values + if(tosca_live) push(rax); + + if (obj != noreg && obj != rax) + push(obj); + + if (pre_val != rax) + push(pre_val); + + // Calling the runtime using the regular call_VM_leaf mechanism generates + // code (generated by InterpreterMacroAssember::call_VM_leaf_base) + // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. + // + // If we care generating the pre-barrier without a frame (e.g. in the + // intrinsified Reference.get() routine) then ebp might be pointing to + // the caller frame and so this check will most likely fail at runtime. + // + // Expanding the call directly bypasses the generation of the check. + // So when we do not have have a full interpreter frame on the stack + // expand_call should be passed true. + + NOT_LP64( push(thread); ) + + if (expand_call) { + LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) + pass_arg1(this, thread); + pass_arg0(this, pre_val); + MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2); + } else { + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread); + } + + NOT_LP64( pop(thread); ) + + // save the live input values + if (pre_val != rax) + pop(pre_val); + + if (obj != noreg && obj != rax) + pop(obj); + + if(tosca_live) pop(rax); + + bind(done); +} + +void MacroAssembler::g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2) { +#ifdef _LP64 + assert(thread == r15_thread, "must be"); +#endif // _LP64 + + Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_index())); + Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() + + PtrQueue::byte_offset_of_buf())); + + BarrierSet* bs = Universe::heap()->barrier_set(); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + Label done; + Label runtime; + + // Does store cross heap regions? + + movptr(tmp, store_addr); + xorptr(tmp, new_val); + shrptr(tmp, HeapRegion::LogOfHRGrainBytes); + jcc(Assembler::equal, done); + + // crosses regions, storing NULL? + + cmpptr(new_val, (int32_t) NULL_WORD); + jcc(Assembler::equal, done); + + // storing region crossing non-NULL, is card already dirty? + + ExternalAddress cardtable((address) ct->byte_map_base); + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); +#ifdef _LP64 + const Register card_addr = tmp; + + movq(card_addr, store_addr); + shrq(card_addr, CardTableModRefBS::card_shift); + + lea(tmp2, cardtable); + + // get the address of the card + addq(card_addr, tmp2); +#else + const Register card_index = tmp; + + movl(card_index, store_addr); + shrl(card_index, CardTableModRefBS::card_shift); + + Address index(noreg, card_index, Address::times_1); + const Register card_addr = tmp; + lea(card_addr, as_Address(ArrayAddress(cardtable, index))); +#endif + cmpb(Address(card_addr, 0), 0); + jcc(Assembler::equal, done); + + // storing a region crossing, non-NULL oop, card is clean. + // dirty card and log. + + movb(Address(card_addr, 0), 0); + + cmpl(queue_index, 0); + jcc(Assembler::equal, runtime); + subl(queue_index, wordSize); + movptr(tmp2, buffer); +#ifdef _LP64 + movslq(rscratch1, queue_index); + addq(tmp2, rscratch1); + movq(Address(tmp2, 0), card_addr); +#else + addl(tmp2, queue_index); + movl(Address(tmp2, 0), card_index); +#endif + jmp(done); + + bind(runtime); + // save the live input values + push(store_addr); + push(new_val); +#ifdef _LP64 + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, r15_thread); +#else + push(thread); + call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_post), card_addr, thread); + pop(thread); +#endif + pop(new_val); + pop(store_addr); + + bind(done); +} + +#endif // SERIALGC +////////////////////////////////////////////////////////////////////////////////// + + +void MacroAssembler::store_check(Register obj) { + // Does a store check for the oop in register obj. The content of + // register obj is destroyed afterwards. + store_check_part_1(obj); + store_check_part_2(obj); +} + +void MacroAssembler::store_check(Register obj, Address dst) { + store_check(obj); +} + + +// split the store check operation so that other instructions can be scheduled inbetween +void MacroAssembler::store_check_part_1(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + shrptr(obj, CardTableModRefBS::card_shift); +} + +void MacroAssembler::store_check_part_2(Register obj) { + BarrierSet* bs = Universe::heap()->barrier_set(); + assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind"); + CardTableModRefBS* ct = (CardTableModRefBS*)bs; + assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code"); + + // The calculation for byte_map_base is as follows: + // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); + // So this essentially converts an address to a displacement and + // it will never need to be relocated. On 64bit however the value may be too + // large for a 32bit displacement + + intptr_t disp = (intptr_t) ct->byte_map_base; + if (is_simm32(disp)) { + Address cardtable(noreg, obj, Address::times_1, disp); + movb(cardtable, 0); + } else { + // By doing it as an ExternalAddress disp could be converted to a rip-relative + // displacement and done in a single instruction given favorable mapping and + // a smarter version of as_Address. Worst case it is two instructions which + // is no worse off then loading disp into a register and doing as a simple + // Address() as above. + // We can't do as ExternalAddress as the only style since if disp == 0 we'll + // assert since NULL isn't acceptable in a reloci (see 6644928). In any case + // in some cases we'll get a single instruction version. + + ExternalAddress cardtable((address)disp); + Address index(noreg, obj, Address::times_1); + movb(as_Address(ArrayAddress(cardtable, index)), 0); + } +} + +void MacroAssembler::subptr(Register dst, int32_t imm32) { + LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); +} + +// Force generation of a 4 byte immediate value even if it fits into 8bit +void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { + LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); +} + +void MacroAssembler::subptr(Register dst, Register src) { + LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); +} + +// C++ bool manipulation +void MacroAssembler::testbool(Register dst) { + if(sizeof(bool) == 1) + testb(dst, 0xff); + else if(sizeof(bool) == 2) { + // testw implementation needed for two byte bools + ShouldNotReachHere(); + } else if(sizeof(bool) == 4) + testl(dst, dst); + else + // unsupported + ShouldNotReachHere(); +} + +void MacroAssembler::testptr(Register dst, Register src) { + LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); +} + +// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. +void MacroAssembler::tlab_allocate(Register obj, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1, + Register t2, + Label& slow_case) { + assert_different_registers(obj, t1, t2); + assert_different_registers(obj, var_size_in_bytes, t1); + Register end = t2; + Register thread = NOT_LP64(t1) LP64_ONLY(r15_thread); + + verify_tlab(); + + NOT_LP64(get_thread(thread)); + + movptr(obj, Address(thread, JavaThread::tlab_top_offset())); + if (var_size_in_bytes == noreg) { + lea(end, Address(obj, con_size_in_bytes)); + } else { + lea(end, Address(obj, var_size_in_bytes, Address::times_1)); + } + cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); + jcc(Assembler::above, slow_case); + + // update the tlab top pointer + movptr(Address(thread, JavaThread::tlab_top_offset()), end); + + // recover var_size_in_bytes if necessary + if (var_size_in_bytes == end) { + subptr(var_size_in_bytes, obj); + } + verify_tlab(); +} + +// Preserves rbx, and rdx. +Register MacroAssembler::tlab_refill(Label& retry, + Label& try_eden, + Label& slow_case) { + Register top = rax; + Register t1 = rcx; + Register t2 = rsi; + Register thread_reg = NOT_LP64(rdi) LP64_ONLY(r15_thread); + assert_different_registers(top, thread_reg, t1, t2, /* preserve: */ rbx, rdx); + Label do_refill, discard_tlab; + + if (CMSIncrementalMode || !Universe::heap()->supports_inline_contig_alloc()) { + // No allocation in the shared eden. + jmp(slow_case); + } + + NOT_LP64(get_thread(thread_reg)); + + movptr(top, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); + movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); + + // calculate amount of free space + subptr(t1, top); + shrptr(t1, LogHeapWordSize); + + // Retain tlab and allocate object in shared space if + // the amount free in the tlab is too large to discard. + cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset()))); + jcc(Assembler::lessEqual, discard_tlab); + + // Retain + // %%% yuck as movptr... + movptr(t2, (int32_t) ThreadLocalAllocBuffer::refill_waste_limit_increment()); + addptr(Address(thread_reg, in_bytes(JavaThread::tlab_refill_waste_limit_offset())), t2); + if (TLABStats) { + // increment number of slow_allocations + addl(Address(thread_reg, in_bytes(JavaThread::tlab_slow_allocations_offset())), 1); + } + jmp(try_eden); + + bind(discard_tlab); + if (TLABStats) { + // increment number of refills + addl(Address(thread_reg, in_bytes(JavaThread::tlab_number_of_refills_offset())), 1); + // accumulate wastage -- t1 is amount free in tlab + addl(Address(thread_reg, in_bytes(JavaThread::tlab_fast_refill_waste_offset())), t1); + } + + // if tlab is currently allocated (top or end != null) then + // fill [top, end + alignment_reserve) with array object + testptr(top, top); + jcc(Assembler::zero, do_refill); + + // set up the mark word + movptr(Address(top, oopDesc::mark_offset_in_bytes()), (intptr_t)markOopDesc::prototype()->copy_set_hash(0x2)); + // set the length to the remaining space + subptr(t1, typeArrayOopDesc::header_size(T_INT)); + addptr(t1, (int32_t)ThreadLocalAllocBuffer::alignment_reserve()); + shlptr(t1, log2_intptr(HeapWordSize/sizeof(jint))); + movl(Address(top, arrayOopDesc::length_offset_in_bytes()), t1); + // set klass to intArrayKlass + // dubious reloc why not an oop reloc? + movptr(t1, ExternalAddress((address)Universe::intArrayKlassObj_addr())); + // store klass last. concurrent gcs assumes klass length is valid if + // klass field is not null. + store_klass(top, t1); + + movptr(t1, top); + subptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); + incr_allocated_bytes(thread_reg, t1, 0); + + // refill the tlab with an eden allocation + bind(do_refill); + movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); + shlptr(t1, LogHeapWordSize); + // allocate new tlab, address returned in top + eden_allocate(top, t1, 0, t2, slow_case); + + // Check that t1 was preserved in eden_allocate. +#ifdef ASSERT + if (UseTLAB) { + Label ok; + Register tsize = rsi; + assert_different_registers(tsize, thread_reg, t1); + push(tsize); + movptr(tsize, Address(thread_reg, in_bytes(JavaThread::tlab_size_offset()))); + shlptr(tsize, LogHeapWordSize); + cmpptr(t1, tsize); + jcc(Assembler::equal, ok); + STOP("assert(t1 != tlab size)"); + should_not_reach_here(); + + bind(ok); + pop(tsize); + } +#endif + movptr(Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())), top); + movptr(Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())), top); + addptr(top, t1); + subptr(top, (int32_t)ThreadLocalAllocBuffer::alignment_reserve_in_bytes()); + movptr(Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())), top); + verify_tlab(); + jmp(retry); + + return thread_reg; // for use by caller +} + +void MacroAssembler::incr_allocated_bytes(Register thread, + Register var_size_in_bytes, + int con_size_in_bytes, + Register t1) { + if (!thread->is_valid()) { +#ifdef _LP64 + thread = r15_thread; +#else + assert(t1->is_valid(), "need temp reg"); + thread = t1; + get_thread(thread); +#endif + } + +#ifdef _LP64 + if (var_size_in_bytes->is_valid()) { + addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); + } else { + addq(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); + } +#else + if (var_size_in_bytes->is_valid()) { + addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), var_size_in_bytes); + } else { + addl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())), con_size_in_bytes); + } + adcl(Address(thread, in_bytes(JavaThread::allocated_bytes_offset())+4), 0); +#endif +} + +void MacroAssembler::fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use) { + pusha(); + + // if we are coming from c1, xmm registers may be live + int off = 0; + if (UseSSE == 1) { + subptr(rsp, sizeof(jdouble)*8); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm0); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm1); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm2); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm3); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm4); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm5); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm6); + movflt(Address(rsp,off++*sizeof(jdouble)),xmm7); + } else if (UseSSE >= 2) { +#ifdef COMPILER2 + if (MaxVectorSize > 16) { + assert(UseAVX > 0, "256bit vectors are supported only with AVX"); + // Save upper half of YMM registes + subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); + vextractf128h(Address(rsp, 0),xmm0); + vextractf128h(Address(rsp, 16),xmm1); + vextractf128h(Address(rsp, 32),xmm2); + vextractf128h(Address(rsp, 48),xmm3); + vextractf128h(Address(rsp, 64),xmm4); + vextractf128h(Address(rsp, 80),xmm5); + vextractf128h(Address(rsp, 96),xmm6); + vextractf128h(Address(rsp,112),xmm7); +#ifdef _LP64 + vextractf128h(Address(rsp,128),xmm8); + vextractf128h(Address(rsp,144),xmm9); + vextractf128h(Address(rsp,160),xmm10); + vextractf128h(Address(rsp,176),xmm11); + vextractf128h(Address(rsp,192),xmm12); + vextractf128h(Address(rsp,208),xmm13); + vextractf128h(Address(rsp,224),xmm14); + vextractf128h(Address(rsp,240),xmm15); +#endif + } +#endif + // Save whole 128bit (16 bytes) XMM regiters + subptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); + movdqu(Address(rsp,off++*16),xmm0); + movdqu(Address(rsp,off++*16),xmm1); + movdqu(Address(rsp,off++*16),xmm2); + movdqu(Address(rsp,off++*16),xmm3); + movdqu(Address(rsp,off++*16),xmm4); + movdqu(Address(rsp,off++*16),xmm5); + movdqu(Address(rsp,off++*16),xmm6); + movdqu(Address(rsp,off++*16),xmm7); +#ifdef _LP64 + movdqu(Address(rsp,off++*16),xmm8); + movdqu(Address(rsp,off++*16),xmm9); + movdqu(Address(rsp,off++*16),xmm10); + movdqu(Address(rsp,off++*16),xmm11); + movdqu(Address(rsp,off++*16),xmm12); + movdqu(Address(rsp,off++*16),xmm13); + movdqu(Address(rsp,off++*16),xmm14); + movdqu(Address(rsp,off++*16),xmm15); +#endif + } + + // Preserve registers across runtime call + int incoming_argument_and_return_value_offset = -1; + if (num_fpu_regs_in_use > 1) { + // Must preserve all other FPU regs (could alternatively convert + // SharedRuntime::dsin, dcos etc. into assembly routines known not to trash + // FPU state, but can not trust C compiler) + NEEDS_CLEANUP; + // NOTE that in this case we also push the incoming argument(s) to + // the stack and restore it later; we also use this stack slot to + // hold the return value from dsin, dcos etc. + for (int i = 0; i < num_fpu_regs_in_use; i++) { + subptr(rsp, sizeof(jdouble)); + fstp_d(Address(rsp, 0)); + } + incoming_argument_and_return_value_offset = sizeof(jdouble)*(num_fpu_regs_in_use-1); + for (int i = nb_args-1; i >= 0; i--) { + fld_d(Address(rsp, incoming_argument_and_return_value_offset-i*sizeof(jdouble))); + } + } + + subptr(rsp, nb_args*sizeof(jdouble)); + for (int i = 0; i < nb_args; i++) { + fstp_d(Address(rsp, i*sizeof(jdouble))); + } + +#ifdef _LP64 + if (nb_args > 0) { + movdbl(xmm0, Address(rsp, 0)); + } + if (nb_args > 1) { + movdbl(xmm1, Address(rsp, sizeof(jdouble))); + } + assert(nb_args <= 2, "unsupported number of args"); +#endif // _LP64 + + // NOTE: we must not use call_VM_leaf here because that requires a + // complete interpreter frame in debug mode -- same bug as 4387334 + // MacroAssembler::call_VM_leaf_base is perfectly safe and will + // do proper 64bit abi + + NEEDS_CLEANUP; + // Need to add stack banging before this runtime call if it needs to + // be taken; however, there is no generic stack banging routine at + // the MacroAssembler level + + MacroAssembler::call_VM_leaf_base(runtime_entry, 0); + +#ifdef _LP64 + movsd(Address(rsp, 0), xmm0); + fld_d(Address(rsp, 0)); +#endif // _LP64 + addptr(rsp, sizeof(jdouble) * nb_args); + if (num_fpu_regs_in_use > 1) { + // Must save return value to stack and then restore entire FPU + // stack except incoming arguments + fstp_d(Address(rsp, incoming_argument_and_return_value_offset)); + for (int i = 0; i < num_fpu_regs_in_use - nb_args; i++) { + fld_d(Address(rsp, 0)); + addptr(rsp, sizeof(jdouble)); + } + fld_d(Address(rsp, (nb_args-1)*sizeof(jdouble))); + addptr(rsp, sizeof(jdouble) * nb_args); + } + + off = 0; + if (UseSSE == 1) { + movflt(xmm0, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm1, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm2, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm3, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm4, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm5, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm6, Address(rsp,off++*sizeof(jdouble))); + movflt(xmm7, Address(rsp,off++*sizeof(jdouble))); + addptr(rsp, sizeof(jdouble)*8); + } else if (UseSSE >= 2) { + // Restore whole 128bit (16 bytes) XMM regiters + movdqu(xmm0, Address(rsp,off++*16)); + movdqu(xmm1, Address(rsp,off++*16)); + movdqu(xmm2, Address(rsp,off++*16)); + movdqu(xmm3, Address(rsp,off++*16)); + movdqu(xmm4, Address(rsp,off++*16)); + movdqu(xmm5, Address(rsp,off++*16)); + movdqu(xmm6, Address(rsp,off++*16)); + movdqu(xmm7, Address(rsp,off++*16)); +#ifdef _LP64 + movdqu(xmm8, Address(rsp,off++*16)); + movdqu(xmm9, Address(rsp,off++*16)); + movdqu(xmm10, Address(rsp,off++*16)); + movdqu(xmm11, Address(rsp,off++*16)); + movdqu(xmm12, Address(rsp,off++*16)); + movdqu(xmm13, Address(rsp,off++*16)); + movdqu(xmm14, Address(rsp,off++*16)); + movdqu(xmm15, Address(rsp,off++*16)); +#endif + addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); +#ifdef COMPILER2 + if (MaxVectorSize > 16) { + // Restore upper half of YMM registes. + vinsertf128h(xmm0, Address(rsp, 0)); + vinsertf128h(xmm1, Address(rsp, 16)); + vinsertf128h(xmm2, Address(rsp, 32)); + vinsertf128h(xmm3, Address(rsp, 48)); + vinsertf128h(xmm4, Address(rsp, 64)); + vinsertf128h(xmm5, Address(rsp, 80)); + vinsertf128h(xmm6, Address(rsp, 96)); + vinsertf128h(xmm7, Address(rsp,112)); +#ifdef _LP64 + vinsertf128h(xmm8, Address(rsp,128)); + vinsertf128h(xmm9, Address(rsp,144)); + vinsertf128h(xmm10, Address(rsp,160)); + vinsertf128h(xmm11, Address(rsp,176)); + vinsertf128h(xmm12, Address(rsp,192)); + vinsertf128h(xmm13, Address(rsp,208)); + vinsertf128h(xmm14, Address(rsp,224)); + vinsertf128h(xmm15, Address(rsp,240)); +#endif + addptr(rsp, 16 * LP64_ONLY(16) NOT_LP64(8)); + } +#endif + } + popa(); +} + +static const double pi_4 = 0.7853981633974483; + +void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { + // A hand-coded argument reduction for values in fabs(pi/4, pi/2) + // was attempted in this code; unfortunately it appears that the + // switch to 80-bit precision and back causes this to be + // unprofitable compared with simply performing a runtime call if + // the argument is out of the (-pi/4, pi/4) range. + + Register tmp = noreg; + if (!VM_Version::supports_cmov()) { + // fcmp needs a temporary so preserve rbx, + tmp = rbx; + push(tmp); + } + + Label slow_case, done; + + ExternalAddress pi4_adr = (address)&pi_4; + if (reachable(pi4_adr)) { + // x ?<= pi/4 + fld_d(pi4_adr); + fld_s(1); // Stack: X PI/4 X + fabs(); // Stack: |X| PI/4 X + fcmp(tmp); + jcc(Assembler::above, slow_case); + + // fastest case: -pi/4 <= x <= pi/4 + switch(trig) { + case 's': + fsin(); + break; + case 'c': + fcos(); + break; + case 't': + ftan(); + break; + default: + assert(false, "bad intrinsic"); + break; + } + jmp(done); + } + + // slow case: runtime call + bind(slow_case); + + switch(trig) { + case 's': + { + fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), 1, num_fpu_regs_in_use); + } + break; + case 'c': + { + fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), 1, num_fpu_regs_in_use); + } + break; + case 't': + { + fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), 1, num_fpu_regs_in_use); + } + break; + default: + assert(false, "bad intrinsic"); + break; + } + + // Come here with result in F-TOS + bind(done); + + if (tmp != noreg) { + pop(tmp); + } +} + + +// Look up the method for a megamorphic invokeinterface call. +// The target method is determined by . +// The receiver klass is in recv_klass. +// On success, the result will be in method_result, and execution falls through. +// On failure, execution transfers to the given label. +void MacroAssembler::lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& L_no_such_interface) { + assert_different_registers(recv_klass, intf_klass, method_result, scan_temp); + assert(itable_index.is_constant() || itable_index.as_register() == method_result, + "caller must use same register for non-constant itable index as for method"); + + // Compute start of first itableOffsetEntry (which is at the end of the vtable) + int vtable_base = InstanceKlass::vtable_start_offset() * wordSize; + int itentry_off = itableMethodEntry::method_offset_in_bytes(); + int scan_step = itableOffsetEntry::size() * wordSize; + int vte_size = vtableEntry::size() * wordSize; + Address::ScaleFactor times_vte_scale = Address::times_ptr; + assert(vte_size == wordSize, "else adjust times_vte_scale"); + + movl(scan_temp, Address(recv_klass, InstanceKlass::vtable_length_offset() * wordSize)); + + // %%% Could store the aligned, prescaled offset in the klassoop. + lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); + if (HeapWordsPerLong > 1) { + // Round up to align_object_offset boundary + // see code for InstanceKlass::start_of_itable! + round_to(scan_temp, BytesPerLong); + } + + // Adjust recv_klass by scaled itable_index, so we can free itable_index. + assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); + lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); + + // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { + // if (scan->interface() == intf) { + // result = (klass + scan->offset() + itable_index); + // } + // } + Label search, found_method; + + for (int peel = 1; peel >= 0; peel--) { + movptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); + cmpptr(intf_klass, method_result); + + if (peel) { + jccb(Assembler::equal, found_method); + } else { + jccb(Assembler::notEqual, search); + // (invert the test to fall through to found_method...) + } + + if (!peel) break; + + bind(search); + + // Check that the previous entry is non-null. A null entry means that + // the receiver class doesn't implement the interface, and wasn't the + // same as when the caller was compiled. + testptr(method_result, method_result); + jcc(Assembler::zero, L_no_such_interface); + addptr(scan_temp, scan_step); + } + + bind(found_method); + + // Got a hit. + movl(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); + movptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); +} + + +// virtual method calling +void MacroAssembler::lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result) { + const int base = InstanceKlass::vtable_start_offset() * wordSize; + assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); + Address vtable_entry_addr(recv_klass, + vtable_index, Address::times_ptr, + base + vtableEntry::method_offset_in_bytes()); + movptr(method_result, vtable_entry_addr); +} + + +void MacroAssembler::check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success) { + Label L_failure; + check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); + check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); + bind(L_failure); +} + + +void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset) { + assert_different_registers(sub_klass, super_klass, temp_reg); + bool must_load_sco = (super_check_offset.constant_or_zero() == -1); + if (super_check_offset.is_register()) { + assert_different_registers(sub_klass, super_klass, + super_check_offset.as_register()); + } else if (must_load_sco) { + assert(temp_reg != noreg, "supply either a temp or a register offset"); + } + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + int sco_offset = in_bytes(Klass::super_check_offset_offset()); + Address super_check_offset_addr(super_klass, sco_offset); + + // Hacked jcc, which "knows" that L_fallthrough, at least, is in + // range of a jccb. If this routine grows larger, reconsider at + // least some of these. +#define local_jcc(assembler_cond, label) \ + if (&(label) == &L_fallthrough) jccb(assembler_cond, label); \ + else jcc( assembler_cond, label) /*omit semi*/ + + // Hacked jmp, which may only be used just before L_fallthrough. +#define final_jmp(label) \ + if (&(label) == &L_fallthrough) { /*do nothing*/ } \ + else jmp(label) /*omit semi*/ + + // If the pointers are equal, we are done (e.g., String[] elements). + // This self-check enables sharing of secondary supertype arrays among + // non-primary types such as array-of-interface. Otherwise, each such + // type would need its own customized SSA. + // We move this check to the front of the fast path because many + // type checks are in fact trivially successful in this manner, + // so we get a nicely predicted branch right at the start of the check. + cmpptr(sub_klass, super_klass); + local_jcc(Assembler::equal, *L_success); + + // Check the supertype display: + if (must_load_sco) { + // Positive movl does right thing on LP64. + movl(temp_reg, super_check_offset_addr); + super_check_offset = RegisterOrConstant(temp_reg); + } + Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); + cmpptr(super_klass, super_check_addr); // load displayed supertype + + // This check has worked decisively for primary supers. + // Secondary supers are sought in the super_cache ('super_cache_addr'). + // (Secondary supers are interfaces and very deeply nested subtypes.) + // This works in the same check above because of a tricky aliasing + // between the super_cache and the primary super display elements. + // (The 'super_check_addr' can address either, as the case requires.) + // Note that the cache is updated below if it does not help us find + // what we need immediately. + // So if it was a primary super, we can just fail immediately. + // Otherwise, it's the slow path for us (no success at this point). + + if (super_check_offset.is_register()) { + local_jcc(Assembler::equal, *L_success); + cmpl(super_check_offset.as_register(), sc_offset); + if (L_failure == &L_fallthrough) { + local_jcc(Assembler::equal, *L_slow_path); + } else { + local_jcc(Assembler::notEqual, *L_failure); + final_jmp(*L_slow_path); + } + } else if (super_check_offset.as_constant() == sc_offset) { + // Need a slow path; fast failure is impossible. + if (L_slow_path == &L_fallthrough) { + local_jcc(Assembler::equal, *L_success); + } else { + local_jcc(Assembler::notEqual, *L_slow_path); + final_jmp(*L_success); + } + } else { + // No slow path; it's a fast decision. + if (L_failure == &L_fallthrough) { + local_jcc(Assembler::equal, *L_success); + } else { + local_jcc(Assembler::notEqual, *L_failure); + final_jmp(*L_success); + } + } + + bind(L_fallthrough); + +#undef local_jcc +#undef final_jmp +} + + +void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes) { + assert_different_registers(sub_klass, super_klass, temp_reg); + if (temp2_reg != noreg) + assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); +#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) + + Label L_fallthrough; + int label_nulls = 0; + if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } + if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } + assert(label_nulls <= 1, "at most one NULL in the batch"); + + // a couple of useful fields in sub_klass: + int ss_offset = in_bytes(Klass::secondary_supers_offset()); + int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); + Address secondary_supers_addr(sub_klass, ss_offset); + Address super_cache_addr( sub_klass, sc_offset); + + // Do a linear scan of the secondary super-klass chain. + // This code is rarely used, so simplicity is a virtue here. + // The repne_scan instruction uses fixed registers, which we must spill. + // Don't worry too much about pre-existing connections with the input regs. + + assert(sub_klass != rax, "killed reg"); // killed by mov(rax, super) + assert(sub_klass != rcx, "killed reg"); // killed by lea(rcx, &pst_counter) + + // Get super_klass value into rax (even if it was in rdi or rcx). + bool pushed_rax = false, pushed_rcx = false, pushed_rdi = false; + if (super_klass != rax || UseCompressedOops) { + if (!IS_A_TEMP(rax)) { push(rax); pushed_rax = true; } + mov(rax, super_klass); + } + if (!IS_A_TEMP(rcx)) { push(rcx); pushed_rcx = true; } + if (!IS_A_TEMP(rdi)) { push(rdi); pushed_rdi = true; } + +#ifndef PRODUCT + int* pst_counter = &SharedRuntime::_partial_subtype_ctr; + ExternalAddress pst_counter_addr((address) pst_counter); + NOT_LP64( incrementl(pst_counter_addr) ); + LP64_ONLY( lea(rcx, pst_counter_addr) ); + LP64_ONLY( incrementl(Address(rcx, 0)) ); +#endif //PRODUCT + + // We will consult the secondary-super array. + movptr(rdi, secondary_supers_addr); + // Load the array length. (Positive movl does right thing on LP64.) + movl(rcx, Address(rdi, Array::length_offset_in_bytes())); + // Skip to start of data. + addptr(rdi, Array::base_offset_in_bytes()); + + // Scan RCX words at [RDI] for an occurrence of RAX. + // Set NZ/Z based on last compare. + // Z flag value will not be set by 'repne' if RCX == 0 since 'repne' does + // not change flags (only scas instruction which is repeated sets flags). + // Set Z = 0 (not equal) before 'repne' to indicate that class was not found. + + testptr(rax,rax); // Set Z = 0 + repne_scan(); + + // Unspill the temp. registers: + if (pushed_rdi) pop(rdi); + if (pushed_rcx) pop(rcx); + if (pushed_rax) pop(rax); + + if (set_cond_codes) { + // Special hack for the AD files: rdi is guaranteed non-zero. + assert(!pushed_rdi, "rdi must be left non-NULL"); + // Also, the condition codes are properly set Z/NZ on succeed/failure. + } + + if (L_failure == &L_fallthrough) + jccb(Assembler::notEqual, *L_failure); + else jcc(Assembler::notEqual, *L_failure); + + // Success. Cache the super we found and proceed in triumph. + movptr(super_cache_addr, super_klass); + + if (L_success != &L_fallthrough) { + jmp(*L_success); + } + +#undef IS_A_TEMP + + bind(L_fallthrough); +} + + +void MacroAssembler::cmov32(Condition cc, Register dst, Address src) { + if (VM_Version::supports_cmov()) { + cmovl(cc, dst, src); + } else { + Label L; + jccb(negate_condition(cc), L); + movl(dst, src); + bind(L); + } +} + +void MacroAssembler::cmov32(Condition cc, Register dst, Register src) { + if (VM_Version::supports_cmov()) { + cmovl(cc, dst, src); + } else { + Label L; + jccb(negate_condition(cc), L); + movl(dst, src); + bind(L); + } +} + +void MacroAssembler::verify_oop(Register reg, const char* s) { + if (!VerifyOops) return; + + // Pass register number to verify_oop_subroutine + char* b = new char[strlen(s) + 50]; + sprintf(b, "verify_oop: %s: %s", reg->name(), s); + BLOCK_COMMENT("verify_oop {"); +#ifdef _LP64 + push(rscratch1); // save r10, trashed by movptr() +#endif + push(rax); // save rax, + push(reg); // pass register argument + ExternalAddress buffer((address) b); + // avoid using pushptr, as it modifies scratch registers + // and our contract is not to modify anything + movptr(rax, buffer.addr()); + push(rax); + // call indirectly to solve generation ordering problem + movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); + call(rax); + // Caller pops the arguments (oop, message) and restores rax, r10 + BLOCK_COMMENT("} verify_oop"); +} + + +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { + intptr_t value = *delayed_value_addr; + if (value != 0) + return RegisterOrConstant(value + offset); + + // load indirectly to solve generation ordering problem + movptr(tmp, ExternalAddress((address) delayed_value_addr)); + +#ifdef ASSERT + { Label L; + testptr(tmp, tmp); + if (WizardMode) { + jcc(Assembler::notZero, L); + char* buf = new char[40]; + sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]); + STOP(buf); + } else { + jccb(Assembler::notZero, L); + hlt(); + } + bind(L); + } +#endif + + if (offset != 0) + addptr(tmp, offset); + + return RegisterOrConstant(tmp); +} + + +Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, + int extra_slot_offset) { + // cf. TemplateTable::prepare_invoke(), if (load_receiver). + int stackElementSize = Interpreter::stackElementSize; + int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); +#ifdef ASSERT + int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); + assert(offset1 - offset == stackElementSize, "correct arithmetic"); +#endif + Register scale_reg = noreg; + Address::ScaleFactor scale_factor = Address::no_scale; + if (arg_slot.is_constant()) { + offset += arg_slot.as_constant() * stackElementSize; + } else { + scale_reg = arg_slot.as_register(); + scale_factor = Address::times(stackElementSize); + } + offset += wordSize; // return PC is on stack + return Address(rsp, scale_reg, scale_factor, offset); +} + + +void MacroAssembler::verify_oop_addr(Address addr, const char* s) { + if (!VerifyOops) return; + + // Address adjust(addr.base(), addr.index(), addr.scale(), addr.disp() + BytesPerWord); + // Pass register number to verify_oop_subroutine + char* b = new char[strlen(s) + 50]; + sprintf(b, "verify_oop_addr: %s", s); + +#ifdef _LP64 + push(rscratch1); // save r10, trashed by movptr() +#endif + push(rax); // save rax, + // addr may contain rsp so we will have to adjust it based on the push + // we just did (and on 64 bit we do two pushes) + // NOTE: 64bit seemed to have had a bug in that it did movq(addr, rax); which + // stores rax into addr which is backwards of what was intended. + if (addr.uses(rsp)) { + lea(rax, addr); + pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); + } else { + pushptr(addr); + } + + ExternalAddress buffer((address) b); + // pass msg argument + // avoid using pushptr, as it modifies scratch registers + // and our contract is not to modify anything + movptr(rax, buffer.addr()); + push(rax); + + // call indirectly to solve generation ordering problem + movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); + call(rax); + // Caller pops the arguments (addr, message) and restores rax, r10. +} + +void MacroAssembler::verify_tlab() { +#ifdef ASSERT + if (UseTLAB && VerifyOops) { + Label next, ok; + Register t1 = rsi; + Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); + + push(t1); + NOT_LP64(push(thread_reg)); + NOT_LP64(get_thread(thread_reg)); + + movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); + cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); + jcc(Assembler::aboveEqual, next); + STOP("assert(top >= start)"); + should_not_reach_here(); + + bind(next); + movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); + cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); + jcc(Assembler::aboveEqual, ok); + STOP("assert(top <= end)"); + should_not_reach_here(); + + bind(ok); + NOT_LP64(pop(thread_reg)); + pop(t1); + } +#endif +} + +class ControlWord { + public: + int32_t _value; + + int rounding_control() const { return (_value >> 10) & 3 ; } + int precision_control() const { return (_value >> 8) & 3 ; } + bool precision() const { return ((_value >> 5) & 1) != 0; } + bool underflow() const { return ((_value >> 4) & 1) != 0; } + bool overflow() const { return ((_value >> 3) & 1) != 0; } + bool zero_divide() const { return ((_value >> 2) & 1) != 0; } + bool denormalized() const { return ((_value >> 1) & 1) != 0; } + bool invalid() const { return ((_value >> 0) & 1) != 0; } + + void print() const { + // rounding control + const char* rc; + switch (rounding_control()) { + case 0: rc = "round near"; break; + case 1: rc = "round down"; break; + case 2: rc = "round up "; break; + case 3: rc = "chop "; break; + }; + // precision control + const char* pc; + switch (precision_control()) { + case 0: pc = "24 bits "; break; + case 1: pc = "reserved"; break; + case 2: pc = "53 bits "; break; + case 3: pc = "64 bits "; break; + }; + // flags + char f[9]; + f[0] = ' '; + f[1] = ' '; + f[2] = (precision ()) ? 'P' : 'p'; + f[3] = (underflow ()) ? 'U' : 'u'; + f[4] = (overflow ()) ? 'O' : 'o'; + f[5] = (zero_divide ()) ? 'Z' : 'z'; + f[6] = (denormalized()) ? 'D' : 'd'; + f[7] = (invalid ()) ? 'I' : 'i'; + f[8] = '\x0'; + // output + printf("%04x masks = %s, %s, %s", _value & 0xFFFF, f, rc, pc); + } + +}; + +class StatusWord { + public: + int32_t _value; + + bool busy() const { return ((_value >> 15) & 1) != 0; } + bool C3() const { return ((_value >> 14) & 1) != 0; } + bool C2() const { return ((_value >> 10) & 1) != 0; } + bool C1() const { return ((_value >> 9) & 1) != 0; } + bool C0() const { return ((_value >> 8) & 1) != 0; } + int top() const { return (_value >> 11) & 7 ; } + bool error_status() const { return ((_value >> 7) & 1) != 0; } + bool stack_fault() const { return ((_value >> 6) & 1) != 0; } + bool precision() const { return ((_value >> 5) & 1) != 0; } + bool underflow() const { return ((_value >> 4) & 1) != 0; } + bool overflow() const { return ((_value >> 3) & 1) != 0; } + bool zero_divide() const { return ((_value >> 2) & 1) != 0; } + bool denormalized() const { return ((_value >> 1) & 1) != 0; } + bool invalid() const { return ((_value >> 0) & 1) != 0; } + + void print() const { + // condition codes + char c[5]; + c[0] = (C3()) ? '3' : '-'; + c[1] = (C2()) ? '2' : '-'; + c[2] = (C1()) ? '1' : '-'; + c[3] = (C0()) ? '0' : '-'; + c[4] = '\x0'; + // flags + char f[9]; + f[0] = (error_status()) ? 'E' : '-'; + f[1] = (stack_fault ()) ? 'S' : '-'; + f[2] = (precision ()) ? 'P' : '-'; + f[3] = (underflow ()) ? 'U' : '-'; + f[4] = (overflow ()) ? 'O' : '-'; + f[5] = (zero_divide ()) ? 'Z' : '-'; + f[6] = (denormalized()) ? 'D' : '-'; + f[7] = (invalid ()) ? 'I' : '-'; + f[8] = '\x0'; + // output + printf("%04x flags = %s, cc = %s, top = %d", _value & 0xFFFF, f, c, top()); + } + +}; + +class TagWord { + public: + int32_t _value; + + int tag_at(int i) const { return (_value >> (i*2)) & 3; } + + void print() const { + printf("%04x", _value & 0xFFFF); + } + +}; + +class FPU_Register { + public: + int32_t _m0; + int32_t _m1; + int16_t _ex; + + bool is_indefinite() const { + return _ex == -1 && _m1 == (int32_t)0xC0000000 && _m0 == 0; + } + + void print() const { + char sign = (_ex < 0) ? '-' : '+'; + const char* kind = (_ex == 0x7FFF || _ex == (int16_t)-1) ? "NaN" : " "; + printf("%c%04hx.%08x%08x %s", sign, _ex, _m1, _m0, kind); + }; + +}; + +class FPU_State { + public: + enum { + register_size = 10, + number_of_registers = 8, + register_mask = 7 + }; + + ControlWord _control_word; + StatusWord _status_word; + TagWord _tag_word; + int32_t _error_offset; + int32_t _error_selector; + int32_t _data_offset; + int32_t _data_selector; + int8_t _register[register_size * number_of_registers]; + + int tag_for_st(int i) const { return _tag_word.tag_at((_status_word.top() + i) & register_mask); } + FPU_Register* st(int i) const { return (FPU_Register*)&_register[register_size * i]; } + + const char* tag_as_string(int tag) const { + switch (tag) { + case 0: return "valid"; + case 1: return "zero"; + case 2: return "special"; + case 3: return "empty"; + } + ShouldNotReachHere(); + return NULL; + } + + void print() const { + // print computation registers + { int t = _status_word.top(); + for (int i = 0; i < number_of_registers; i++) { + int j = (i - t) & register_mask; + printf("%c r%d = ST%d = ", (j == 0 ? '*' : ' '), i, j); + st(j)->print(); + printf(" %s\n", tag_as_string(_tag_word.tag_at(i))); + } + } + printf("\n"); + // print control registers + printf("ctrl = "); _control_word.print(); printf("\n"); + printf("stat = "); _status_word .print(); printf("\n"); + printf("tags = "); _tag_word .print(); printf("\n"); + } + +}; + +class Flag_Register { + public: + int32_t _value; + + bool overflow() const { return ((_value >> 11) & 1) != 0; } + bool direction() const { return ((_value >> 10) & 1) != 0; } + bool sign() const { return ((_value >> 7) & 1) != 0; } + bool zero() const { return ((_value >> 6) & 1) != 0; } + bool auxiliary_carry() const { return ((_value >> 4) & 1) != 0; } + bool parity() const { return ((_value >> 2) & 1) != 0; } + bool carry() const { return ((_value >> 0) & 1) != 0; } + + void print() const { + // flags + char f[8]; + f[0] = (overflow ()) ? 'O' : '-'; + f[1] = (direction ()) ? 'D' : '-'; + f[2] = (sign ()) ? 'S' : '-'; + f[3] = (zero ()) ? 'Z' : '-'; + f[4] = (auxiliary_carry()) ? 'A' : '-'; + f[5] = (parity ()) ? 'P' : '-'; + f[6] = (carry ()) ? 'C' : '-'; + f[7] = '\x0'; + // output + printf("%08x flags = %s", _value, f); + } + +}; + +class IU_Register { + public: + int32_t _value; + + void print() const { + printf("%08x %11d", _value, _value); + } + +}; + +class IU_State { + public: + Flag_Register _eflags; + IU_Register _rdi; + IU_Register _rsi; + IU_Register _rbp; + IU_Register _rsp; + IU_Register _rbx; + IU_Register _rdx; + IU_Register _rcx; + IU_Register _rax; + + void print() const { + // computation registers + printf("rax, = "); _rax.print(); printf("\n"); + printf("rbx, = "); _rbx.print(); printf("\n"); + printf("rcx = "); _rcx.print(); printf("\n"); + printf("rdx = "); _rdx.print(); printf("\n"); + printf("rdi = "); _rdi.print(); printf("\n"); + printf("rsi = "); _rsi.print(); printf("\n"); + printf("rbp, = "); _rbp.print(); printf("\n"); + printf("rsp = "); _rsp.print(); printf("\n"); + printf("\n"); + // control registers + printf("flgs = "); _eflags.print(); printf("\n"); + } +}; + + +class CPU_State { + public: + FPU_State _fpu_state; + IU_State _iu_state; + + void print() const { + printf("--------------------------------------------------\n"); + _iu_state .print(); + printf("\n"); + _fpu_state.print(); + printf("--------------------------------------------------\n"); + } + +}; + + +static void _print_CPU_state(CPU_State* state) { + state->print(); +}; + + +void MacroAssembler::print_CPU_state() { + push_CPU_state(); + push(rsp); // pass CPU state + call(RuntimeAddress(CAST_FROM_FN_PTR(address, _print_CPU_state))); + addptr(rsp, wordSize); // discard argument + pop_CPU_state(); +} + + +static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { + static int counter = 0; + FPU_State* fs = &state->_fpu_state; + counter++; + // For leaf calls, only verify that the top few elements remain empty. + // We only need 1 empty at the top for C2 code. + if( stack_depth < 0 ) { + if( fs->tag_for_st(7) != 3 ) { + printf("FPR7 not empty\n"); + state->print(); + assert(false, "error"); + return false; + } + return true; // All other stack states do not matter + } + + assert((fs->_control_word._value & 0xffff) == StubRoutines::_fpu_cntrl_wrd_std, + "bad FPU control word"); + + // compute stack depth + int i = 0; + while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; + int d = i; + while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; + // verify findings + if (i != FPU_State::number_of_registers) { + // stack not contiguous + printf("%s: stack not contiguous at ST%d\n", s, i); + state->print(); + assert(false, "error"); + return false; + } + // check if computed stack depth corresponds to expected stack depth + if (stack_depth < 0) { + // expected stack depth is -stack_depth or less + if (d > -stack_depth) { + // too many elements on the stack + printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); + state->print(); + assert(false, "error"); + return false; + } + } else { + // expected stack depth is stack_depth + if (d != stack_depth) { + // wrong stack depth + printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); + state->print(); + assert(false, "error"); + return false; + } + } + // everything is cool + return true; +} + + +void MacroAssembler::verify_FPU(int stack_depth, const char* s) { + if (!VerifyFPU) return; + push_CPU_state(); + push(rsp); // pass CPU state + ExternalAddress msg((address) s); + // pass message string s + pushptr(msg.addr()); + push(stack_depth); // pass stack depth + call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); + addptr(rsp, 3 * wordSize); // discard arguments + // check for error + { Label L; + testl(rax, rax); + jcc(Assembler::notZero, L); + int3(); // break if error condition + bind(L); + } + pop_CPU_state(); +} + +void MacroAssembler::load_klass(Register dst, Register src) { +#ifdef _LP64 + if (UseCompressedKlassPointers) { + movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); + decode_klass_not_null(dst); + } else +#endif + movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); +} + +void MacroAssembler::load_prototype_header(Register dst, Register src) { +#ifdef _LP64 + if (UseCompressedKlassPointers) { + assert (Universe::heap() != NULL, "java heap should be initialized"); + movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + movq(dst, Address(r12_heapbase, dst, Address::times_8, Klass::prototype_header_offset())); + } else { + movq(dst, Address(dst, Klass::prototype_header_offset())); + } + } else +#endif + { + movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); + movptr(dst, Address(dst, Klass::prototype_header_offset())); + } +} + +void MacroAssembler::store_klass(Register dst, Register src) { +#ifdef _LP64 + if (UseCompressedKlassPointers) { + encode_klass_not_null(src); + movl(Address(dst, oopDesc::klass_offset_in_bytes()), src); + } else +#endif + movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); +} + +void MacroAssembler::load_heap_oop(Register dst, Address src) { +#ifdef _LP64 + // FIXME: Must change all places where we try to load the klass. + if (UseCompressedOops) { + movl(dst, src); + decode_heap_oop(dst); + } else +#endif + movptr(dst, src); +} + +// Doesn't do verfication, generates fixed size code +void MacroAssembler::load_heap_oop_not_null(Register dst, Address src) { +#ifdef _LP64 + if (UseCompressedOops) { + movl(dst, src); + decode_heap_oop_not_null(dst); + } else +#endif + movptr(dst, src); +} + +void MacroAssembler::store_heap_oop(Address dst, Register src) { +#ifdef _LP64 + if (UseCompressedOops) { + assert(!dst.uses(src), "not enough registers"); + encode_heap_oop(src); + movl(dst, src); + } else +#endif + movptr(dst, src); +} + +void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) { + assert_different_registers(src1, tmp); +#ifdef _LP64 + if (UseCompressedOops) { + bool did_push = false; + if (tmp == noreg) { + tmp = rax; + push(tmp); + did_push = true; + assert(!src2.uses(rsp), "can't push"); + } + load_heap_oop(tmp, src2); + cmpptr(src1, tmp); + if (did_push) pop(tmp); + } else +#endif + cmpptr(src1, src2); +} + +// Used for storing NULLs. +void MacroAssembler::store_heap_oop_null(Address dst) { +#ifdef _LP64 + if (UseCompressedOops) { + movl(dst, (int32_t)NULL_WORD); + } else { + movslq(dst, (int32_t)NULL_WORD); + } +#else + movl(dst, (int32_t)NULL_WORD); +#endif +} + +#ifdef _LP64 +void MacroAssembler::store_klass_gap(Register dst, Register src) { + if (UseCompressedKlassPointers) { + // Store to klass gap in destination + movl(Address(dst, oopDesc::klass_gap_offset_in_bytes()), src); + } +} + +#ifdef ASSERT +void MacroAssembler::verify_heapbase(const char* msg) { + assert (UseCompressedOops || UseCompressedKlassPointers, "should be compressed"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + if (CheckCompressedOops) { + Label ok; + push(rscratch1); // cmpptr trashes rscratch1 + cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); + jcc(Assembler::equal, ok); + STOP(msg); + bind(ok); + pop(rscratch1); + } +} +#endif + +// Algorithm must match oop.inline.hpp encode_heap_oop. +void MacroAssembler::encode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop: heap base corrupted?"); +#endif + verify_oop(r, "broken oop in encode_heap_oop"); + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shrq(r, LogMinObjAlignmentInBytes); + } + return; + } + testq(r, r); + cmovq(Assembler::equal, r, r12_heapbase); + subq(r, r12_heapbase); + shrq(r, LogMinObjAlignmentInBytes); +} + +void MacroAssembler::encode_heap_oop_not_null(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); + if (CheckCompressedOops) { + Label ok; + testq(r, r); + jcc(Assembler::notEqual, ok); + STOP("null oop passed to encode_heap_oop_not_null"); + bind(ok); + } +#endif + verify_oop(r, "broken oop in encode_heap_oop_not_null"); + if (Universe::narrow_oop_base() != NULL) { + subq(r, r12_heapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shrq(r, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); + if (CheckCompressedOops) { + Label ok; + testq(src, src); + jcc(Assembler::notEqual, ok); + STOP("null oop passed to encode_heap_oop_not_null2"); + bind(ok); + } +#endif + verify_oop(src, "broken oop in encode_heap_oop_not_null2"); + if (dst != src) { + movq(dst, src); + } + if (Universe::narrow_oop_base() != NULL) { + subq(dst, r12_heapbase); + } + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shrq(dst, LogMinObjAlignmentInBytes); + } +} + +void MacroAssembler::decode_heap_oop(Register r) { +#ifdef ASSERT + verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); +#endif + if (Universe::narrow_oop_base() == NULL) { + if (Universe::narrow_oop_shift() != 0) { + assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shlq(r, LogMinObjAlignmentInBytes); + } + } else { + Label done; + shlq(r, LogMinObjAlignmentInBytes); + jccb(Assembler::equal, done); + addq(r, r12_heapbase); + bind(done); + } + verify_oop(r, "broken oop in decode_heap_oop"); +} + +void MacroAssembler::decode_heap_oop_not_null(Register r) { + // Note: it will change flags + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + shlq(r, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + addq(r, r12_heapbase); + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { + // Note: it will change flags + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (LogMinObjAlignmentInBytes == Address::times_8) { + leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); + } else { + if (dst != src) { + movq(dst, src); + } + shlq(dst, LogMinObjAlignmentInBytes); + if (Universe::narrow_oop_base() != NULL) { + addq(dst, r12_heapbase); + } + } + } else { + assert (Universe::narrow_oop_base() == NULL, "sanity"); + if (dst != src) { + movq(dst, src); + } + } +} + +void MacroAssembler::encode_klass_not_null(Register r) { + assert(Metaspace::is_initialized(), "metaspace should be initialized"); +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_klass_not_null: heap base corrupted?"); +#endif + if (Universe::narrow_klass_base() != NULL) { + subq(r, r12_heapbase); + } + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shrq(r, LogKlassAlignmentInBytes); + } +} + +void MacroAssembler::encode_klass_not_null(Register dst, Register src) { + assert(Metaspace::is_initialized(), "metaspace should be initialized"); +#ifdef ASSERT + verify_heapbase("MacroAssembler::encode_klass_not_null2: heap base corrupted?"); +#endif + if (dst != src) { + movq(dst, src); + } + if (Universe::narrow_klass_base() != NULL) { + subq(dst, r12_heapbase); + } + if (Universe::narrow_klass_shift() != 0) { + assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shrq(dst, LogKlassAlignmentInBytes); + } +} + +void MacroAssembler::decode_klass_not_null(Register r) { + assert(Metaspace::is_initialized(), "metaspace should be initialized"); + // Note: it will change flags + assert (UseCompressedKlassPointers, "should only be used for compressed headers"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + shlq(r, LogKlassAlignmentInBytes); + if (Universe::narrow_klass_base() != NULL) { + addq(r, r12_heapbase); + } + } else { + assert (Universe::narrow_klass_base() == NULL, "sanity"); + } +} + +void MacroAssembler::decode_klass_not_null(Register dst, Register src) { + assert(Metaspace::is_initialized(), "metaspace should be initialized"); + // Note: it will change flags + assert (UseCompressedKlassPointers, "should only be used for compressed headers"); + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. + if (Universe::narrow_klass_shift() != 0) { + assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); + assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); + leaq(dst, Address(r12_heapbase, src, Address::times_8, 0)); + } else { + assert (Universe::narrow_klass_base() == NULL, "sanity"); + if (dst != src) { + movq(dst, src); + } + } +} + +void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + mov_narrow_oop(dst, oop_index, rspec); +} + +void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + mov_narrow_oop(dst, oop_index, rspec); +} + +void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { + assert (UseCompressedKlassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); +} + +void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { + assert (UseCompressedKlassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + mov_narrow_oop(dst, oopDesc::encode_klass(k), rspec); +} + +void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + Assembler::cmp_narrow_oop(dst, oop_index, rspec); +} + +void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj) { + assert (UseCompressedOops, "should only be used for compressed headers"); + assert (Universe::heap() != NULL, "java heap should be initialized"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int oop_index = oop_recorder()->find_index(obj); + RelocationHolder rspec = oop_Relocation::spec(oop_index); + Assembler::cmp_narrow_oop(dst, oop_index, rspec); +} + +void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k) { + assert (UseCompressedKlassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); +} + +void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k) { + assert (UseCompressedKlassPointers, "should only be used for compressed headers"); + assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); + int klass_index = oop_recorder()->find_index(k); + RelocationHolder rspec = metadata_Relocation::spec(klass_index); + Assembler::cmp_narrow_oop(dst, oopDesc::encode_klass(k), rspec); +} + +void MacroAssembler::reinit_heapbase() { + if (UseCompressedOops || UseCompressedKlassPointers) { + movptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); + } +} +#endif // _LP64 + + +// C2 compiled method's prolog code. +void MacroAssembler::verified_entry(int framesize, bool stack_bang, bool fp_mode_24b) { + + // WARNING: Initial instruction MUST be 5 bytes or longer so that + // NativeJump::patch_verified_entry will be able to patch out the entry + // code safely. The push to verify stack depth is ok at 5 bytes, + // the frame allocation can be either 3 or 6 bytes. So if we don't do + // stack bang then we must use the 6 byte frame allocation even if + // we have no frame. :-( + + assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); + // Remove word for return addr + framesize -= wordSize; + + // Calls to C2R adapters often do not accept exceptional returns. + // We require that their callers must bang for them. But be careful, because + // some VM calls (such as call site linkage) can use several kilobytes of + // stack. But the stack safety zone should account for that. + // See bugs 4446381, 4468289, 4497237. + if (stack_bang) { + generate_stack_overflow_check(framesize); + + // We always push rbp, so that on return to interpreter rbp, will be + // restored correctly and we can correct the stack. + push(rbp); + // Remove word for ebp + framesize -= wordSize; + + // Create frame + if (framesize) { + subptr(rsp, framesize); + } + } else { + // Create frame (force generation of a 4 byte immediate value) + subptr_imm32(rsp, framesize); + + // Save RBP register now. + framesize -= wordSize; + movptr(Address(rsp, framesize), rbp); + } + + if (VerifyStackAtCalls) { // Majik cookie to verify stack depth + framesize -= wordSize; + movptr(Address(rsp, framesize), (int32_t)0xbadb100d); + } + +#ifndef _LP64 + // If method sets FPU control word do it now + if (fp_mode_24b) { + fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24())); + } + if (UseSSE >= 2 && VerifyFPU) { + verify_FPU(0, "FPU stack must be clean on entry"); + } +#endif + +#ifdef ASSERT + if (VerifyStackAtCalls) { + Label L; + push(rax); + mov(rax, rsp); + andptr(rax, StackAlignmentInBytes-1); + cmpptr(rax, StackAlignmentInBytes-wordSize); + pop(rax); + jcc(Assembler::equal, L); + STOP("Stack is not properly aligned!"); + bind(L); + } +#endif + +} + + +// IndexOf for constant substrings with size >= 8 chars +// which don't need to be loaded through stack. +void MacroAssembler::string_indexofC8(Register str1, Register str2, + Register cnt1, Register cnt2, + int int_cnt2, Register result, + XMMRegister vec, Register tmp) { + ShortBranchVerifier sbv(this); + assert(UseSSE42Intrinsics, "SSE4.2 is required"); + + // This method uses pcmpestri inxtruction with bound registers + // inputs: + // xmm - substring + // rax - substring length (elements count) + // mem - scanned string + // rdx - string length (elements count) + // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) + // outputs: + // rcx - matched index in string + assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); + + Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, + RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR, + MATCH_SUBSTR_HEAD, RELOAD_STR, FOUND_CANDIDATE; + + // Note, inline_string_indexOf() generates checks: + // if (substr.count > string.count) return -1; + // if (substr.count == 0) return 0; + assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars"); + + // Load substring. + movdqu(vec, Address(str2, 0)); + movl(cnt2, int_cnt2); + movptr(result, str1); // string addr + + if (int_cnt2 > 8) { + jmpb(SCAN_TO_SUBSTR); + + // Reload substr for rescan, this code + // is executed only for large substrings (> 8 chars) + bind(RELOAD_SUBSTR); + movdqu(vec, Address(str2, 0)); + negptr(cnt2); // Jumped here with negative cnt2, convert to positive + + bind(RELOAD_STR); + // We came here after the beginning of the substring was + // matched but the rest of it was not so we need to search + // again. Start from the next element after the previous match. + + // cnt2 is number of substring reminding elements and + // cnt1 is number of string reminding elements when cmp failed. + // Restored cnt1 = cnt1 - cnt2 + int_cnt2 + subl(cnt1, cnt2); + addl(cnt1, int_cnt2); + movl(cnt2, int_cnt2); // Now restore cnt2 + + decrementl(cnt1); // Shift to next element + cmpl(cnt1, cnt2); + jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring + + addptr(result, 2); + + } // (int_cnt2 > 8) + + // Scan string for start of substr in 16-byte vectors + bind(SCAN_TO_SUBSTR); + pcmpestri(vec, Address(result, 0), 0x0d); + jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 + subl(cnt1, 8); + jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string + cmpl(cnt1, cnt2); + jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring + addptr(result, 16); + jmpb(SCAN_TO_SUBSTR); + + // Found a potential substr + bind(FOUND_CANDIDATE); + // Matched whole vector if first element matched (tmp(rcx) == 0). + if (int_cnt2 == 8) { + jccb(Assembler::overflow, RET_FOUND); // OF == 1 + } else { // int_cnt2 > 8 + jccb(Assembler::overflow, FOUND_SUBSTR); + } + // After pcmpestri tmp(rcx) contains matched element index + // Compute start addr of substr + lea(result, Address(result, tmp, Address::times_2)); + + // Make sure string is still long enough + subl(cnt1, tmp); + cmpl(cnt1, cnt2); + if (int_cnt2 == 8) { + jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); + } else { // int_cnt2 > 8 + jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD); + } + // Left less then substring. + + bind(RET_NOT_FOUND); + movl(result, -1); + jmpb(EXIT); + + if (int_cnt2 > 8) { + // This code is optimized for the case when whole substring + // is matched if its head is matched. + bind(MATCH_SUBSTR_HEAD); + pcmpestri(vec, Address(result, 0), 0x0d); + // Reload only string if does not match + jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0 + + Label CONT_SCAN_SUBSTR; + // Compare the rest of substring (> 8 chars). + bind(FOUND_SUBSTR); + // First 8 chars are already matched. + negptr(cnt2); + addptr(cnt2, 8); + + bind(SCAN_SUBSTR); + subl(cnt1, 8); + cmpl(cnt2, -8); // Do not read beyond substring + jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR); + // Back-up strings to avoid reading beyond substring: + // cnt1 = cnt1 - cnt2 + 8 + addl(cnt1, cnt2); // cnt2 is negative + addl(cnt1, 8); + movl(cnt2, 8); negptr(cnt2); + bind(CONT_SCAN_SUBSTR); + if (int_cnt2 < (int)G) { + movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2)); + pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d); + } else { + // calculate index in register to avoid integer overflow (int_cnt2*2) + movl(tmp, int_cnt2); + addptr(tmp, cnt2); + movdqu(vec, Address(str2, tmp, Address::times_2, 0)); + pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d); + } + // Need to reload strings pointers if not matched whole vector + jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 + addptr(cnt2, 8); + jcc(Assembler::negative, SCAN_SUBSTR); + // Fall through if found full substring + + } // (int_cnt2 > 8) + + bind(RET_FOUND); + // Found result if we matched full small substring. + // Compute substr offset + subptr(result, str1); + shrl(result, 1); // index + bind(EXIT); + +} // string_indexofC8 + +// Small strings are loaded through stack if they cross page boundary. +void MacroAssembler::string_indexof(Register str1, Register str2, + Register cnt1, Register cnt2, + int int_cnt2, Register result, + XMMRegister vec, Register tmp) { + ShortBranchVerifier sbv(this); + assert(UseSSE42Intrinsics, "SSE4.2 is required"); + // + // int_cnt2 is length of small (< 8 chars) constant substring + // or (-1) for non constant substring in which case its length + // is in cnt2 register. + // + // Note, inline_string_indexOf() generates checks: + // if (substr.count > string.count) return -1; + // if (substr.count == 0) return 0; + // + assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0"); + + // This method uses pcmpestri inxtruction with bound registers + // inputs: + // xmm - substring + // rax - substring length (elements count) + // mem - scanned string + // rdx - string length (elements count) + // 0xd - mode: 1100 (substring search) + 01 (unsigned shorts) + // outputs: + // rcx - matched index in string + assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); + + Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR, + RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR, + FOUND_CANDIDATE; + + { //======================================================== + // We don't know where these strings are located + // and we can't read beyond them. Load them through stack. + Label BIG_STRINGS, CHECK_STR, COPY_SUBSTR, COPY_STR; + + movptr(tmp, rsp); // save old SP + + if (int_cnt2 > 0) { // small (< 8 chars) constant substring + if (int_cnt2 == 1) { // One char + load_unsigned_short(result, Address(str2, 0)); + movdl(vec, result); // move 32 bits + } else if (int_cnt2 == 2) { // Two chars + movdl(vec, Address(str2, 0)); // move 32 bits + } else if (int_cnt2 == 4) { // Four chars + movq(vec, Address(str2, 0)); // move 64 bits + } else { // cnt2 = { 3, 5, 6, 7 } + // Array header size is 12 bytes in 32-bit VM + // + 6 bytes for 3 chars == 18 bytes, + // enough space to load vec and shift. + assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity"); + movdqu(vec, Address(str2, (int_cnt2*2)-16)); + psrldq(vec, 16-(int_cnt2*2)); + } + } else { // not constant substring + cmpl(cnt2, 8); + jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough + + // We can read beyond string if srt+16 does not cross page boundary + // since heaps are aligned and mapped by pages. + assert(os::vm_page_size() < (int)G, "default page should be small"); + movl(result, str2); // We need only low 32 bits + andl(result, (os::vm_page_size()-1)); + cmpl(result, (os::vm_page_size()-16)); + jccb(Assembler::belowEqual, CHECK_STR); + + // Move small strings to stack to allow load 16 bytes into vec. + subptr(rsp, 16); + int stk_offset = wordSize-2; + push(cnt2); + + bind(COPY_SUBSTR); + load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2)); + movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); + decrement(cnt2); + jccb(Assembler::notZero, COPY_SUBSTR); + + pop(cnt2); + movptr(str2, rsp); // New substring address + } // non constant + + bind(CHECK_STR); + cmpl(cnt1, 8); + jccb(Assembler::aboveEqual, BIG_STRINGS); + + // Check cross page boundary. + movl(result, str1); // We need only low 32 bits + andl(result, (os::vm_page_size()-1)); + cmpl(result, (os::vm_page_size()-16)); + jccb(Assembler::belowEqual, BIG_STRINGS); + + subptr(rsp, 16); + int stk_offset = -2; + if (int_cnt2 < 0) { // not constant + push(cnt2); + stk_offset += wordSize; + } + movl(cnt2, cnt1); + + bind(COPY_STR); + load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2)); + movw(Address(rsp, cnt2, Address::times_2, stk_offset), result); + decrement(cnt2); + jccb(Assembler::notZero, COPY_STR); + + if (int_cnt2 < 0) { // not constant + pop(cnt2); + } + movptr(str1, rsp); // New string address + + bind(BIG_STRINGS); + // Load substring. + if (int_cnt2 < 0) { // -1 + movdqu(vec, Address(str2, 0)); + push(cnt2); // substr count + push(str2); // substr addr + push(str1); // string addr + } else { + // Small (< 8 chars) constant substrings are loaded already. + movl(cnt2, int_cnt2); + } + push(tmp); // original SP + + } // Finished loading + + //======================================================== + // Start search + // + + movptr(result, str1); // string addr + + if (int_cnt2 < 0) { // Only for non constant substring + jmpb(SCAN_TO_SUBSTR); + + // SP saved at sp+0 + // String saved at sp+1*wordSize + // Substr saved at sp+2*wordSize + // Substr count saved at sp+3*wordSize + + // Reload substr for rescan, this code + // is executed only for large substrings (> 8 chars) + bind(RELOAD_SUBSTR); + movptr(str2, Address(rsp, 2*wordSize)); + movl(cnt2, Address(rsp, 3*wordSize)); + movdqu(vec, Address(str2, 0)); + // We came here after the beginning of the substring was + // matched but the rest of it was not so we need to search + // again. Start from the next element after the previous match. + subptr(str1, result); // Restore counter + shrl(str1, 1); + addl(cnt1, str1); + decrementl(cnt1); // Shift to next element + cmpl(cnt1, cnt2); + jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring + + addptr(result, 2); + } // non constant + + // Scan string for start of substr in 16-byte vectors + bind(SCAN_TO_SUBSTR); + assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri"); + pcmpestri(vec, Address(result, 0), 0x0d); + jccb(Assembler::below, FOUND_CANDIDATE); // CF == 1 + subl(cnt1, 8); + jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string + cmpl(cnt1, cnt2); + jccb(Assembler::negative, RET_NOT_FOUND); // Left less then substring + addptr(result, 16); + + bind(ADJUST_STR); + cmpl(cnt1, 8); // Do not read beyond string + jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR); + // Back-up string to avoid reading beyond string. + lea(result, Address(result, cnt1, Address::times_2, -16)); + movl(cnt1, 8); + jmpb(SCAN_TO_SUBSTR); + + // Found a potential substr + bind(FOUND_CANDIDATE); + // After pcmpestri tmp(rcx) contains matched element index + + // Make sure string is still long enough + subl(cnt1, tmp); + cmpl(cnt1, cnt2); + jccb(Assembler::greaterEqual, FOUND_SUBSTR); + // Left less then substring. + + bind(RET_NOT_FOUND); + movl(result, -1); + jmpb(CLEANUP); + + bind(FOUND_SUBSTR); + // Compute start addr of substr + lea(result, Address(result, tmp, Address::times_2)); + + if (int_cnt2 > 0) { // Constant substring + // Repeat search for small substring (< 8 chars) + // from new point without reloading substring. + // Have to check that we don't read beyond string. + cmpl(tmp, 8-int_cnt2); + jccb(Assembler::greater, ADJUST_STR); + // Fall through if matched whole substring. + } else { // non constant + assert(int_cnt2 == -1, "should be != 0"); + + addl(tmp, cnt2); + // Found result if we matched whole substring. + cmpl(tmp, 8); + jccb(Assembler::lessEqual, RET_FOUND); + + // Repeat search for small substring (<= 8 chars) + // from new point 'str1' without reloading substring. + cmpl(cnt2, 8); + // Have to check that we don't read beyond string. + jccb(Assembler::lessEqual, ADJUST_STR); + + Label CHECK_NEXT, CONT_SCAN_SUBSTR, RET_FOUND_LONG; + // Compare the rest of substring (> 8 chars). + movptr(str1, result); + + cmpl(tmp, cnt2); + // First 8 chars are already matched. + jccb(Assembler::equal, CHECK_NEXT); + + bind(SCAN_SUBSTR); + pcmpestri(vec, Address(str1, 0), 0x0d); + // Need to reload strings pointers if not matched whole vector + jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0 + + bind(CHECK_NEXT); + subl(cnt2, 8); + jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring + addptr(str1, 16); + addptr(str2, 16); + subl(cnt1, 8); + cmpl(cnt2, 8); // Do not read beyond substring + jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR); + // Back-up strings to avoid reading beyond substring. + lea(str2, Address(str2, cnt2, Address::times_2, -16)); + lea(str1, Address(str1, cnt2, Address::times_2, -16)); + subl(cnt1, cnt2); + movl(cnt2, 8); + addl(cnt1, 8); + bind(CONT_SCAN_SUBSTR); + movdqu(vec, Address(str2, 0)); + jmpb(SCAN_SUBSTR); + + bind(RET_FOUND_LONG); + movptr(str1, Address(rsp, wordSize)); + } // non constant + + bind(RET_FOUND); + // Compute substr offset + subptr(result, str1); + shrl(result, 1); // index + + bind(CLEANUP); + pop(rsp); // restore SP + +} // string_indexof + +// Compare strings. +void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + XMMRegister vec1) { + ShortBranchVerifier sbv(this); + Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL; + + // Compute the minimum of the string lengths and the + // difference of the string lengths (stack). + // Do the conditional move stuff + movl(result, cnt1); + subl(cnt1, cnt2); + push(cnt1); + cmov32(Assembler::lessEqual, cnt2, result); + + // Is the minimum length zero? + testl(cnt2, cnt2); + jcc(Assembler::zero, LENGTH_DIFF_LABEL); + + // Load first characters + load_unsigned_short(result, Address(str1, 0)); + load_unsigned_short(cnt1, Address(str2, 0)); + + // Compare first characters + subl(result, cnt1); + jcc(Assembler::notZero, POP_LABEL); + decrementl(cnt2); + jcc(Assembler::zero, LENGTH_DIFF_LABEL); + + { + // Check after comparing first character to see if strings are equivalent + Label LSkip2; + // Check if the strings start at same location + cmpptr(str1, str2); + jccb(Assembler::notEqual, LSkip2); + + // Check if the length difference is zero (from stack) + cmpl(Address(rsp, 0), 0x0); + jcc(Assembler::equal, LENGTH_DIFF_LABEL); + + // Strings might not be equivalent + bind(LSkip2); + } + + Address::ScaleFactor scale = Address::times_2; + int stride = 8; + + // Advance to next element + addptr(str1, 16/stride); + addptr(str2, 16/stride); + + if (UseSSE42Intrinsics) { + Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; + int pcmpmask = 0x19; + // Setup to compare 16-byte vectors + movl(result, cnt2); + andl(cnt2, ~(stride - 1)); // cnt2 holds the vector count + jccb(Assembler::zero, COMPARE_TAIL); + + lea(str1, Address(str1, result, scale)); + lea(str2, Address(str2, result, scale)); + negptr(result); + + // pcmpestri + // inputs: + // vec1- substring + // rax - negative string length (elements count) + // mem - scaned string + // rdx - string length (elements count) + // pcmpmask - cmp mode: 11000 (string compare with negated result) + // + 00 (unsigned bytes) or + 01 (unsigned shorts) + // outputs: + // rcx - first mismatched element index + assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri"); + + bind(COMPARE_WIDE_VECTORS); + movdqu(vec1, Address(str1, result, scale)); + pcmpestri(vec1, Address(str2, result, scale), pcmpmask); + // After pcmpestri cnt1(rcx) contains mismatched element index + + jccb(Assembler::below, VECTOR_NOT_EQUAL); // CF==1 + addptr(result, stride); + subptr(cnt2, stride); + jccb(Assembler::notZero, COMPARE_WIDE_VECTORS); + + // compare wide vectors tail + testl(result, result); + jccb(Assembler::zero, LENGTH_DIFF_LABEL); + + movl(cnt2, stride); + movl(result, stride); + negptr(result); + movdqu(vec1, Address(str1, result, scale)); + pcmpestri(vec1, Address(str2, result, scale), pcmpmask); + jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL); + + // Mismatched characters in the vectors + bind(VECTOR_NOT_EQUAL); + addptr(result, cnt1); + movptr(cnt2, result); + load_unsigned_short(result, Address(str1, cnt2, scale)); + load_unsigned_short(cnt1, Address(str2, cnt2, scale)); + subl(result, cnt1); + jmpb(POP_LABEL); + + bind(COMPARE_TAIL); // limit is zero + movl(cnt2, result); + // Fallthru to tail compare + } + + // Shift str2 and str1 to the end of the arrays, negate min + lea(str1, Address(str1, cnt2, scale, 0)); + lea(str2, Address(str2, cnt2, scale, 0)); + negptr(cnt2); + + // Compare the rest of the elements + bind(WHILE_HEAD_LABEL); + load_unsigned_short(result, Address(str1, cnt2, scale, 0)); + load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0)); + subl(result, cnt1); + jccb(Assembler::notZero, POP_LABEL); + increment(cnt2); + jccb(Assembler::notZero, WHILE_HEAD_LABEL); + + // Strings are equal up to min length. Return the length difference. + bind(LENGTH_DIFF_LABEL); + pop(result); + jmpb(DONE_LABEL); + + // Discard the stored length difference + bind(POP_LABEL); + pop(cnt1); + + // That's it + bind(DONE_LABEL); +} + +// Compare char[] arrays aligned to 4 bytes or substrings. +void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, + Register limit, Register result, Register chr, + XMMRegister vec1, XMMRegister vec2) { + ShortBranchVerifier sbv(this); + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; + + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // Check the input args + cmpptr(ary1, ary2); + jcc(Assembler::equal, TRUE_LABEL); + + if (is_array_equ) { + // Need additional checks for arrays_equals. + testptr(ary1, ary1); + jcc(Assembler::zero, FALSE_LABEL); + testptr(ary2, ary2); + jcc(Assembler::zero, FALSE_LABEL); + + // Check the lengths + movl(limit, Address(ary1, length_offset)); + cmpl(limit, Address(ary2, length_offset)); + jcc(Assembler::notEqual, FALSE_LABEL); + } + + // count == 0 + testl(limit, limit); + jcc(Assembler::zero, TRUE_LABEL); + + if (is_array_equ) { + // Load array address + lea(ary1, Address(ary1, base_offset)); + lea(ary2, Address(ary2, base_offset)); + } + + shll(limit, 1); // byte count != 0 + movl(result, limit); // copy + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + + // Compare 16-byte vectors + andl(result, 0x0000000e); // tail count (in bytes) + andl(limit, 0xfffffff0); // vector count (in bytes) + jccb(Assembler::zero, COMPARE_TAIL); + + lea(ary1, Address(ary1, limit, Address::times_1)); + lea(ary2, Address(ary2, limit, Address::times_1)); + negptr(limit); + + bind(COMPARE_WIDE_VECTORS); + movdqu(vec1, Address(ary1, limit, Address::times_1)); + movdqu(vec2, Address(ary2, limit, Address::times_1)); + pxor(vec1, vec2); + + ptest(vec1, vec1); + jccb(Assembler::notZero, FALSE_LABEL); + addptr(limit, 16); + jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + + testl(result, result); + jccb(Assembler::zero, TRUE_LABEL); + + movdqu(vec1, Address(ary1, result, Address::times_1, -16)); + movdqu(vec2, Address(ary2, result, Address::times_1, -16)); + pxor(vec1, vec2); + + ptest(vec1, vec1); + jccb(Assembler::notZero, FALSE_LABEL); + jmpb(TRUE_LABEL); + + bind(COMPARE_TAIL); // limit is zero + movl(limit, result); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + andl(limit, 0xfffffffc); // vector count (in bytes) + jccb(Assembler::zero, COMPARE_CHAR); + + lea(ary1, Address(ary1, limit, Address::times_1)); + lea(ary2, Address(ary2, limit, Address::times_1)); + negptr(limit); + + bind(COMPARE_VECTORS); + movl(chr, Address(ary1, limit, Address::times_1)); + cmpl(chr, Address(ary2, limit, Address::times_1)); + jccb(Assembler::notEqual, FALSE_LABEL); + addptr(limit, 4); + jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + bind(COMPARE_CHAR); + testl(result, 0x2); // tail char + jccb(Assembler::zero, TRUE_LABEL); + load_unsigned_short(chr, Address(ary1, 0)); + load_unsigned_short(limit, Address(ary2, 0)); + cmpl(chr, limit); + jccb(Assembler::notEqual, FALSE_LABEL); + + bind(TRUE_LABEL); + movl(result, 1); // return true + jmpb(DONE); + + bind(FALSE_LABEL); + xorl(result, result); // return false + + // That's it + bind(DONE); +} + +void MacroAssembler::generate_fill(BasicType t, bool aligned, + Register to, Register value, Register count, + Register rtmp, XMMRegister xtmp) { + ShortBranchVerifier sbv(this); + assert_different_registers(to, value, count, rtmp); + Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; + Label L_fill_2_bytes, L_fill_4_bytes; + + int shift = -1; + switch (t) { + case T_BYTE: + shift = 2; + break; + case T_SHORT: + shift = 1; + break; + case T_INT: + shift = 0; + break; + default: ShouldNotReachHere(); + } + + if (t == T_BYTE) { + andl(value, 0xff); + movl(rtmp, value); + shll(rtmp, 8); + orl(value, rtmp); + } + if (t == T_SHORT) { + andl(value, 0xffff); + } + if (t == T_BYTE || t == T_SHORT) { + movl(rtmp, value); + shll(rtmp, 16); + orl(value, rtmp); + } + + cmpl(count, 2<= 2, "supported cpu only" ); + Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; + // Fill 32-byte chunks + movdl(xtmp, value); + pshufd(xtmp, xtmp, 0); + + subl(count, 8 << shift); + jcc(Assembler::less, L_check_fill_8_bytes); + align(16); + + BIND(L_fill_32_bytes_loop); + + if (UseUnalignedLoadStores) { + movdqu(Address(to, 0), xtmp); + movdqu(Address(to, 16), xtmp); + } else { + movq(Address(to, 0), xtmp); + movq(Address(to, 8), xtmp); + movq(Address(to, 16), xtmp); + movq(Address(to, 24), xtmp); + } + + addptr(to, 32); + subl(count, 8 << shift); + jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); + BIND(L_check_fill_8_bytes); + addl(count, 8 << shift); + jccb(Assembler::zero, L_exit); + jmpb(L_fill_8_bytes); + + // + // length is too short, just fill qwords + // + BIND(L_fill_8_bytes_loop); + movq(Address(to, 0), xtmp); + addptr(to, 8); + BIND(L_fill_8_bytes); + subl(count, 1 << (shift + 1)); + jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); + } + } + // fill trailing 4 bytes + BIND(L_fill_4_bytes); + testl(count, 1<cmp8(ExternalAddress((address)flag_addr), value); + _masm->jcc(Assembler::equal, _label); +} + +SkipIfEqual::~SkipIfEqual() { + _masm->bind(_label); +} diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp new file mode 100644 index 00000000000..36d4d231155 --- /dev/null +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp @@ -0,0 +1,1172 @@ +/* + * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_X86_VM_MACROASSEMBLER_X86_HPP +#define CPU_X86_VM_MACROASSEMBLER_X86_HPP + +#include "asm/assembler.hpp" + + +// MacroAssembler extends Assembler by frequently used macros. +// +// Instructions for which a 'better' code sequence exists depending +// on arguments should also go in here. + +class MacroAssembler: public Assembler { + friend class LIR_Assembler; + friend class Runtime1; // as_Address() + + protected: + + Address as_Address(AddressLiteral adr); + Address as_Address(ArrayAddress adr); + + // Support for VM calls + // + // This is the base routine called by the different versions of call_VM_leaf. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). +#ifdef CC_INTERP + // c++ interpreter never wants to use interp_masm version of call_VM + #define VIRTUAL +#else + #define VIRTUAL virtual +#endif + + VIRTUAL void call_VM_leaf_base( + address entry_point, // the entry point + int number_of_arguments // the number of arguments to pop after the call + ); + + // This is the base routine called by the different versions of call_VM. The interpreter + // may customize this version by overriding it for its purposes (e.g., to save/restore + // additional registers when doing a VM call). + // + // If no java_thread register is specified (noreg) than rdi will be used instead. call_VM_base + // returns the register which contains the thread upon return. If a thread register has been + // specified, the return value will correspond to that register. If no last_java_sp is specified + // (noreg) than rsp will be used instead. + VIRTUAL void call_VM_base( // returns the register containing the thread upon return + Register oop_result, // where an oop-result ends up if any; use noreg otherwise + Register java_thread, // the thread if computed before ; use noreg otherwise + Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise + address entry_point, // the entry point + int number_of_arguments, // the number of arguments (w/o thread) to pop after the call + bool check_exceptions // whether to check for pending exceptions after return + ); + + // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. + // The implementation is only non-empty for the InterpreterMacroAssembler, + // as only the interpreter handles PopFrame and ForceEarlyReturn requests. + virtual void check_and_handle_popframe(Register java_thread); + virtual void check_and_handle_earlyret(Register java_thread); + + void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); + + // helpers for FPU flag access + // tmp is a temporary register, if none is available use noreg + void save_rax (Register tmp); + void restore_rax(Register tmp); + + public: + MacroAssembler(CodeBuffer* code) : Assembler(code) {} + + // Support for NULL-checks + // + // Generates code that causes a NULL OS exception if the content of reg is NULL. + // If the accessed location is M[reg + offset] and the offset is known, provide the + // offset. No explicit code generation is needed if the offset is within a certain + // range (0 <= offset <= page_size). + + void null_check(Register reg, int offset = -1); + static bool needs_explicit_null_check(intptr_t offset); + + // Required platform-specific helpers for Label::patch_instructions. + // They _shadow_ the declarations in AbstractAssembler, which are undefined. + void pd_patch_instruction(address branch, address target) { + unsigned char op = branch[0]; + assert(op == 0xE8 /* call */ || + op == 0xE9 /* jmp */ || + op == 0xEB /* short jmp */ || + (op & 0xF0) == 0x70 /* short jcc */ || + op == 0x0F && (branch[1] & 0xF0) == 0x80 /* jcc */, + "Invalid opcode at patch point"); + + if (op == 0xEB || (op & 0xF0) == 0x70) { + // short offset operators (jmp and jcc) + char* disp = (char*) &branch[1]; + int imm8 = target - (address) &disp[1]; + guarantee(this->is8bit(imm8), "Short forward jump exceeds 8-bit offset"); + *disp = imm8; + } else { + int* disp = (int*) &branch[(op == 0x0F)? 2: 1]; + int imm32 = target - (address) &disp[1]; + *disp = imm32; + } + } + +#ifndef PRODUCT + static void pd_print_patched_instruction(address branch) { + const char* s; + unsigned char op = branch[0]; + if (op == 0xE8) { + s = "call"; + } else if (op == 0xE9 || op == 0xEB) { + s = "jmp"; + } else if ((op & 0xF0) == 0x70) { + s = "jcc"; + } else if (op == 0x0F) { + s = "jcc"; + } else { + s = "????"; + } + tty->print("%s (unresolved)", s); + } +#endif + + // The following 4 methods return the offset of the appropriate move instruction + + // Support for fast byte/short loading with zero extension (depending on particular CPU) + int load_unsigned_byte(Register dst, Address src); + int load_unsigned_short(Register dst, Address src); + + // Support for fast byte/short loading with sign extension (depending on particular CPU) + int load_signed_byte(Register dst, Address src); + int load_signed_short(Register dst, Address src); + + // Support for sign-extension (hi:lo = extend_sign(lo)) + void extend_sign(Register hi, Register lo); + + // Load and store values by size and signed-ness + void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); + void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); + + // Support for inc/dec with optimal instruction selection depending on value + + void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } + void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } + + void decrementl(Address dst, int value = 1); + void decrementl(Register reg, int value = 1); + + void decrementq(Register reg, int value = 1); + void decrementq(Address dst, int value = 1); + + void incrementl(Address dst, int value = 1); + void incrementl(Register reg, int value = 1); + + void incrementq(Register reg, int value = 1); + void incrementq(Address dst, int value = 1); + + + // Support optimal SSE move instructions. + void movflt(XMMRegister dst, XMMRegister src) { + if (UseXmmRegToRegMoveAll) { movaps(dst, src); return; } + else { movss (dst, src); return; } + } + void movflt(XMMRegister dst, Address src) { movss(dst, src); } + void movflt(XMMRegister dst, AddressLiteral src); + void movflt(Address dst, XMMRegister src) { movss(dst, src); } + + void movdbl(XMMRegister dst, XMMRegister src) { + if (UseXmmRegToRegMoveAll) { movapd(dst, src); return; } + else { movsd (dst, src); return; } + } + + void movdbl(XMMRegister dst, AddressLiteral src); + + void movdbl(XMMRegister dst, Address src) { + if (UseXmmLoadAndClearUpper) { movsd (dst, src); return; } + else { movlpd(dst, src); return; } + } + void movdbl(Address dst, XMMRegister src) { movsd(dst, src); } + + void incrementl(AddressLiteral dst); + void incrementl(ArrayAddress dst); + + // Alignment + void align(int modulus); + + // A 5 byte nop that is safe for patching (see patch_verified_entry) + void fat_nop(); + + // Stack frame creation/removal + void enter(); + void leave(); + + // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) + // The pointer will be loaded into the thread register. + void get_thread(Register thread); + + + // Support for VM calls + // + // It is imperative that all calls into the VM are handled via the call_VM macros. + // They make sure that the stack linkage is setup correctly. call_VM's correspond + // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. + + + void call_VM(Register oop_result, + address entry_point, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + // Overloadings with last_Java_sp + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + int number_of_arguments = 0, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, bool + check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, + bool check_exceptions = true); + void call_VM(Register oop_result, + Register last_java_sp, + address entry_point, + Register arg_1, Register arg_2, Register arg_3, + bool check_exceptions = true); + + void get_vm_result (Register oop_result, Register thread); + void get_vm_result_2(Register metadata_result, Register thread); + + // These always tightly bind to MacroAssembler::call_VM_base + // bypassing the virtual implementation + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); + void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); + + void call_VM_leaf(address entry_point, + int number_of_arguments = 0); + void call_VM_leaf(address entry_point, + Register arg_1); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2); + void call_VM_leaf(address entry_point, + Register arg_1, Register arg_2, Register arg_3); + + // These always tightly bind to MacroAssembler::call_VM_leaf_base + // bypassing the virtual implementation + void super_call_VM_leaf(address entry_point); + void super_call_VM_leaf(address entry_point, Register arg_1); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); + void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); + + // last Java Frame (fills frame anchor) + void set_last_Java_frame(Register thread, + Register last_java_sp, + Register last_java_fp, + address last_java_pc); + + // thread in the default location (r15_thread on 64bit) + void set_last_Java_frame(Register last_java_sp, + Register last_java_fp, + address last_java_pc); + + void reset_last_Java_frame(Register thread, bool clear_fp, bool clear_pc); + + // thread in the default location (r15_thread on 64bit) + void reset_last_Java_frame(bool clear_fp, bool clear_pc); + + // Stores + void store_check(Register obj); // store check for obj - register is destroyed afterwards + void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) + +#ifndef SERIALGC + + void g1_write_barrier_pre(Register obj, + Register pre_val, + Register thread, + Register tmp, + bool tosca_live, + bool expand_call); + + void g1_write_barrier_post(Register store_addr, + Register new_val, + Register thread, + Register tmp, + Register tmp2); + +#endif // SERIALGC + + // split store_check(Register obj) to enhance instruction interleaving + void store_check_part_1(Register obj); + void store_check_part_2(Register obj); + + // C 'boolean' to Java boolean: x == 0 ? 0 : 1 + void c2bool(Register x); + + // C++ bool manipulation + + void movbool(Register dst, Address src); + void movbool(Address dst, bool boolconst); + void movbool(Address dst, Register src); + void testbool(Register dst); + + // oop manipulations + void load_klass(Register dst, Register src); + void store_klass(Register dst, Register src); + + void load_heap_oop(Register dst, Address src); + void load_heap_oop_not_null(Register dst, Address src); + void store_heap_oop(Address dst, Register src); + void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg); + + // Used for storing NULL. All other oop constants should be + // stored using routines that take a jobject. + void store_heap_oop_null(Address dst); + + void load_prototype_header(Register dst, Register src); + +#ifdef _LP64 + void store_klass_gap(Register dst, Register src); + + // This dummy is to prevent a call to store_heap_oop from + // converting a zero (like NULL) into a Register by giving + // the compiler two choices it can't resolve + + void store_heap_oop(Address dst, void* dummy); + + void encode_heap_oop(Register r); + void decode_heap_oop(Register r); + void encode_heap_oop_not_null(Register r); + void decode_heap_oop_not_null(Register r); + void encode_heap_oop_not_null(Register dst, Register src); + void decode_heap_oop_not_null(Register dst, Register src); + + void set_narrow_oop(Register dst, jobject obj); + void set_narrow_oop(Address dst, jobject obj); + void cmp_narrow_oop(Register dst, jobject obj); + void cmp_narrow_oop(Address dst, jobject obj); + + void encode_klass_not_null(Register r); + void decode_klass_not_null(Register r); + void encode_klass_not_null(Register dst, Register src); + void decode_klass_not_null(Register dst, Register src); + void set_narrow_klass(Register dst, Klass* k); + void set_narrow_klass(Address dst, Klass* k); + void cmp_narrow_klass(Register dst, Klass* k); + void cmp_narrow_klass(Address dst, Klass* k); + + // if heap base register is used - reinit it with the correct value + void reinit_heapbase(); + + DEBUG_ONLY(void verify_heapbase(const char* msg);) + +#endif // _LP64 + + // Int division/remainder for Java + // (as idivl, but checks for special case as described in JVM spec.) + // returns idivl instruction offset for implicit exception handling + int corrected_idivl(Register reg); + + // Long division/remainder for Java + // (as idivq, but checks for special case as described in JVM spec.) + // returns idivq instruction offset for implicit exception handling + int corrected_idivq(Register reg); + + void int3(); + + // Long operation macros for a 32bit cpu + // Long negation for Java + void lneg(Register hi, Register lo); + + // Long multiplication for Java + // (destroys contents of eax, ebx, ecx and edx) + void lmul(int x_rsp_offset, int y_rsp_offset); // rdx:rax = x * y + + // Long shifts for Java + // (semantics as described in JVM spec.) + void lshl(Register hi, Register lo); // hi:lo << (rcx & 0x3f) + void lshr(Register hi, Register lo, bool sign_extension = false); // hi:lo >> (rcx & 0x3f) + + // Long compare for Java + // (semantics as described in JVM spec.) + void lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo); // x_hi = lcmp(x, y) + + + // misc + + // Sign extension + void sign_extend_short(Register reg); + void sign_extend_byte(Register reg); + + // Division by power of 2, rounding towards 0 + void division_with_shift(Register reg, int shift_value); + + // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: + // + // CF (corresponds to C0) if x < y + // PF (corresponds to C2) if unordered + // ZF (corresponds to C3) if x = y + // + // The arguments are in reversed order on the stack (i.e., top of stack is first argument). + // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) + void fcmp(Register tmp); + // Variant of the above which allows y to be further down the stack + // and which only pops x and y if specified. If pop_right is + // specified then pop_left must also be specified. + void fcmp(Register tmp, int index, bool pop_left, bool pop_right); + + // Floating-point comparison for Java + // Compares the top-most stack entries on the FPU stack and stores the result in dst. + // The arguments are in reversed order on the stack (i.e., top of stack is first argument). + // (semantics as described in JVM spec.) + void fcmp2int(Register dst, bool unordered_is_less); + // Variant of the above which allows y to be further down the stack + // and which only pops x and y if specified. If pop_right is + // specified then pop_left must also be specified. + void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); + + // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) + // tmp is a temporary register, if none is available use noreg + void fremr(Register tmp); + + + // same as fcmp2int, but using SSE2 + void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); + void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); + + // Inlined sin/cos generator for Java; must not use CPU instruction + // directly on Intel as it does not have high enough precision + // outside of the range [-pi/4, pi/4]. Extra argument indicate the + // number of FPU stack slots in use; all but the topmost will + // require saving if a slow case is necessary. Assumes argument is + // on FP TOS; result is on FP TOS. No cpu registers are changed by + // this code. + void trigfunc(char trig, int num_fpu_regs_in_use = 1); + + // branch to L if FPU flag C2 is set/not set + // tmp is a temporary register, if none is available use noreg + void jC2 (Register tmp, Label& L); + void jnC2(Register tmp, Label& L); + + // Pop ST (ffree & fincstp combined) + void fpop(); + + // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack + void push_fTOS(); + + // pops double TOS element from CPU stack and pushes on FPU stack + void pop_fTOS(); + + void empty_FPU_stack(); + + void push_IU_state(); + void pop_IU_state(); + + void push_FPU_state(); + void pop_FPU_state(); + + void push_CPU_state(); + void pop_CPU_state(); + + // Round up to a power of two + void round_to(Register reg, int modulus); + + // Callee saved registers handling + void push_callee_saved_registers(); + void pop_callee_saved_registers(); + + // allocation + void eden_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + void tlab_allocate( + Register obj, // result: pointer to object after successful allocation + Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise + int con_size_in_bytes, // object size in bytes if known at compile time + Register t1, // temp register + Register t2, // temp register + Label& slow_case // continuation point if fast allocation fails + ); + Register tlab_refill(Label& retry_tlab, Label& try_eden, Label& slow_case); // returns TLS address + void incr_allocated_bytes(Register thread, + Register var_size_in_bytes, int con_size_in_bytes, + Register t1 = noreg); + + // interface method calling + void lookup_interface_method(Register recv_klass, + Register intf_klass, + RegisterOrConstant itable_index, + Register method_result, + Register scan_temp, + Label& no_such_interface); + + // virtual method calling + void lookup_virtual_method(Register recv_klass, + RegisterOrConstant vtable_index, + Register method_result); + + // Test sub_klass against super_klass, with fast and slow paths. + + // The fast path produces a tri-state answer: yes / no / maybe-slow. + // One of the three labels can be NULL, meaning take the fall-through. + // If super_check_offset is -1, the value is loaded up from super_klass. + // No registers are killed, except temp_reg. + void check_klass_subtype_fast_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Label* L_success, + Label* L_failure, + Label* L_slow_path, + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); + + // The rest of the type check; must be wired to a corresponding fast path. + // It does not repeat the fast path logic, so don't use it standalone. + // The temp_reg and temp2_reg can be noreg, if no temps are available. + // Updates the sub's secondary super cache as necessary. + // If set_cond_codes, condition codes will be Z on success, NZ on failure. + void check_klass_subtype_slow_path(Register sub_klass, + Register super_klass, + Register temp_reg, + Register temp2_reg, + Label* L_success, + Label* L_failure, + bool set_cond_codes = false); + + // Simplified, combined version, good for typical uses. + // Falls through on failure. + void check_klass_subtype(Register sub_klass, + Register super_klass, + Register temp_reg, + Label& L_success); + + // method handles (JSR 292) + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + //---- + void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 + + // Debugging + + // only if +VerifyOops + // TODO: Make these macros with file and line like sparc version! + void verify_oop(Register reg, const char* s = "broken oop"); + void verify_oop_addr(Address addr, const char * s = "broken oop addr"); + + // TODO: verify method and klass metadata (compare against vptr?) + void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} + void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} + +#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) +#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) + + // only if +VerifyFPU + void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); + + // prints msg, dumps registers and stops execution + void stop(const char* msg); + + // prints msg and continues + void warn(const char* msg); + + // dumps registers and other state + void print_state(); + + static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip); + static void print_state64(int64_t pc, int64_t regs[]); + + void os_breakpoint(); + + void untested() { stop("untested"); } + + void unimplemented(const char* what = "") { char* b = new char[1024]; jio_snprintf(b, 1024, "unimplemented: %s", what); stop(b); } + + void should_not_reach_here() { stop("should not reach here"); } + + void print_CPU_state(); + + // Stack overflow checking + void bang_stack_with_offset(int offset) { + // stack grows down, caller passes positive offset + assert(offset > 0, "must bang with negative offset"); + movl(Address(rsp, (-offset)), rax); + } + + // Writes to stack successive pages until offset reached to check for + // stack overflow + shadow pages. Also, clobbers tmp + void bang_stack_size(Register size, Register tmp); + + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); + + // Support for serializing memory accesses between threads + void serialize_memory(Register thread, Register tmp); + + void verify_tlab(); + + // Biased locking support + // lock_reg and obj_reg must be loaded up with the appropriate values. + // swap_reg must be rax, and is killed. + // tmp_reg is optional. If it is supplied (i.e., != noreg) it will + // be killed; if not supplied, push/pop will be used internally to + // allocate a temporary (inefficient, avoid if possible). + // Optional slow case is for implementations (interpreter and C1) which branch to + // slow case directly. Leaves condition codes set for C2's Fast_Lock node. + // Returns offset of first potentially-faulting instruction for null + // check info (currently consumed only by C1). If + // swap_reg_contains_mark is true then returns -1 as it is assumed + // the calling code has already passed any potential faults. + int biased_locking_enter(Register lock_reg, Register obj_reg, + Register swap_reg, Register tmp_reg, + bool swap_reg_contains_mark, + Label& done, Label* slow_case = NULL, + BiasedLockingCounters* counters = NULL); + void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); + + + Condition negate_condition(Condition cond); + + // Instructions that use AddressLiteral operands. These instruction can handle 32bit/64bit + // operands. In general the names are modified to avoid hiding the instruction in Assembler + // so that we don't need to implement all the varieties in the Assembler with trivial wrappers + // here in MacroAssembler. The major exception to this rule is call + + // Arithmetics + + + void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; } + void addptr(Address dst, Register src); + + void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } + void addptr(Register dst, int32_t src); + void addptr(Register dst, Register src); + void addptr(Register dst, RegisterOrConstant src) { + if (src.is_constant()) addptr(dst, (int) src.as_constant()); + else addptr(dst, src.as_register()); + } + + void andptr(Register dst, int32_t src); + void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; } + + void cmp8(AddressLiteral src1, int imm); + + // renamed to drag out the casting of address to int32_t/intptr_t + void cmp32(Register src1, int32_t imm); + + void cmp32(AddressLiteral src1, int32_t imm); + // compare reg - mem, or reg - &mem + void cmp32(Register src1, AddressLiteral src2); + + void cmp32(Register src1, Address src2); + +#ifndef _LP64 + void cmpklass(Address dst, Metadata* obj); + void cmpklass(Register dst, Metadata* obj); + void cmpoop(Address dst, jobject obj); + void cmpoop(Register dst, jobject obj); +#endif // _LP64 + + // NOTE src2 must be the lval. This is NOT an mem-mem compare + void cmpptr(Address src1, AddressLiteral src2); + + void cmpptr(Register src1, AddressLiteral src2); + + void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } + void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } + // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } + + void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } + void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } + + // cmp64 to avoild hiding cmpq + void cmp64(Register src1, AddressLiteral src); + + void cmpxchgptr(Register reg, Address adr); + + void locked_cmpxchgptr(Register reg, AddressLiteral adr); + + + void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); } + + + void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); } + + void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); } + + void shlptr(Register dst, int32_t shift); + void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); } + + void shrptr(Register dst, int32_t shift); + void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); } + + void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); } + void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); } + + void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } + + void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } + void subptr(Register dst, int32_t src); + // Force generation of a 4 byte immediate value even if it fits into 8bit + void subptr_imm32(Register dst, int32_t src); + void subptr(Register dst, Register src); + void subptr(Register dst, RegisterOrConstant src) { + if (src.is_constant()) subptr(dst, (int) src.as_constant()); + else subptr(dst, src.as_register()); + } + + void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } + void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } + + void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } + void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } + + void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; } + + + + // Helper functions for statistics gathering. + // Conditionally (atomically, on MPs) increments passed counter address, preserving condition codes. + void cond_inc32(Condition cond, AddressLiteral counter_addr); + // Unconditional atomic increment. + void atomic_incl(AddressLiteral counter_addr); + + void lea(Register dst, AddressLiteral adr); + void lea(Address dst, AddressLiteral adr); + void lea(Register dst, Address adr) { Assembler::lea(dst, adr); } + + void leal32(Register dst, Address src) { leal(dst, src); } + + // Import other testl() methods from the parent class or else + // they will be hidden by the following overriding declaration. + using Assembler::testl; + void testl(Register dst, AddressLiteral src); + + void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } + void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } + void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } + + void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); } + void testptr(Register src1, Register src2); + + void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } + void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } + + // Calls + + void call(Label& L, relocInfo::relocType rtype); + void call(Register entry); + + // NOTE: this call tranfers to the effective address of entry NOT + // the address contained by entry. This is because this is more natural + // for jumps/calls. + void call(AddressLiteral entry); + + // Emit the CompiledIC call idiom + void ic_call(address entry); + + // Jumps + + // NOTE: these jumps tranfer to the effective address of dst NOT + // the address contained by dst. This is because this is more natural + // for jumps/calls. + void jump(AddressLiteral dst); + void jump_cc(Condition cc, AddressLiteral dst); + + // 32bit can do a case table jump in one instruction but we no longer allow the base + // to be installed in the Address class. This jump will tranfers to the address + // contained in the location described by entry (not the address of entry) + void jump(ArrayAddress entry); + + // Floating + + void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); } + void andpd(XMMRegister dst, AddressLiteral src); + + void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); } + void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); } + void andps(XMMRegister dst, AddressLiteral src); + + void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); } + void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); } + void comiss(XMMRegister dst, AddressLiteral src); + + void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); } + void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } + void comisd(XMMRegister dst, AddressLiteral src); + + void fadd_s(Address src) { Assembler::fadd_s(src); } + void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); } + + void fldcw(Address src) { Assembler::fldcw(src); } + void fldcw(AddressLiteral src); + + void fld_s(int index) { Assembler::fld_s(index); } + void fld_s(Address src) { Assembler::fld_s(src); } + void fld_s(AddressLiteral src); + + void fld_d(Address src) { Assembler::fld_d(src); } + void fld_d(AddressLiteral src); + + void fld_x(Address src) { Assembler::fld_x(src); } + void fld_x(AddressLiteral src); + + void fmul_s(Address src) { Assembler::fmul_s(src); } + void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); } + + void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } + void ldmxcsr(AddressLiteral src); + + // compute pow(x,y) and exp(x) with x86 instructions. Don't cover + // all corner cases and may result in NaN and require fallback to a + // runtime call. + void fast_pow(); + void fast_exp(); + void increase_precision(); + void restore_precision(); + + // computes exp(x). Fallback to runtime call included. + void exp_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(true, num_fpu_regs_in_use); } + // computes pow(x,y). Fallback to runtime call included. + void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); } + +private: + + // call runtime as a fallback for trig functions and pow/exp. + void fp_runtime_fallback(address runtime_entry, int nb_args, int num_fpu_regs_in_use); + + // computes 2^(Ylog2X); Ylog2X in ST(0) + void pow_exp_core_encoding(); + + // computes pow(x,y) or exp(x). Fallback to runtime call included. + void pow_or_exp(bool is_exp, int num_fpu_regs_in_use); + + // these are private because users should be doing movflt/movdbl + + void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); } + void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); } + void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); } + void movss(XMMRegister dst, AddressLiteral src); + + void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); } + void movlpd(XMMRegister dst, AddressLiteral src); + +public: + + void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); } + void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); } + void addsd(XMMRegister dst, AddressLiteral src); + + void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); } + void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); } + void addss(XMMRegister dst, AddressLiteral src); + + void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); } + void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); } + void divsd(XMMRegister dst, AddressLiteral src); + + void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); } + void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); } + void divss(XMMRegister dst, AddressLiteral src); + + // Move Unaligned Double Quadword + void movdqu(Address dst, XMMRegister src) { Assembler::movdqu(dst, src); } + void movdqu(XMMRegister dst, Address src) { Assembler::movdqu(dst, src); } + void movdqu(XMMRegister dst, XMMRegister src) { Assembler::movdqu(dst, src); } + void movdqu(XMMRegister dst, AddressLiteral src); + + void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); } + void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); } + void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); } + void movsd(XMMRegister dst, AddressLiteral src); + + void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); } + void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); } + void mulsd(XMMRegister dst, AddressLiteral src); + + void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); } + void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); } + void mulss(XMMRegister dst, AddressLiteral src); + + void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); } + void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); } + void sqrtsd(XMMRegister dst, AddressLiteral src); + + void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); } + void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); } + void sqrtss(XMMRegister dst, AddressLiteral src); + + void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); } + void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); } + void subsd(XMMRegister dst, AddressLiteral src); + + void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); } + void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); } + void subss(XMMRegister dst, AddressLiteral src); + + void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); } + void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); } + void ucomiss(XMMRegister dst, AddressLiteral src); + + void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); } + void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); } + void ucomisd(XMMRegister dst, AddressLiteral src); + + // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values + void xorpd(XMMRegister dst, XMMRegister src) { Assembler::xorpd(dst, src); } + void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); } + void xorpd(XMMRegister dst, AddressLiteral src); + + // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values + void xorps(XMMRegister dst, XMMRegister src) { Assembler::xorps(dst, src); } + void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); } + void xorps(XMMRegister dst, AddressLiteral src); + + // Shuffle Bytes + void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); } + void pshufb(XMMRegister dst, Address src) { Assembler::pshufb(dst, src); } + void pshufb(XMMRegister dst, AddressLiteral src); + // AVX 3-operands instructions + + void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); } + void vaddsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddsd(dst, nds, src); } + void vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddss(dst, nds, src); } + void vaddss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vaddss(dst, nds, src); } + void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); } + void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); } + void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); + + void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); } + void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); } + void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); + + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } + void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } + void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivss(dst, nds, src); } + void vdivss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivss(dst, nds, src); } + void vdivss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulsd(dst, nds, src); } + void vmulsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulsd(dst, nds, src); } + void vmulsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vmulss(dst, nds, src); } + void vmulss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vmulss(dst, nds, src); } + void vmulss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubsd(dst, nds, src); } + void vsubsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubsd(dst, nds, src); } + void vsubsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vsubss(dst, nds, src); } + void vsubss(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vsubss(dst, nds, src); } + void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src); + + // AVX Vector instructions + + void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } + void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); } + void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); + + void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } + void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); } + void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256); + + void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { + if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2 + Assembler::vpxor(dst, nds, src, vector256); + else + Assembler::vxorpd(dst, nds, src, vector256); + } + void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { + if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2 + Assembler::vpxor(dst, nds, src, vector256); + else + Assembler::vxorpd(dst, nds, src, vector256); + } + + // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector. + void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) { + if (UseAVX > 1) // vinserti128h is available only in AVX2 + Assembler::vinserti128h(dst, nds, src); + else + Assembler::vinsertf128h(dst, nds, src); + } + + // Data + + void cmov32( Condition cc, Register dst, Address src); + void cmov32( Condition cc, Register dst, Register src); + + void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } + + void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } + void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } + + void movoop(Register dst, jobject obj); + void movoop(Address dst, jobject obj); + + void mov_metadata(Register dst, Metadata* obj); + void mov_metadata(Address dst, Metadata* obj); + + void movptr(ArrayAddress dst, Register src); + // can this do an lea? + void movptr(Register dst, ArrayAddress src); + + void movptr(Register dst, Address src); + + void movptr(Register dst, AddressLiteral src); + + void movptr(Register dst, intptr_t src); + void movptr(Register dst, Register src); + void movptr(Address dst, intptr_t src); + + void movptr(Address dst, Register src); + + void movptr(Register dst, RegisterOrConstant src) { + if (src.is_constant()) movptr(dst, src.as_constant()); + else movptr(dst, src.as_register()); + } + +#ifdef _LP64 + // Generally the next two are only used for moving NULL + // Although there are situations in initializing the mark word where + // they could be used. They are dangerous. + + // They only exist on LP64 so that int32_t and intptr_t are not the same + // and we have ambiguous declarations. + + void movptr(Address dst, int32_t imm32); + void movptr(Register dst, int32_t imm32); +#endif // _LP64 + + // to avoid hiding movl + void mov32(AddressLiteral dst, Register src); + void mov32(Register dst, AddressLiteral src); + + // to avoid hiding movb + void movbyte(ArrayAddress dst, int src); + + // Import other mov() methods from the parent class or else + // they will be hidden by the following overriding declaration. + using Assembler::movdl; + using Assembler::movq; + void movdl(XMMRegister dst, AddressLiteral src); + void movq(XMMRegister dst, AddressLiteral src); + + // Can push value or effective address + void pushptr(AddressLiteral src); + + void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); } + void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); } + + void pushoop(jobject obj); + void pushklass(Metadata* obj); + + // sign extend as need a l to ptr sized element + void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); } + void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } + + // C2 compiled method's prolog code. + void verified_entry(int framesize, bool stack_bang, bool fp_mode_24b); + + // IndexOf strings. + // Small strings are loaded through stack if they cross page boundary. + void string_indexof(Register str1, Register str2, + Register cnt1, Register cnt2, + int int_cnt2, Register result, + XMMRegister vec, Register tmp); + + // IndexOf for constant substrings with size >= 8 elements + // which don't need to be loaded through stack. + void string_indexofC8(Register str1, Register str2, + Register cnt1, Register cnt2, + int int_cnt2, Register result, + XMMRegister vec, Register tmp); + + // Smallest code: we don't need to load through stack, + // check string tail. + + // Compare strings. + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + XMMRegister vec1); + + // Compare char[] arrays. + void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2, + Register limit, Register result, Register chr, + XMMRegister vec1, XMMRegister vec2); + + // Fill primitive arrays + void generate_fill(BasicType t, bool aligned, + Register to, Register value, Register count, + Register rtmp, XMMRegister xtmp); + +#undef VIRTUAL + +}; + +/** + * class SkipIfEqual: + * + * Instantiating this class will result in assembly code being output that will + * jump around any code emitted between the creation of the instance and it's + * automatic destruction at the end of a scope block, depending on the value of + * the flag passed to the constructor, which will be checked at run-time. + */ +class SkipIfEqual { + private: + MacroAssembler* _masm; + Label _label; + + public: + SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); + ~SkipIfEqual(); +}; + +#endif // CPU_X86_VM_MACROASSEMBLER_X86_HPP diff --git a/hotspot/src/cpu/x86/vm/metaspaceShared_x86_32.cpp b/hotspot/src/cpu/x86/vm/metaspaceShared_x86_32.cpp index a43fafdd31b..c2956a52a54 100644 --- a/hotspot/src/cpu/x86/vm/metaspaceShared_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/metaspaceShared_x86_32.cpp @@ -23,7 +23,8 @@ */ #include "precompiled.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" #include "memory/metaspaceShared.hpp" // Generate the self-patching vtable method: diff --git a/hotspot/src/cpu/x86/vm/metaspaceShared_x86_64.cpp b/hotspot/src/cpu/x86/vm/metaspaceShared_x86_64.cpp index 2ef2abf6a75..4ff6cc955d7 100644 --- a/hotspot/src/cpu/x86/vm/metaspaceShared_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/metaspaceShared_x86_64.cpp @@ -23,7 +23,8 @@ */ #include "precompiled.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" #include "memory/metaspaceShared.hpp" // Generate the self-patching vtable method: diff --git a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp index 88ec6b71965..7da3a2c423a 100644 --- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp +++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "memory/allocation.inline.hpp" diff --git a/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp b/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp index 1cf509992ca..dccd7e0b7cd 100644 --- a/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp +++ b/hotspot/src/cpu/x86/vm/nativeInst_x86.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "memory/resourceArea.hpp" #include "nativeInst_x86.hpp" #include "oops/oop.inline.hpp" diff --git a/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp b/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp index 1023695e853..d4a929613ab 100644 --- a/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp +++ b/hotspot/src/cpu/x86/vm/relocInfo_x86.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.inline.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "code/relocInfo.hpp" #include "nativeInst_x86.hpp" #include "oops/oop.inline.hpp" diff --git a/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp b/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp index c932f9fa2d1..1cc10d76622 100644 --- a/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp @@ -24,12 +24,11 @@ #include "precompiled.hpp" #ifdef COMPILER2 -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "classfile/systemDictionary.hpp" #include "code/vmreg.hpp" #include "interpreter/interpreter.hpp" -#include "nativeInst_x86.hpp" #include "opto/runtime.hpp" #include "runtime/interfaceSupport.hpp" #include "runtime/sharedRuntime.hpp" diff --git a/hotspot/src/cpu/x86/vm/runtime_x86_64.cpp b/hotspot/src/cpu/x86/vm/runtime_x86_64.cpp index 8f53518bb4b..0c39aea845f 100644 --- a/hotspot/src/cpu/x86/vm/runtime_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/runtime_x86_64.cpp @@ -24,12 +24,11 @@ #include "precompiled.hpp" #ifdef COMPILER2 -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "classfile/systemDictionary.hpp" #include "code/vmreg.hpp" #include "interpreter/interpreter.hpp" -#include "nativeInst_x86.hpp" #include "opto/runtime.hpp" #include "runtime/interfaceSupport.hpp" #include "runtime/sharedRuntime.hpp" diff --git a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp index 7c3116a489c..dc705421ca9 100644 --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_32.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" diff --git a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp index 271a045fb6d..50255eeef0e 100644 --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp index bcb408b5bc8..52e3f4169af 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "interpreter/interpreter.hpp" #include "nativeInst_x86.hpp" #include "oops/instanceOop.hpp" diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp index d205f963926..48f4af8dc17 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "interpreter/interpreter.hpp" #include "nativeInst_x86.hpp" #include "oops/instanceOop.hpp" diff --git a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp index 4aff1d9663f..19a2a45c9f7 100644 --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterGenerator.hpp" diff --git a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp index 75318ab42c8..e2b46fc78bc 100644 --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterGenerator.hpp" diff --git a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp index 24d138e4917..e2a20531f82 100644 --- a/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_32.cpp @@ -23,7 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "interpreter/templateTable.hpp" diff --git a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp index bb9b86f2473..eedab0b4b22 100644 --- a/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/templateTable_x86_64.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "asm/macroAssembler.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "interpreter/templateTable.hpp" diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp index 21bb57335ff..f48e66012a8 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp @@ -23,7 +23,8 @@ */ #include "precompiled.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "memory/resourceArea.hpp" #include "runtime/java.hpp" #include "runtime/stubCodeGenerator.hpp" diff --git a/hotspot/src/cpu/x86/vm/vtableStubs_x86_32.cpp b/hotspot/src/cpu/x86/vm/vtableStubs_x86_32.cpp index edf1ab1bfff..c470004ebc8 100644 --- a/hotspot/src/cpu/x86/vm/vtableStubs_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/vtableStubs_x86_32.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "code/vtableStubs.hpp" #include "interp_masm_x86_32.hpp" #include "memory/resourceArea.hpp" diff --git a/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp b/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp index 867ebfa152f..0dc056cdbaf 100644 --- a/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/vtableStubs_x86_64.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "code/vtableStubs.hpp" #include "interp_masm_x86_64.hpp" #include "memory/resourceArea.hpp" diff --git a/hotspot/src/cpu/zero/vm/assembler_zero.cpp b/hotspot/src/cpu/zero/vm/assembler_zero.cpp index a3e36276415..e37938c1fa7 100644 --- a/hotspot/src/cpu/zero/vm/assembler_zero.cpp +++ b/hotspot/src/cpu/zero/vm/assembler_zero.cpp @@ -46,6 +46,12 @@ int AbstractAssembler::code_fill_byte() { return 0; } +#ifdef ASSERT +bool AbstractAssembler::pd_check_instruction_mark() { + ShouldNotCallThis(); +} +#endif + void Assembler::pd_patch_instruction(address branch, address target) { ShouldNotCallThis(); } @@ -80,6 +86,11 @@ void MacroAssembler::store_oop(jobject obj) { emit_address((address) obj); } +void MacroAssembler::store_Metadata(Metadata* md) { + code_section()->relocate(pc(), metadata_Relocation::spec_for_immediate()); + emit_address((address) md); +} + static void should_not_call() { report_should_not_call(__FILE__, __LINE__); } diff --git a/hotspot/src/cpu/zero/vm/assembler_zero.hpp b/hotspot/src/cpu/zero/vm/assembler_zero.hpp index 2cc25a73aa0..724153982ac 100644 --- a/hotspot/src/cpu/zero/vm/assembler_zero.hpp +++ b/hotspot/src/cpu/zero/vm/assembler_zero.hpp @@ -55,14 +55,9 @@ class MacroAssembler : public Assembler { public: void advance(int bytes); void store_oop(jobject obj); + void store_Metadata(Metadata* obj); }; -#ifdef ASSERT -inline bool AbstractAssembler::pd_check_instruction_mark() { - ShouldNotCallThis(); -} -#endif - address ShouldNotCallThisStub(); address ShouldNotCallThisEntry(); diff --git a/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp b/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp index ee855a9efbc..c6f82cd74a8 100644 --- a/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp +++ b/hotspot/src/cpu/zero/vm/cppInterpreter_zero.cpp @@ -1015,11 +1015,7 @@ int AbstractInterpreter::size_top_interpreter_activation(Method* method) { // Helper for figuring out if frames are interpreter frames bool CppInterpreter::contains(address pc) { -#ifdef PRODUCT - ShouldNotCallThis(); -#else return false; // make frame::print_value_on work -#endif // !PRODUCT } // Result handlers and convertors diff --git a/hotspot/src/cpu/zero/vm/globals_zero.hpp b/hotspot/src/cpu/zero/vm/globals_zero.hpp index 7e0ae86b3be..a3c7d244173 100644 --- a/hotspot/src/cpu/zero/vm/globals_zero.hpp +++ b/hotspot/src/cpu/zero/vm/globals_zero.hpp @@ -52,11 +52,7 @@ define_pd_global(intx, StackShadowPages, 5 LP64_ONLY(+1) DEBUG_ONLY(+3)); define_pd_global(bool, RewriteBytecodes, true); define_pd_global(bool, RewriteFrequentPairs, true); -#ifdef _ALLBSD_SOURCE define_pd_global(bool, UseMembar, true); -#else -define_pd_global(bool, UseMembar, false); -#endif // GC Ergo Flags define_pd_global(intx, CMSYoungGenPerWorker, 16*M); // default max size of CMS young gen, per GC worker thread diff --git a/hotspot/src/os/bsd/vm/osThread_bsd.cpp b/hotspot/src/os/bsd/vm/osThread_bsd.cpp index 98a3f2f6340..92c6d0c4dac 100644 --- a/hotspot/src/os/bsd/vm/osThread_bsd.cpp +++ b/hotspot/src/os/bsd/vm/osThread_bsd.cpp @@ -23,29 +23,10 @@ */ // no precompiled headers -#include "runtime/atomic.hpp" -#include "runtime/handles.inline.hpp" #include "runtime/mutexLocker.hpp" -#include "runtime/os.hpp" #include "runtime/osThread.hpp" -#include "runtime/safepoint.hpp" -#include "runtime/vmThread.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -#endif +#include void OSThread::pd_initialize() { assert(this != NULL, "check"); diff --git a/hotspot/src/os/bsd/vm/os_bsd.cpp b/hotspot/src/os/bsd/vm/os_bsd.cpp index b77f2f0d770..81b4c9d0caf 100644 --- a/hotspot/src/os/bsd/vm/os_bsd.cpp +++ b/hotspot/src/os/bsd/vm/os_bsd.cpp @@ -29,6 +29,7 @@ #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" #include "compiler/compileBroker.hpp" +#include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "jvm_bsd.h" #include "memory/allocation.inline.hpp" @@ -62,26 +63,6 @@ #include "utilities/events.hpp" #include "utilities/growableArray.hpp" #include "utilities/vmError.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -# include "nativeInst_x86.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -# include "nativeInst_sparc.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -# include "nativeInst_zero.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -# include "nativeInst_arm.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -# include "nativeInst_ppc.hpp" -#endif // put OS-includes here # include diff --git a/hotspot/src/os/bsd/vm/os_bsd.inline.hpp b/hotspot/src/os/bsd/vm/os_bsd.inline.hpp index 651059fbe8a..6a7cdc08b5f 100644 --- a/hotspot/src/os/bsd/vm/os_bsd.inline.hpp +++ b/hotspot/src/os/bsd/vm/os_bsd.inline.hpp @@ -26,13 +26,13 @@ #define OS_BSD_VM_OS_BSD_INLINE_HPP #include "runtime/atomic.hpp" +#include "runtime/atomic.inline.hpp" #include "runtime/os.hpp" + #ifdef TARGET_OS_ARCH_bsd_x86 -# include "atomic_bsd_x86.inline.hpp" # include "orderAccess_bsd_x86.inline.hpp" #endif #ifdef TARGET_OS_ARCH_bsd_zero -# include "atomic_bsd_zero.inline.hpp" # include "orderAccess_bsd_zero.inline.hpp" #endif diff --git a/hotspot/src/os/linux/vm/osThread_linux.cpp b/hotspot/src/os/linux/vm/osThread_linux.cpp index 8ff88a9e34f..8d27b5651bc 100644 --- a/hotspot/src/os/linux/vm/osThread_linux.cpp +++ b/hotspot/src/os/linux/vm/osThread_linux.cpp @@ -23,29 +23,10 @@ */ // no precompiled headers -#include "runtime/atomic.hpp" -#include "runtime/handles.inline.hpp" -#include "runtime/mutexLocker.hpp" -#include "runtime/os.hpp" +#include "runtime/mutex.hpp" #include "runtime/osThread.hpp" -#include "runtime/safepoint.hpp" -#include "runtime/vmThread.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -#endif +#include void OSThread::pd_initialize() { assert(this != NULL, "check"); diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp index 09d821423da..45dea5e95a8 100644 --- a/hotspot/src/os/linux/vm/os_linux.cpp +++ b/hotspot/src/os/linux/vm/os_linux.cpp @@ -29,6 +29,7 @@ #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" #include "compiler/compileBroker.hpp" +#include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "jvm_linux.h" #include "memory/allocation.inline.hpp" @@ -62,26 +63,6 @@ #include "utilities/events.hpp" #include "utilities/growableArray.hpp" #include "utilities/vmError.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -# include "nativeInst_x86.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -# include "nativeInst_sparc.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -# include "nativeInst_zero.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -# include "nativeInst_arm.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -# include "nativeInst_ppc.hpp" -#endif // put OS-includes here # include diff --git a/hotspot/src/os/linux/vm/os_linux.inline.hpp b/hotspot/src/os/linux/vm/os_linux.inline.hpp index ba56240dc8e..c779f8a6aa5 100644 --- a/hotspot/src/os/linux/vm/os_linux.inline.hpp +++ b/hotspot/src/os/linux/vm/os_linux.inline.hpp @@ -26,25 +26,22 @@ #define OS_LINUX_VM_OS_LINUX_INLINE_HPP #include "runtime/atomic.hpp" +#include "runtime/atomic.inline.hpp" #include "runtime/os.hpp" + #ifdef TARGET_OS_ARCH_linux_x86 -# include "atomic_linux_x86.inline.hpp" # include "orderAccess_linux_x86.inline.hpp" #endif #ifdef TARGET_OS_ARCH_linux_sparc -# include "atomic_linux_sparc.inline.hpp" # include "orderAccess_linux_sparc.inline.hpp" #endif #ifdef TARGET_OS_ARCH_linux_zero -# include "atomic_linux_zero.inline.hpp" # include "orderAccess_linux_zero.inline.hpp" #endif #ifdef TARGET_OS_ARCH_linux_arm -# include "atomic_linux_arm.inline.hpp" # include "orderAccess_linux_arm.inline.hpp" #endif #ifdef TARGET_OS_ARCH_linux_ppc -# include "atomic_linux_ppc.inline.hpp" # include "orderAccess_linux_ppc.inline.hpp" #endif diff --git a/hotspot/src/os/solaris/vm/osThread_solaris.cpp b/hotspot/src/os/solaris/vm/osThread_solaris.cpp index 937ae5cbbcf..6310471f5e0 100644 --- a/hotspot/src/os/solaris/vm/osThread_solaris.cpp +++ b/hotspot/src/os/solaris/vm/osThread_solaris.cpp @@ -30,14 +30,8 @@ #include "runtime/osThread.hpp" #include "runtime/safepoint.hpp" #include "runtime/vmThread.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -#endif -# include +#include // *************************************************************** // Platform dependent initialization and cleanup diff --git a/hotspot/src/os/solaris/vm/os_solaris.cpp b/hotspot/src/os/solaris/vm/os_solaris.cpp index d25c2714b46..05b2ec0a58a 100644 --- a/hotspot/src/os/solaris/vm/os_solaris.cpp +++ b/hotspot/src/os/solaris/vm/os_solaris.cpp @@ -29,6 +29,7 @@ #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" #include "compiler/compileBroker.hpp" +#include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "jvm_solaris.h" #include "memory/allocation.inline.hpp" @@ -63,14 +64,6 @@ #include "utilities/events.hpp" #include "utilities/growableArray.hpp" #include "utilities/vmError.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -# include "nativeInst_x86.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -# include "nativeInst_sparc.hpp" -#endif // put OS-includes here # include diff --git a/hotspot/src/os/solaris/vm/os_solaris.inline.hpp b/hotspot/src/os/solaris/vm/os_solaris.inline.hpp index 3454f6d7ba4..a3f09d01d32 100644 --- a/hotspot/src/os/solaris/vm/os_solaris.inline.hpp +++ b/hotspot/src/os/solaris/vm/os_solaris.inline.hpp @@ -26,13 +26,13 @@ #define OS_SOLARIS_VM_OS_SOLARIS_INLINE_HPP #include "runtime/atomic.hpp" +#include "runtime/atomic.inline.hpp" #include "runtime/os.hpp" + #ifdef TARGET_OS_ARCH_solaris_x86 -# include "atomic_solaris_x86.inline.hpp" # include "orderAccess_solaris_x86.inline.hpp" #endif #ifdef TARGET_OS_ARCH_solaris_sparc -# include "atomic_solaris_sparc.inline.hpp" # include "orderAccess_solaris_sparc.inline.hpp" #endif diff --git a/hotspot/src/os/windows/vm/osThread_windows.cpp b/hotspot/src/os/windows/vm/osThread_windows.cpp index 005643aa67a..d65d0d6f557 100644 --- a/hotspot/src/os/windows/vm/osThread_windows.cpp +++ b/hotspot/src/os/windows/vm/osThread_windows.cpp @@ -30,9 +30,6 @@ #include "runtime/osThread.hpp" #include "runtime/safepoint.hpp" #include "runtime/vmThread.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -#endif void OSThread::pd_initialize() { set_thread_handle(NULL); diff --git a/hotspot/src/os/windows/vm/os_windows.cpp b/hotspot/src/os/windows/vm/os_windows.cpp index 8e1685ae9cb..ddd3a79bfe2 100644 --- a/hotspot/src/os/windows/vm/os_windows.cpp +++ b/hotspot/src/os/windows/vm/os_windows.cpp @@ -32,6 +32,7 @@ #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" #include "compiler/compileBroker.hpp" +#include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "jvm_windows.h" #include "memory/allocation.inline.hpp" @@ -65,10 +66,6 @@ #include "utilities/events.hpp" #include "utilities/growableArray.hpp" #include "utilities/vmError.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -# include "nativeInst_x86.hpp" -#endif #ifdef _DEBUG #include diff --git a/hotspot/src/os/windows/vm/os_windows.inline.hpp b/hotspot/src/os/windows/vm/os_windows.inline.hpp index cee281fada7..8f8c3c72fd2 100644 --- a/hotspot/src/os/windows/vm/os_windows.inline.hpp +++ b/hotspot/src/os/windows/vm/os_windows.inline.hpp @@ -26,9 +26,10 @@ #define OS_WINDOWS_VM_OS_WINDOWS_INLINE_HPP #include "runtime/atomic.hpp" +#include "runtime/atomic.inline.hpp" #include "runtime/os.hpp" + #ifdef TARGET_OS_ARCH_windows_x86 -# include "atomic_windows_x86.inline.hpp" # include "orderAccess_windows_x86.inline.hpp" #endif diff --git a/hotspot/src/os_cpu/bsd_x86/vm/assembler_bsd_x86.cpp b/hotspot/src/os_cpu/bsd_x86/vm/assembler_bsd_x86.cpp index 5cd94107507..18bed6b0a6c 100644 --- a/hotspot/src/os_cpu/bsd_x86/vm/assembler_bsd_x86.cpp +++ b/hotspot/src/os_cpu/bsd_x86/vm/assembler_bsd_x86.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "runtime/os.hpp" #include "runtime/threadLocalStorage.hpp" diff --git a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp index 4b67b60c7e1..fc55e4ec71e 100644 --- a/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp +++ b/hotspot/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp @@ -23,7 +23,7 @@ */ // no precompiled headers -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "classfile/classLoader.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -33,7 +33,6 @@ #include "jvm_bsd.h" #include "memory/allocation.inline.hpp" #include "mutex_bsd.inline.hpp" -#include "nativeInst_x86.hpp" #include "os_share_bsd.hpp" #include "prims/jniFastGetField.hpp" #include "prims/jvm.h" diff --git a/hotspot/src/os_cpu/linux_sparc/vm/assembler_linux_sparc.cpp b/hotspot/src/os_cpu/linux_sparc/vm/assembler_linux_sparc.cpp index 20a5c5886dc..e13dc36700e 100644 --- a/hotspot/src/os_cpu/linux_sparc/vm/assembler_linux_sparc.cpp +++ b/hotspot/src/os_cpu/linux_sparc/vm/assembler_linux_sparc.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.hpp" #include "runtime/os.hpp" #include "runtime/threadLocalStorage.hpp" diff --git a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp index 97aeaa334fc..81c013301d2 100644 --- a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp +++ b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.cpp @@ -23,7 +23,7 @@ */ // no precompiled headers -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.hpp" #include "classfile/classLoader.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" diff --git a/hotspot/src/os_cpu/linux_x86/vm/assembler_linux_x86.cpp b/hotspot/src/os_cpu/linux_x86/vm/assembler_linux_x86.cpp index 12275f55d07..5c0c07a0c91 100644 --- a/hotspot/src/os_cpu/linux_x86/vm/assembler_linux_x86.cpp +++ b/hotspot/src/os_cpu/linux_x86/vm/assembler_linux_x86.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "runtime/os.hpp" #include "runtime/threadLocalStorage.hpp" diff --git a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp index eabe912053d..907395bb22a 100644 --- a/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp +++ b/hotspot/src/os_cpu/linux_x86/vm/os_linux_x86.cpp @@ -23,7 +23,7 @@ */ // no precompiled headers -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "classfile/classLoader.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -33,7 +33,6 @@ #include "jvm_linux.h" #include "memory/allocation.inline.hpp" #include "mutex_linux.inline.hpp" -#include "nativeInst_x86.hpp" #include "os_share_linux.hpp" #include "prims/jniFastGetField.hpp" #include "prims/jvm.h" diff --git a/hotspot/src/os_cpu/solaris_sparc/vm/assembler_solaris_sparc.cpp b/hotspot/src/os_cpu/solaris_sparc/vm/assembler_solaris_sparc.cpp index 5e9eaa12a31..c9e4c2cc0d0 100644 --- a/hotspot/src/os_cpu/solaris_sparc/vm/assembler_solaris_sparc.cpp +++ b/hotspot/src/os_cpu/solaris_sparc/vm/assembler_solaris_sparc.cpp @@ -23,8 +23,7 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.inline.hpp" #include "runtime/os.hpp" #include "runtime/threadLocalStorage.hpp" diff --git a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp index dc75ff06f33..f0db469941a 100644 --- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp +++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp @@ -23,7 +23,7 @@ */ // no precompiled headers -#include "assembler_sparc.inline.hpp" +#include "asm/macroAssembler.hpp" #include "classfile/classLoader.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" diff --git a/hotspot/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp b/hotspot/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp index d1cd12f2803..ed25848bca8 100644 --- a/hotspot/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp +++ b/hotspot/src/os_cpu/solaris_x86/vm/assembler_solaris_x86.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "runtime/os.hpp" #include "runtime/threadLocalStorage.hpp" diff --git a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp index 223e69e5cde..5b5445da512 100644 --- a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp +++ b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp @@ -27,6 +27,7 @@ #include "runtime/atomic.hpp" #include "runtime/orderAccess.hpp" +#include "runtime/os.hpp" #include "vm_version_x86.hpp" // Implementation of class OrderAccess. diff --git a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp index e51bae1796f..d954063c296 100644 --- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp +++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp @@ -23,7 +23,7 @@ */ // no precompiled headers -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "classfile/classLoader.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" @@ -33,7 +33,6 @@ #include "jvm_solaris.h" #include "memory/allocation.inline.hpp" #include "mutex_solaris.inline.hpp" -#include "nativeInst_x86.hpp" #include "os_share_solaris.hpp" #include "prims/jniFastGetField.hpp" #include "prims/jvm.h" diff --git a/hotspot/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp b/hotspot/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp index 55e5053387d..ce629e20762 100644 --- a/hotspot/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp +++ b/hotspot/src/os_cpu/windows_x86/vm/assembler_windows_x86.cpp @@ -23,8 +23,8 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "runtime/os.hpp" #include "runtime/threadLocalStorage.hpp" diff --git a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp index 4caf927cf7c..e8bd38dc13e 100644 --- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp +++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp @@ -23,7 +23,7 @@ */ // no precompiled headers -#include "assembler_x86.inline.hpp" +#include "asm/macroAssembler.hpp" #include "classfile/classLoader.hpp" #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" diff --git a/hotspot/src/share/tools/LogCompilation/README b/hotspot/src/share/tools/LogCompilation/README index ed3cbbc8121..90dc3b893bb 100644 --- a/hotspot/src/share/tools/LogCompilation/README +++ b/hotspot/src/share/tools/LogCompilation/README @@ -13,6 +13,6 @@ Adding the -i option with also report inlining like PrintInlining. More information about the LogCompilation output can be found at -http://wikis.sun.com/display/HotSpotInternals/LogCompilation+overview -http://wikis.sun.com/display/HotSpotInternals/PrintCompilation -http://wikis.sun.com/display/HotSpotInternals/LogCompilation+tool +https://wikis.oracle.com/display/HotSpotInternals/LogCompilation+overview +https://wikis.oracle.com/display/HotSpotInternals/PrintCompilation +https://wikis.oracle.com/display/HotSpotInternals/LogCompilation+tool diff --git a/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java index 354b32a99d7..4870dffec2e 100644 --- a/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java +++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/CallSite.java @@ -38,6 +38,7 @@ public class CallSite { private String reason; private List calls; private int endNodes; + private int endLiveNodes; private double timeStamp; CallSite() { @@ -106,7 +107,7 @@ public class CallSite { } } if (getEndNodes() > 0) { - stream.printf(" (end time: %6.4f nodes: %d)", getTimeStamp(), getEndNodes()); + stream.printf(" (end time: %6.4f nodes: %d live: %d)", getTimeStamp(), getEndNodes(), getEndLiveNodes()); } stream.println(""); if (getReceiver() != null) { @@ -195,6 +196,14 @@ public class CallSite { return endNodes; } + void setEndLiveNodes(int n) { + endLiveNodes = n; + } + + public int getEndLiveNodes() { + return endLiveNodes; + } + void setTimeStamp(double time) { timeStamp = time; } diff --git a/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java index 12689b0d3cc..46991bc8da6 100644 --- a/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java +++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogCompilation.java @@ -37,13 +37,13 @@ import org.xml.sax.helpers.*; public class LogCompilation extends DefaultHandler implements ErrorHandler, Constants { public static void usage(int exitcode) { - System.out.println("Usage: LogCompilation [ -v ] [ -c ] [ -s ] [ -e | -N ] file1 ..."); + System.out.println("Usage: LogCompilation [ -v ] [ -c ] [ -s ] [ -e | -n ] file1 ..."); System.out.println(" -c: clean up malformed 1.5 xml"); System.out.println(" -i: print inlining decisions"); System.out.println(" -S: print compilation statistics"); System.out.println(" -s: sort events by start time"); System.out.println(" -e: sort events by elapsed time"); - System.out.println(" -N: sort events by name and start"); + System.out.println(" -n: sort events by name and start"); System.exit(exitcode); } @@ -137,7 +137,11 @@ public class LogCompilation extends DefaultHandler implements ErrorHandler, Cons v2 = Integer.valueOf(0); } phaseNodes.put(phase.getName(), Integer.valueOf(v2.intValue() + phase.getNodes())); - out.printf("\t%s %6.4f %d %d\n", phase.getName(), phase.getElapsedTime(), phase.getStartNodes(), phase.getNodes()); + /* Print phase name, elapsed time, nodes at the start of the phase, + nodes created in the phase, live nodes at the start of the phase, + live nodes added in the phase. + */ + out.printf("\t%s %6.4f %d %d %d %d\n", phase.getName(), phase.getElapsedTime(), phase.getStartNodes(), phase.getNodes(), phase.getStartLiveNodes(), phase.getLiveNodes()); } } else if (e instanceof MakeNotEntrantEvent) { MakeNotEntrantEvent mne = (MakeNotEntrantEvent) e; diff --git a/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java index dc0b7ab3429..80a00364cc3 100644 --- a/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java +++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/LogParser.java @@ -224,7 +224,6 @@ public class LogParser extends DefaultHandler implements ErrorHandler, Constants throw new InternalError("can't find " + name); } int indent = 0; - String compile_id; String type(String id) { String result = types.get(id); @@ -268,12 +267,18 @@ public class LogParser extends DefaultHandler implements ErrorHandler, Constants if (qname.equals("phase")) { Phase p = new Phase(search(atts, "name"), Double.parseDouble(search(atts, "stamp")), - Integer.parseInt(search(atts, "nodes"))); + Integer.parseInt(search(atts, "nodes", "0")), + Integer.parseInt(search(atts, "live"))); phaseStack.push(p); } else if (qname.equals("phase_done")) { Phase p = phaseStack.pop(); - p.setEndNodes(Integer.parseInt(search(atts, "nodes"))); + if (! p.getId().equals(search(atts, "name"))) { + System.out.println("phase: " + p.getId()); + throw new InternalError("phase name mismatch"); + } p.setEnd(Double.parseDouble(search(atts, "stamp"))); + p.setEndNodes(Integer.parseInt(search(atts, "nodes", "0"))); + p.setEndLiveNodes(Integer.parseInt(search(atts, "live"))); compile.getPhases().add(p); } else if (qname.equals("task")) { compile = new Compilation(Integer.parseInt(search(atts, "compile_id", "-1"))); @@ -317,13 +322,16 @@ public class LogParser extends DefaultHandler implements ErrorHandler, Constants m.setName(search(atts, "name")); m.setReturnType(type(search(atts, "return"))); m.setArguments(search(atts, "arguments", "void")); - m.setBytes(search(atts, "bytes")); - m.setIICount(search(atts, "iicount")); - m.setFlags(search(atts, "flags")); + + if (search(atts, "unloaded", "0").equals("0")) { + m.setBytes(search(atts, "bytes")); + m.setIICount(search(atts, "iicount")); + m.setFlags(search(atts, "flags")); + } methods.put(id, m); } else if (qname.equals("call")) { site = new CallSite(bci, method(search(atts, "method"))); - site.setCount(Integer.parseInt(search(atts, "count"))); + site.setCount(Integer.parseInt(search(atts, "count", "0"))); String receiver = atts.getValue("receiver"); if (receiver != null) { site.setReceiver(type(receiver)); @@ -406,6 +414,7 @@ public class LogParser extends DefaultHandler implements ErrorHandler, Constants } else if (qname.equals("parse_done")) { CallSite call = scopes.pop(); call.setEndNodes(Integer.parseInt(search(atts, "nodes", "1"))); + call.setEndLiveNodes(Integer.parseInt(search(atts, "live", "1"))); call.setTimeStamp(Double.parseDouble(search(atts, "stamp"))); scopes.push(call); } diff --git a/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java index b448f457bee..af160abb81a 100644 --- a/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java +++ b/hotspot/src/share/tools/LogCompilation/src/com/sun/hotspot/tools/compiler/Phase.java @@ -30,10 +30,13 @@ public class Phase extends BasicLogEvent { private final int startNodes; private int endNodes; + private final int startLiveNodes; + private int endLiveNodes; - Phase(String n, double s, int nodes) { + Phase(String n, double s, int nodes, int live) { super(s, n); startNodes = nodes; + startLiveNodes = live; } int getNodes() { @@ -55,6 +58,22 @@ public class Phase extends BasicLogEvent { public int getEndNodes() { return endNodes; } + /* Number of live nodes added by the phase */ + int getLiveNodes() { + return getEndLiveNodes() - getStartLiveNodes(); + } + + void setEndLiveNodes(int n) { + endLiveNodes = n; + } + + public int getStartLiveNodes() { + return startLiveNodes; + } + + public int getEndLiveNodes() { + return endLiveNodes; + } @Override public void print(PrintStream stream) { diff --git a/hotspot/src/share/vm/adlc/main.cpp b/hotspot/src/share/vm/adlc/main.cpp index 3bfa0695e5b..8ecce63dd78 100644 --- a/hotspot/src/share/vm/adlc/main.cpp +++ b/hotspot/src/share/vm/adlc/main.cpp @@ -212,7 +212,7 @@ int main(int argc, char *argv[]) AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._VM_file._name)); AD.addInclude(AD._CPP_file, "adfiles", get_basename(AD._HPP_file._name)); AD.addInclude(AD._CPP_file, "memory/allocation.inline.hpp"); - AD.addInclude(AD._CPP_file, "asm/assembler.hpp"); + AD.addInclude(AD._CPP_file, "asm/macroAssembler.inline.hpp"); AD.addInclude(AD._CPP_file, "code/vmreg.hpp"); AD.addInclude(AD._CPP_file, "gc_interface/collectedHeap.inline.hpp"); AD.addInclude(AD._CPP_file, "oops/compiledICHolder.hpp"); @@ -231,17 +231,14 @@ int main(int argc, char *argv[]) AD.addInclude(AD._CPP_file, "runtime/stubRoutines.hpp"); AD.addInclude(AD._CPP_file, "utilities/growableArray.hpp"); #ifdef TARGET_ARCH_x86 - AD.addInclude(AD._CPP_file, "assembler_x86.inline.hpp"); AD.addInclude(AD._CPP_file, "nativeInst_x86.hpp"); AD.addInclude(AD._CPP_file, "vmreg_x86.inline.hpp"); #endif #ifdef TARGET_ARCH_sparc - AD.addInclude(AD._CPP_file, "assembler_sparc.inline.hpp"); AD.addInclude(AD._CPP_file, "nativeInst_sparc.hpp"); AD.addInclude(AD._CPP_file, "vmreg_sparc.inline.hpp"); #endif #ifdef TARGET_ARCH_arm - AD.addInclude(AD._CPP_file, "assembler_arm.inline.hpp"); AD.addInclude(AD._CPP_file, "nativeInst_arm.hpp"); AD.addInclude(AD._CPP_file, "vmreg_arm.inline.hpp"); #endif diff --git a/hotspot/src/share/vm/asm/assembler.cpp b/hotspot/src/share/vm/asm/assembler.cpp index 7f5dbf21ae4..6a34989ec7b 100644 --- a/hotspot/src/share/vm/asm/assembler.cpp +++ b/hotspot/src/share/vm/asm/assembler.cpp @@ -23,26 +23,13 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "asm/codeBuffer.hpp" +#include "runtime/atomic.hpp" +#include "runtime/atomic.inline.hpp" #include "runtime/icache.hpp" #include "runtime/os.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -#endif // Implementation of AbstractAssembler @@ -56,16 +43,13 @@ AbstractAssembler::AbstractAssembler(CodeBuffer* code) { if (code == NULL) return; CodeSection* cs = code->insts(); cs->clear_mark(); // new assembler kills old mark - _code_section = cs; - _code_begin = cs->start(); - _code_limit = cs->limit(); - _code_pos = cs->end(); - _oop_recorder= code->oop_recorder(); - DEBUG_ONLY( _short_branch_delta = 0; ) - if (_code_begin == NULL) { + if (cs->start() == NULL) { vm_exit_out_of_memory(0, err_msg("CodeCache: no room for %s", code->name())); } + _code_section = cs; + _oop_recorder= code->oop_recorder(); + DEBUG_ONLY( _short_branch_delta = 0; ) } void AbstractAssembler::set_code_section(CodeSection* cs) { @@ -73,9 +57,6 @@ void AbstractAssembler::set_code_section(CodeSection* cs) { assert(cs->is_allocated(), "need to pre-allocate this section"); cs->clear_mark(); // new assembly into this section kills old mark _code_section = cs; - _code_begin = cs->start(); - _code_limit = cs->limit(); - _code_pos = cs->end(); } // Inform CodeBuffer that incoming code and relocation will be for stubs @@ -83,7 +64,6 @@ address AbstractAssembler::start_a_stub(int required_space) { CodeBuffer* cb = code(); CodeSection* cs = cb->stubs(); assert(_code_section == cb->insts(), "not in insts?"); - sync(); if (cs->maybe_expand_to_ensure_remaining(required_space) && cb->blob() == NULL) { return NULL; @@ -96,7 +76,6 @@ address AbstractAssembler::start_a_stub(int required_space) { // Should not be called if start_a_stub() returned NULL void AbstractAssembler::end_a_stub() { assert(_code_section == code()->stubs(), "not in stubs?"); - sync(); set_code_section(code()->insts()); } @@ -104,8 +83,7 @@ void AbstractAssembler::end_a_stub() { address AbstractAssembler::start_a_const(int required_space, int required_align) { CodeBuffer* cb = code(); CodeSection* cs = cb->consts(); - assert(_code_section == cb->insts(), "not in insts?"); - sync(); + assert(_code_section == cb->insts() || _code_section == cb->stubs(), "not in insts/stubs?"); address end = cs->end(); int pad = -(intptr_t)end & (required_align-1); if (cs->maybe_expand_to_ensure_remaining(pad + required_space)) { @@ -121,16 +99,13 @@ address AbstractAssembler::start_a_const(int required_space, int required_align) } // Inform CodeBuffer that incoming code and relocation will be code -// Should not be called if start_a_const() returned NULL -void AbstractAssembler::end_a_const() { +// in section cs (insts or stubs). +void AbstractAssembler::end_a_const(CodeSection* cs) { assert(_code_section == code()->consts(), "not in consts?"); - sync(); - set_code_section(code()->insts()); + set_code_section(cs); } - void AbstractAssembler::flush() { - sync(); ICache::invalidate_range(addr_at(0), offset()); } diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp index 897edb0a791..5de33dd503e 100644 --- a/hotspot/src/share/vm/asm/assembler.hpp +++ b/hotspot/src/share/vm/asm/assembler.hpp @@ -25,12 +25,14 @@ #ifndef SHARE_VM_ASM_ASSEMBLER_HPP #define SHARE_VM_ASM_ASSEMBLER_HPP +#include "asm/codeBuffer.hpp" #include "code/oopRecorder.hpp" #include "code/relocInfo.hpp" #include "memory/allocation.hpp" #include "utilities/debug.hpp" #include "utilities/growableArray.hpp" #include "utilities/top.hpp" + #ifdef TARGET_ARCH_x86 # include "register_x86.hpp" # include "vm_version_x86.hpp" @@ -54,7 +56,6 @@ // This file contains platform-independent assembler declarations. -class CodeBuffer; class MacroAssembler; class AbstractAssembler; class Label; @@ -122,7 +123,7 @@ class Label VALUE_OBJ_CLASS_SPEC { assert(_loc == -1, "already bound"); _loc = loc; } - void bind_loc(int pos, int sect); // = bind_loc(locator(pos, sect)) + void bind_loc(int pos, int sect) { bind_loc(CodeBuffer::locator(pos, sect)); } #ifndef PRODUCT // Iterates over all unresolved instructions for printing @@ -137,8 +138,8 @@ class Label VALUE_OBJ_CLASS_SPEC { assert(_loc >= 0, "unbound label"); return _loc; } - int loc_pos() const; // == locator_pos(loc()) - int loc_sect() const; // == locator_sect(loc()) + int loc_pos() const { return CodeBuffer::locator_pos(loc()); } + int loc_sect() const { return CodeBuffer::locator_sect(loc()); } bool is_bound() const { return _loc >= 0; } bool is_unbound() const { return _loc == -1 && _patch_index > 0; } @@ -201,26 +202,32 @@ class AbstractAssembler : public ResourceObj { protected: CodeSection* _code_section; // section within the code buffer - address _code_begin; // first byte of code buffer - address _code_limit; // first byte after code buffer - address _code_pos; // current code generation position OopRecorder* _oop_recorder; // support for relocInfo::oop_type // Code emission & accessing - address addr_at(int pos) const { return _code_begin + pos; } + address addr_at(int pos) const { return code_section()->start() + pos; } + // This routine is called with a label is used for an address. // Labels and displacements truck in offsets, but target must return a PC. - address target(Label& L); // return _code_section->target(L) + address target(Label& L) { return code_section()->target(L, pc()); } bool is8bit(int x) const { return -0x80 <= x && x < 0x80; } bool isByte(int x) const { return 0 <= x && x < 0x100; } bool isShiftCount(int x) const { return 0 <= x && x < 32; } - void emit_byte(int x); // emit a single byte - void emit_word(int x); // emit a 16-bit word (not a wordSize word!) - void emit_long(jint x); // emit a 32-bit word (not a longSize word!) - void emit_address(address x); // emit an address (not a longSize word!) + void emit_int8( int8_t x) { code_section()->emit_int8( x); } + void emit_int16( int16_t x) { code_section()->emit_int16( x); } + void emit_int32( int32_t x) { code_section()->emit_int32( x); } + void emit_int64( int64_t x) { code_section()->emit_int64( x); } + + void emit_float( jfloat x) { code_section()->emit_float( x); } + void emit_double( jdouble x) { code_section()->emit_double( x); } + void emit_address(address x) { code_section()->emit_address(x); } + + void emit_byte(int x) { emit_int8 (x); } // deprecated + void emit_word(int x) { emit_int16(x); } // deprecated + void emit_long(jint x) { emit_int32(x); } // deprecated // Instruction boundaries (required when emitting relocatable values). class InstructionMark: public StackObj { @@ -237,10 +244,10 @@ class AbstractAssembler : public ResourceObj { } }; friend class InstructionMark; - #ifdef ASSERT +#ifdef ASSERT // Make it return true on platforms which need to verify // instruction boundaries for some operations. - inline static bool pd_check_instruction_mark(); + static bool pd_check_instruction_mark(); // Add delta to short branch distance to verify that it still fit into imm8. int _short_branch_delta; @@ -262,13 +269,13 @@ class AbstractAssembler : public ResourceObj { _assm->clear_short_branch_delta(); } }; - #else +#else // Dummy in product. class ShortBranchVerifier: public StackObj { public: ShortBranchVerifier(AbstractAssembler* assm) {} }; - #endif +#endif // Label functions void print(Label& L); @@ -278,9 +285,6 @@ class AbstractAssembler : public ResourceObj { // Creation AbstractAssembler(CodeBuffer* code); - // save end pointer back to code buf. - void sync(); - // ensure buf contains all code (call this before using/copying the code) void flush(); @@ -308,26 +312,31 @@ class AbstractAssembler : public ResourceObj { static bool is_simm32(intptr_t x) { return is_simm(x, 32); } // Accessors - CodeBuffer* code() const; // _code_section->outer() CodeSection* code_section() const { return _code_section; } - int sect() const; // return _code_section->index() - address pc() const { return _code_pos; } - int offset() const { return _code_pos - _code_begin; } - int locator() const; // CodeBuffer::locator(offset(), sect()) + CodeBuffer* code() const { return code_section()->outer(); } + int sect() const { return code_section()->index(); } + address pc() const { return code_section()->end(); } + int offset() const { return code_section()->size(); } + int locator() const { return CodeBuffer::locator(offset(), sect()); } + OopRecorder* oop_recorder() const { return _oop_recorder; } void set_oop_recorder(OopRecorder* r) { _oop_recorder = r; } - address inst_mark() const; - void set_inst_mark(); - void clear_inst_mark(); + address inst_mark() const { return code_section()->mark(); } + void set_inst_mark() { code_section()->set_mark(); } + void clear_inst_mark() { code_section()->clear_mark(); } // Constants in code void a_byte(int x); void a_long(jint x); - void relocate(RelocationHolder const& rspec, int format = 0); + void relocate(RelocationHolder const& rspec, int format = 0) { + assert(!pd_check_instruction_mark() + || inst_mark() == NULL || inst_mark() == code_section()->end(), + "call relocate() between instructions"); + code_section()->relocate(code_section()->end(), rspec, format); + } void relocate( relocInfo::relocType rtype, int format = 0) { - if (rtype != relocInfo::none) - relocate(Relocation::spec_simple(rtype), format); + code_section()->relocate(code_section()->end(), rtype, format); } static int code_fill_byte(); // used to pad out odd-sized code buffers @@ -348,52 +357,55 @@ class AbstractAssembler : public ResourceObj { void end_a_stub(); // Ditto for constants. address start_a_const(int required_space, int required_align = sizeof(double)); - void end_a_const(); + void end_a_const(CodeSection* cs); // Pass the codesection to continue in (insts or stubs?). // constants support + // + // We must remember the code section (insts or stubs) in c1 + // so we can reset to the proper section in end_a_const(). address long_constant(jlong c) { + CodeSection* c1 = _code_section; address ptr = start_a_const(sizeof(c), sizeof(c)); if (ptr != NULL) { - *(jlong*)ptr = c; - _code_pos = ptr + sizeof(c); - end_a_const(); + emit_int64(c); + end_a_const(c1); } return ptr; } address double_constant(jdouble c) { + CodeSection* c1 = _code_section; address ptr = start_a_const(sizeof(c), sizeof(c)); if (ptr != NULL) { - *(jdouble*)ptr = c; - _code_pos = ptr + sizeof(c); - end_a_const(); + emit_double(c); + end_a_const(c1); } return ptr; } address float_constant(jfloat c) { + CodeSection* c1 = _code_section; address ptr = start_a_const(sizeof(c), sizeof(c)); if (ptr != NULL) { - *(jfloat*)ptr = c; - _code_pos = ptr + sizeof(c); - end_a_const(); + emit_float(c); + end_a_const(c1); } return ptr; } address address_constant(address c) { + CodeSection* c1 = _code_section; address ptr = start_a_const(sizeof(c), sizeof(c)); if (ptr != NULL) { - *(address*)ptr = c; - _code_pos = ptr + sizeof(c); - end_a_const(); + emit_address(c); + end_a_const(c1); } return ptr; } address address_constant(address c, RelocationHolder const& rspec) { + CodeSection* c1 = _code_section; address ptr = start_a_const(sizeof(c), sizeof(c)); if (ptr != NULL) { relocate(rspec); - *(address*)ptr = c; - _code_pos = ptr + sizeof(c); - end_a_const(); + emit_address(c); + end_a_const(c1); } return ptr; } diff --git a/hotspot/src/share/vm/asm/assembler.inline.hpp b/hotspot/src/share/vm/asm/assembler.inline.hpp index 6eede4445ca..2dcac0e2c17 100644 --- a/hotspot/src/share/vm/asm/assembler.inline.hpp +++ b/hotspot/src/share/vm/asm/assembler.inline.hpp @@ -26,92 +26,21 @@ #define SHARE_VM_ASM_ASSEMBLER_INLINE_HPP #include "asm/assembler.hpp" -#include "asm/codeBuffer.hpp" -#include "compiler/disassembler.hpp" -#include "runtime/threadLocalStorage.hpp" -inline void AbstractAssembler::sync() { - CodeSection* cs = code_section(); - guarantee(cs->start() == _code_begin, "must not shift code buffer"); - cs->set_end(_code_pos); -} - -inline void AbstractAssembler::emit_byte(int x) { - assert(isByte(x), "not a byte"); - *(unsigned char*)_code_pos = (unsigned char)x; - _code_pos += sizeof(unsigned char); - sync(); -} - - -inline void AbstractAssembler::emit_word(int x) { - *(short*)_code_pos = (short)x; - _code_pos += sizeof(short); - sync(); -} - - -inline void AbstractAssembler::emit_long(jint x) { - *(jint*)_code_pos = x; - _code_pos += sizeof(jint); - sync(); -} - -inline void AbstractAssembler::emit_address(address x) { - *(address*)_code_pos = x; - _code_pos += sizeof(address); - sync(); -} - -inline address AbstractAssembler::inst_mark() const { - return code_section()->mark(); -} - - -inline void AbstractAssembler::set_inst_mark() { - code_section()->set_mark(); -} - - -inline void AbstractAssembler::clear_inst_mark() { - code_section()->clear_mark(); -} - - -inline void AbstractAssembler::relocate(RelocationHolder const& rspec, int format) { - assert(!pd_check_instruction_mark() - || inst_mark() == NULL || inst_mark() == _code_pos, - "call relocate() between instructions"); - code_section()->relocate(_code_pos, rspec, format); -} - - -inline CodeBuffer* AbstractAssembler::code() const { - return code_section()->outer(); -} - -inline int AbstractAssembler::sect() const { - return code_section()->index(); -} - -inline int AbstractAssembler::locator() const { - return CodeBuffer::locator(offset(), sect()); -} - -inline address AbstractAssembler::target(Label& L) { - return code_section()->target(L, pc()); -} - -inline int Label::loc_pos() const { - return CodeBuffer::locator_pos(loc()); -} - -inline int Label::loc_sect() const { - return CodeBuffer::locator_sect(loc()); -} - -inline void Label::bind_loc(int pos, int sect) { - bind_loc(CodeBuffer::locator(pos, sect)); -} +#ifdef TARGET_ARCH_x86 +# include "assembler_x86.inline.hpp" +#endif +#ifdef TARGET_ARCH_sparc +# include "assembler_sparc.inline.hpp" +#endif +#ifdef TARGET_ARCH_zero +# include "assembler_zero.inline.hpp" +#endif +#ifdef TARGET_ARCH_arm +# include "assembler_arm.inline.hpp" +#endif +#ifdef TARGET_ARCH_ppc +# include "assembler_ppc.inline.hpp" +#endif #endif // SHARE_VM_ASM_ASSEMBLER_INLINE_HPP diff --git a/hotspot/src/share/vm/asm/codeBuffer.cpp b/hotspot/src/share/vm/asm/codeBuffer.cpp index 3fb1f179b0c..6b5548a1951 100644 --- a/hotspot/src/share/vm/asm/codeBuffer.cpp +++ b/hotspot/src/share/vm/asm/codeBuffer.cpp @@ -254,6 +254,10 @@ address CodeBuffer::locator_address(int locator) const { return start + locator_pos(locator); } +bool CodeBuffer::is_backward_branch(Label& L) { + return L.is_bound() && insts_end() <= locator_address(L.loc()); +} + address CodeBuffer::decode_begin() { address begin = _insts.start(); if (_decode_begin != NULL && _decode_begin > begin) @@ -758,7 +762,18 @@ void CodeBuffer::relocate_code_to(CodeBuffer* dest) const { // Make the new code copy use the old copy's relocations: dest_cs->initialize_locs_from(cs); + } + // Do relocation after all sections are copied. + // This is necessary if the code uses constants in stubs, which are + // relocated when the corresponding instruction in the code (e.g., a + // call) is relocated. Stubs are placed behind the main code + // section, so that section has to be copied before relocating. + for (int n = (int) SECT_FIRST; n < (int)SECT_LIMIT; n++) { + // pull code out of each section + const CodeSection* cs = code_section(n); + if (cs->is_empty()) continue; // skip trivial section + CodeSection* dest_cs = dest->code_section(n); { // Repair the pc relative information in the code after the move RelocIterator iter(dest_cs); while (iter.next()) { diff --git a/hotspot/src/share/vm/asm/codeBuffer.hpp b/hotspot/src/share/vm/asm/codeBuffer.hpp index 63dba2dbb0d..d13697ef46e 100644 --- a/hotspot/src/share/vm/asm/codeBuffer.hpp +++ b/hotspot/src/share/vm/asm/codeBuffer.hpp @@ -25,17 +25,15 @@ #ifndef SHARE_VM_ASM_CODEBUFFER_HPP #define SHARE_VM_ASM_CODEBUFFER_HPP -#include "asm/assembler.hpp" #include "code/oopRecorder.hpp" #include "code/relocInfo.hpp" -class CodeComments; -class AbstractAssembler; -class MacroAssembler; -class PhaseCFG; -class Compile; -class BufferBlob; -class CodeBuffer; +class CodeComments; +class PhaseCFG; +class Compile; +class BufferBlob; +class CodeBuffer; +class Label; class CodeOffsets: public StackObj { public: @@ -194,10 +192,14 @@ class CodeSection VALUE_OBJ_CLASS_SPEC { } // Code emission - void emit_int8 (int8_t x) { *((int8_t*) end()) = x; set_end(end() + 1); } - void emit_int16(int16_t x) { *((int16_t*) end()) = x; set_end(end() + 2); } - void emit_int32(int32_t x) { *((int32_t*) end()) = x; set_end(end() + 4); } - void emit_int64(int64_t x) { *((int64_t*) end()) = x; set_end(end() + 8); } + void emit_int8 ( int8_t x) { *((int8_t*) end()) = x; set_end(end() + sizeof(int8_t)); } + void emit_int16( int16_t x) { *((int16_t*) end()) = x; set_end(end() + sizeof(int16_t)); } + void emit_int32( int32_t x) { *((int32_t*) end()) = x; set_end(end() + sizeof(int32_t)); } + void emit_int64( int64_t x) { *((int64_t*) end()) = x; set_end(end() + sizeof(int64_t)); } + + void emit_float( jfloat x) { *((jfloat*) end()) = x; set_end(end() + sizeof(jfloat)); } + void emit_double(jdouble x) { *((jdouble*) end()) = x; set_end(end() + sizeof(jdouble)); } + void emit_address(address x) { *((address*) end()) = x; set_end(end() + sizeof(address)); } // Share a scratch buffer for relocinfo. (Hacky; saves a resource allocation.) void initialize_shared_locs(relocInfo* buf, int length); @@ -451,6 +453,9 @@ class CodeBuffer: public StackObj { int locator(address addr) const; address locator_address(int locator) const; + // Heuristic for pre-packing the taken/not-taken bit of a predicted branch. + bool is_backward_branch(Label& L); + // Properties const char* name() const { return _name; } CodeBuffer* before_expand() const { return _before_expand; } diff --git a/hotspot/src/share/vm/asm/macroAssembler.hpp b/hotspot/src/share/vm/asm/macroAssembler.hpp new file mode 100644 index 00000000000..92ecd138634 --- /dev/null +++ b/hotspot/src/share/vm/asm/macroAssembler.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_ASM_MACROASSEMBLER_HPP +#define SHARE_VM_ASM_MACROASSEMBLER_HPP + +#include "asm/assembler.hpp" + +#ifdef TARGET_ARCH_x86 +# include "macroAssembler_x86.hpp" +#endif +#ifdef TARGET_ARCH_sparc +# include "macroAssembler_sparc.hpp" +#endif +#ifdef TARGET_ARCH_zero +# include "assembler_zero.hpp" +#endif +#ifdef TARGET_ARCH_arm +# include "assembler_arm.hpp" +#endif +#ifdef TARGET_ARCH_ppc +# include "assembler_ppc.hpp" +#endif + +#endif // SHARE_VM_ASM_MACROASSEMBLER_HPP diff --git a/hotspot/src/share/vm/asm/macroAssembler.inline.hpp b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp new file mode 100644 index 00000000000..70a157c91a2 --- /dev/null +++ b/hotspot/src/share/vm/asm/macroAssembler.inline.hpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP +#define SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP + +#include "asm/macroAssembler.hpp" + +#ifdef TARGET_ARCH_x86 +// no macroAssembler_x86.inline.hpp +#endif +#ifdef TARGET_ARCH_sparc +# include "macroAssembler_sparc.inline.hpp" +#endif +#ifdef TARGET_ARCH_zero +# include "assembler_zero.inline.hpp" +#endif +#ifdef TARGET_ARCH_arm +# include "assembler_arm.inline.hpp" +#endif +#ifdef TARGET_ARCH_ppc +# include "assembler_ppc.inline.hpp" +#endif + +#endif // SHARE_VM_ASM_MACROASSEMBLER_INLINE_HPP diff --git a/hotspot/src/share/vm/asm/register.hpp b/hotspot/src/share/vm/asm/register.hpp index 128b6070b1f..5afecdeb116 100644 --- a/hotspot/src/share/vm/asm/register.hpp +++ b/hotspot/src/share/vm/asm/register.hpp @@ -93,6 +93,21 @@ enum { name##_##type##EnumValue = value##_##type##EnumValue } #define REGISTER_DEFINITION(type, name) \ const type name = ((type)name##_##type##EnumValue) +#ifdef TARGET_ARCH_x86 +# include "register_x86.hpp" +#endif +#ifdef TARGET_ARCH_sparc +# include "register_sparc.hpp" +#endif +#ifdef TARGET_ARCH_zero +# include "register_zero.hpp" +#endif +#ifdef TARGET_ARCH_arm +# include "register_arm.hpp" +#endif +#ifdef TARGET_ARCH_ppc +# include "register_ppc.hpp" +#endif // Debugging support diff --git a/hotspot/src/share/vm/c1/c1_Compilation.cpp b/hotspot/src/share/vm/c1/c1_Compilation.cpp index 68d3d2cc36b..cc268ef1450 100644 --- a/hotspot/src/share/vm/c1/c1_Compilation.cpp +++ b/hotspot/src/share/vm/c1/c1_Compilation.cpp @@ -129,7 +129,15 @@ void Compilation::build_hir() { CHECK_BAILOUT(); // setup ir + CompileLog* log = this->log(); + if (log != NULL) { + log->begin_head("parse method='%d' ", + log->identify(_method)); + log->stamp(); + log->end_head(); + } _hir = new IR(this, method(), osr_bci()); + if (log) log->done("parse"); if (!_hir->is_valid()) { bailout("invalid parsing"); return; diff --git a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp index 3b6bbabdc34..f5e84bbe5f2 100644 --- a/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp +++ b/hotspot/src/share/vm/c1/c1_MacroAssembler.hpp @@ -25,22 +25,8 @@ #ifndef SHARE_VM_C1_C1_MACROASSEMBLER_HPP #define SHARE_VM_C1_C1_MACROASSEMBLER_HPP -#include "asm/assembler.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -#endif +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" class CodeEmitInfo; diff --git a/hotspot/src/share/vm/ci/ciMethod.cpp b/hotspot/src/share/vm/ci/ciMethod.cpp index ed1f4b8b24d..8ad9ecb1199 100644 --- a/hotspot/src/share/vm/ci/ciMethod.cpp +++ b/hotspot/src/share/vm/ci/ciMethod.cpp @@ -741,6 +741,24 @@ int ciMethod::interpreter_call_site_count(int bci) { return -1; // unknown } +// ------------------------------------------------------------------ +// ciMethod::get_field_at_bci +ciField* ciMethod::get_field_at_bci(int bci, bool &will_link) { + ciBytecodeStream iter(this); + iter.reset_to_bci(bci); + iter.next(); + return iter.get_field(will_link); +} + +// ------------------------------------------------------------------ +// ciMethod::get_method_at_bci +ciMethod* ciMethod::get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature) { + ciBytecodeStream iter(this); + iter.reset_to_bci(bci); + iter.next(); + return iter.get_method(will_link, declared_signature); +} + // ------------------------------------------------------------------ // Adjust a CounterData count to be commensurate with // interpreter_invocation_count. If the MDO exists for diff --git a/hotspot/src/share/vm/ci/ciMethod.hpp b/hotspot/src/share/vm/ci/ciMethod.hpp index cf8a622fa7c..193eb68de05 100644 --- a/hotspot/src/share/vm/ci/ciMethod.hpp +++ b/hotspot/src/share/vm/ci/ciMethod.hpp @@ -226,6 +226,9 @@ class ciMethod : public ciMetadata { ciCallProfile call_profile_at_bci(int bci); int interpreter_call_site_count(int bci); + ciField* get_field_at_bci( int bci, bool &will_link); + ciMethod* get_method_at_bci(int bci, bool &will_link, ciSignature* *declared_signature); + // Given a certain calling environment, find the monomorphic target // for the call. Return NULL if the call is not monomorphic in // its calling environment. diff --git a/hotspot/src/share/vm/ci/ciSignature.hpp b/hotspot/src/share/vm/ci/ciSignature.hpp index 37506c953b4..1d0f067c3bc 100644 --- a/hotspot/src/share/vm/ci/ciSignature.hpp +++ b/hotspot/src/share/vm/ci/ciSignature.hpp @@ -57,12 +57,14 @@ public: ciSymbol* as_symbol() const { return _symbol; } ciKlass* accessing_klass() const { return _accessing_klass; } - ciType* return_type() const; - ciType* type_at(int index) const; + ciType* return_type() const; + ciType* type_at(int index) const; int size() const { return _size; } int count() const { return _count; } + int arg_size_for_bc(Bytecodes::Code bc) { return size() + (Bytecodes::has_receiver(bc) ? 1 : 0); } + bool equals(ciSignature* that); void print_signature(); diff --git a/hotspot/src/share/vm/code/icBuffer.cpp b/hotspot/src/share/vm/code/icBuffer.cpp index 8d5200ce901..81ef87ce200 100644 --- a/hotspot/src/share/vm/code/icBuffer.cpp +++ b/hotspot/src/share/vm/code/icBuffer.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "code/codeCache.hpp" #include "code/compiledIC.hpp" #include "code/icBuffer.hpp" #include "code/nmethod.hpp" @@ -37,21 +38,6 @@ #include "oops/oop.inline2.hpp" #include "runtime/mutexLocker.hpp" #include "runtime/stubRoutines.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -#endif DEF_STUB_INTERFACE(ICStub); diff --git a/hotspot/src/share/vm/code/relocInfo.cpp b/hotspot/src/share/vm/code/relocInfo.cpp index fbe597ad5ce..679bc290ade 100644 --- a/hotspot/src/share/vm/code/relocInfo.cpp +++ b/hotspot/src/share/vm/code/relocInfo.cpp @@ -23,32 +23,13 @@ */ #include "precompiled.hpp" +#include "code/codeCache.hpp" #include "code/compiledIC.hpp" #include "code/nmethod.hpp" #include "code/relocInfo.hpp" #include "memory/resourceArea.hpp" #include "runtime/stubCodeGenerator.hpp" #include "utilities/copy.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -# include "nativeInst_x86.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -# include "nativeInst_sparc.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -# include "nativeInst_zero.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -# include "nativeInst_arm.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -# include "nativeInst_ppc.hpp" -#endif const RelocationHolder RelocationHolder::none; // its type is relocInfo::none diff --git a/hotspot/src/share/vm/code/vmreg.hpp b/hotspot/src/share/vm/code/vmreg.hpp index 23832a398c1..70b476de00e 100644 --- a/hotspot/src/share/vm/code/vmreg.hpp +++ b/hotspot/src/share/vm/code/vmreg.hpp @@ -27,21 +27,8 @@ #include "memory/allocation.hpp" #include "utilities/globalDefinitions.hpp" -#ifdef TARGET_ARCH_x86 -# include "register_x86.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "register_sparc.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "register_zero.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "register_arm.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "register_ppc.hpp" -#endif +#include "asm/register.hpp" + #ifdef COMPILER2 #include "opto/adlcVMDeps.hpp" #include "utilities/ostream.hpp" diff --git a/hotspot/src/share/vm/interpreter/bytecodes.hpp b/hotspot/src/share/vm/interpreter/bytecodes.hpp index b33df83d74a..707f210e8e0 100644 --- a/hotspot/src/share/vm/interpreter/bytecodes.hpp +++ b/hotspot/src/share/vm/interpreter/bytecodes.hpp @@ -423,7 +423,9 @@ class Bytecodes: AllStatic { static bool is_zero_const (Code code) { return (code == _aconst_null || code == _iconst_0 || code == _fconst_0 || code == _dconst_0); } static bool is_invoke (Code code) { return (_invokevirtual <= code && code <= _invokedynamic); } - + static bool has_receiver (Code code) { assert(is_invoke(code), ""); return code == _invokevirtual || + code == _invokespecial || + code == _invokeinterface; } static bool has_optional_appendix(Code code) { return code == _invokedynamic || code == _invokehandle; } static int compute_flags (const char* format, int more_flags = 0); // compute the flags diff --git a/hotspot/src/share/vm/interpreter/interpreter.cpp b/hotspot/src/share/vm/interpreter/interpreter.cpp index 4513eebb79c..24de109f39c 100644 --- a/hotspot/src/share/vm/interpreter/interpreter.cpp +++ b/hotspot/src/share/vm/interpreter/interpreter.cpp @@ -23,7 +23,9 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "compiler/disassembler.hpp" #include "interpreter/bytecodeHistogram.hpp" #include "interpreter/bytecodeInterpreter.hpp" #include "interpreter/interpreter.hpp" diff --git a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp index 0b83ad21eab..682f70032e5 100644 --- a/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp +++ b/hotspot/src/share/vm/interpreter/interpreterRuntime.cpp @@ -26,6 +26,7 @@ #include "classfile/systemDictionary.hpp" #include "classfile/vmSymbols.hpp" #include "compiler/compileBroker.hpp" +#include "compiler/disassembler.hpp" #include "gc_interface/collectedHeap.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" diff --git a/hotspot/src/share/vm/opto/block.hpp b/hotspot/src/share/vm/opto/block.hpp index d0d67b7fcbf..a2e4615b8d0 100644 --- a/hotspot/src/share/vm/opto/block.hpp +++ b/hotspot/src/share/vm/opto/block.hpp @@ -292,7 +292,7 @@ class Block : public CFGElement { void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs); bool schedule_local(PhaseCFG *cfg, Matcher &m, GrowableArray &ready_cnt, VectorSet &next_call); // Cleanup if any code lands between a Call and his Catch - void call_catch_cleanup(Block_Array &bbs); + void call_catch_cleanup(Block_Array &bbs, Compile *C); // Detect implicit-null-check opportunities. Basically, find NULL checks // with suitable memory ops nearby. Use the memory op to do the NULL check. // I can generate a memory op if there is not one nearby. diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp index 4fdebf526cb..ba30d230cee 100644 --- a/hotspot/src/share/vm/opto/c2_globals.hpp +++ b/hotspot/src/share/vm/opto/c2_globals.hpp @@ -115,6 +115,12 @@ notproduct(bool, VerifyOpto, false, \ "Apply more time consuming verification during compilation") \ \ + notproduct(bool, VerifyIdealNodeCount, false, \ + "Verify that tracked dead ideal node count is accurate") \ + \ + notproduct(bool, PrintIdealNodeCount, false, \ + "Print liveness counts of ideal nodes") \ + \ notproduct(bool, VerifyOptoOopOffsets, false, \ "Check types of base addresses in field references") \ \ diff --git a/hotspot/src/share/vm/opto/callGenerator.cpp b/hotspot/src/share/vm/opto/callGenerator.cpp index 93f2b859ba0..33d8c0c6241 100644 --- a/hotspot/src/share/vm/opto/callGenerator.cpp +++ b/hotspot/src/share/vm/opto/callGenerator.cpp @@ -139,7 +139,7 @@ JVMState* DirectCallGenerator::generate(JVMState* jvms) { if (!is_static) { // Make an explicit receiver null_check as part of this call. // Since we share a map with the caller, his JVMS gets adjusted. - kit.null_check_receiver(method()); + kit.null_check_receiver_before_call(method()); if (kit.stopped()) { // And dump it back to the caller, decorated with any exceptions: return kit.transfer_exceptions_into_jvms(); @@ -207,7 +207,7 @@ JVMState* VirtualCallGenerator::generate(JVMState* jvms) { >= (uint)ImplicitNullCheckThreshold))) { // Make an explicit receiver null_check as part of this call. // Since we share a map with the caller, his JVMS gets adjusted. - receiver = kit.null_check_receiver(method()); + receiver = kit.null_check_receiver_before_call(method()); if (kit.stopped()) { // And dump it back to the caller, decorated with any exceptions: return kit.transfer_exceptions_into_jvms(); @@ -491,7 +491,7 @@ JVMState* PredictedCallGenerator::generate(JVMState* jvms) { jvms->bci(), log->identify(_predicted_receiver)); } - receiver = kit.null_check_receiver(method()); + receiver = kit.null_check_receiver_before_call(method()); if (kit.stopped()) { return kit.transfer_exceptions_into_jvms(); } @@ -597,7 +597,7 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* switch (iid) { case vmIntrinsics::_invokeBasic: { - // get MethodHandle receiver + // Get MethodHandle receiver: Node* receiver = kit.argument(0); if (receiver->Opcode() == Op_ConP) { const TypeOopPtr* oop_ptr = receiver->bottom_type()->is_oopptr(); @@ -618,7 +618,7 @@ CallGenerator* CallGenerator::for_method_handle_inline(JVMState* jvms, ciMethod* case vmIntrinsics::_linkToSpecial: case vmIntrinsics::_linkToInterface: { - // pop MemberName argument + // Get MemberName argument: Node* member_name = kit.argument(callee->arg_size() - 1); if (member_name->Opcode() == Op_ConP) { const TypeOopPtr* oop_ptr = member_name->bottom_type()->is_oopptr(); diff --git a/hotspot/src/share/vm/opto/callnode.hpp b/hotspot/src/share/vm/opto/callnode.hpp index a412e2b0d9f..f7f4faaa6e3 100644 --- a/hotspot/src/share/vm/opto/callnode.hpp +++ b/hotspot/src/share/vm/opto/callnode.hpp @@ -344,17 +344,26 @@ public: OopMap *oop_map() const { return _oop_map; } void set_oop_map(OopMap *om) { _oop_map = om; } + private: + void verify_input(JVMState* jvms, uint idx) const { + assert(verify_jvms(jvms), "jvms must match"); + Node* n = in(idx); + assert((!n->bottom_type()->isa_long() && !n->bottom_type()->isa_double()) || + in(idx + 1)->is_top(), "2nd half of long/double"); + } + + public: // Functionality from old debug nodes which has changed Node *local(JVMState* jvms, uint idx) const { - assert(verify_jvms(jvms), "jvms must match"); + verify_input(jvms, jvms->locoff() + idx); return in(jvms->locoff() + idx); } Node *stack(JVMState* jvms, uint idx) const { - assert(verify_jvms(jvms), "jvms must match"); + verify_input(jvms, jvms->stkoff() + idx); return in(jvms->stkoff() + idx); } Node *argument(JVMState* jvms, uint idx) const { - assert(verify_jvms(jvms), "jvms must match"); + verify_input(jvms, jvms->argoff() + idx); return in(jvms->argoff() + idx); } Node *monitor_box(JVMState* jvms, uint idx) const { diff --git a/hotspot/src/share/vm/opto/chaitin.cpp b/hotspot/src/share/vm/opto/chaitin.cpp index e626a10d875..0fbe723c44f 100644 --- a/hotspot/src/share/vm/opto/chaitin.cpp +++ b/hotspot/src/share/vm/opto/chaitin.cpp @@ -1495,7 +1495,7 @@ void PhaseChaitin::fixup_spills() { cisc->ins_req(1,src); // Requires a memory edge } b->_nodes.map(j,cisc); // Insert into basic block - n->subsume_by(cisc); // Correct graph + n->subsume_by(cisc, C); // Correct graph // ++_used_cisc_instructions; #ifndef PRODUCT diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp index c4da70ee32c..ffb7ed7b8d5 100644 --- a/hotspot/src/share/vm/opto/compile.cpp +++ b/hotspot/src/share/vm/opto/compile.cpp @@ -23,11 +23,13 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" #include "classfile/systemDictionary.hpp" #include "code/exceptionHandlerTable.hpp" #include "code/nmethod.hpp" #include "compiler/compileLog.hpp" +#include "compiler/disassembler.hpp" #include "compiler/oopMap.hpp" #include "opto/addnode.hpp" #include "opto/block.hpp" @@ -316,7 +318,12 @@ void Compile::gvn_replace_by(Node* n, Node* nn) { } - +static inline bool not_a_node(const Node* n) { + if (n == NULL) return true; + if (((intptr_t)n & 1) != 0) return true; // uninitialized, etc. + if (*(address*)n == badAddress) return true; // kill by Node::destruct + return false; +} // Identify all nodes that are reachable from below, useful. // Use breadth-first pass that records state in a Unique_Node_List, @@ -337,12 +344,27 @@ void Compile::identify_useful_nodes(Unique_Node_List &useful) { uint max = n->len(); for( uint i = 0; i < max; ++i ) { Node *m = n->in(i); - if( m == NULL ) continue; + if (not_a_node(m)) continue; useful.push(m); } } } +// Update dead_node_list with any missing dead nodes using useful +// list. Consider all non-useful nodes to be useless i.e., dead nodes. +void Compile::update_dead_node_list(Unique_Node_List &useful) { + uint max_idx = unique(); + VectorSet& useful_node_set = useful.member_set(); + + for (uint node_idx = 0; node_idx < max_idx; node_idx++) { + // If node with index node_idx is not in useful set, + // mark it as dead in dead node list. + if (! useful_node_set.test(node_idx) ) { + record_dead_node(node_idx); + } + } +} + // Disconnect all useless nodes by disconnecting those at the boundary. void Compile::remove_useless_nodes(Unique_Node_List &useful) { uint next = 0; @@ -582,6 +604,8 @@ Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr _inner_loops(0), _scratch_const_size(-1), _in_scratch_emit_size(false), + _dead_node_list(comp_arena()), + _dead_node_count(0), #ifndef PRODUCT _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")), _printer(IdealGraphPrinter::printer()), @@ -873,6 +897,8 @@ Compile::Compile( ciEnv* ci_env, _trace_opto_output(TraceOptoOutput), _printer(NULL), #endif + _dead_node_list(comp_arena()), + _dead_node_count(0), _congraph(NULL) { C = this; @@ -1069,6 +1095,72 @@ void Compile::set_cached_top_node(Node* tn) { assert(_top == NULL || top()->is_top(), ""); } +#ifdef ASSERT +uint Compile::count_live_nodes_by_graph_walk() { + Unique_Node_List useful(comp_arena()); + // Get useful node list by walking the graph. + identify_useful_nodes(useful); + return useful.size(); +} + +void Compile::print_missing_nodes() { + + // Return if CompileLog is NULL and PrintIdealNodeCount is false. + if ((_log == NULL) && (! PrintIdealNodeCount)) { + return; + } + + // This is an expensive function. It is executed only when the user + // specifies VerifyIdealNodeCount option or otherwise knows the + // additional work that needs to be done to identify reachable nodes + // by walking the flow graph and find the missing ones using + // _dead_node_list. + + Unique_Node_List useful(comp_arena()); + // Get useful node list by walking the graph. + identify_useful_nodes(useful); + + uint l_nodes = C->live_nodes(); + uint l_nodes_by_walk = useful.size(); + + if (l_nodes != l_nodes_by_walk) { + if (_log != NULL) { + _log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk))); + _log->stamp(); + _log->end_head(); + } + VectorSet& useful_member_set = useful.member_set(); + int last_idx = l_nodes_by_walk; + for (int i = 0; i < last_idx; i++) { + if (useful_member_set.test(i)) { + if (_dead_node_list.test(i)) { + if (_log != NULL) { + _log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i); + } + if (PrintIdealNodeCount) { + // Print the log message to tty + tty->print_cr("mismatched_node idx='%d' both live and dead'", i); + useful.at(i)->dump(); + } + } + } + else if (! _dead_node_list.test(i)) { + if (_log != NULL) { + _log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i); + } + if (PrintIdealNodeCount) { + // Print the log message to tty + tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i); + } + } + } + if (_log != NULL) { + _log->tail("mismatched_nodes"); + } + } +} +#endif + #ifndef PRODUCT void Compile::verify_top(Node* tn) const { if (tn != NULL) { @@ -2087,7 +2179,7 @@ static bool oop_offset_is_sane(const TypeInstPtr* tp) { // Eliminate trivially redundant StoreCMs and accumulate their // precedence edges. -static void eliminate_redundant_card_marks(Node* n) { +void Compile::eliminate_redundant_card_marks(Node* n) { assert(n->Opcode() == Op_StoreCM, "expected StoreCM"); if (n->in(MemNode::Address)->outcnt() > 1) { // There are multiple users of the same address so it might be @@ -2122,7 +2214,7 @@ static void eliminate_redundant_card_marks(Node* n) { // Eliminate the previous StoreCM prev->set_req(MemNode::Memory, mem->in(MemNode::Memory)); assert(mem->outcnt() == 0, "should be dead"); - mem->disconnect_inputs(NULL); + mem->disconnect_inputs(NULL, this); } else { prev = mem; } @@ -2133,7 +2225,7 @@ static void eliminate_redundant_card_marks(Node* n) { //------------------------------final_graph_reshaping_impl---------------------- // Implement items 1-5 from final_graph_reshaping below. -static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { +void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { if ( n->outcnt() == 0 ) return; // dead node uint nop = n->Opcode(); @@ -2163,8 +2255,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { #ifdef ASSERT if( n->is_Mem() ) { - Compile* C = Compile::current(); - int alias_idx = C->get_alias_index(n->as_Mem()->adr_type()); + int alias_idx = get_alias_index(n->as_Mem()->adr_type()); assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw || // oop will be recorded in oop map if load crosses safepoint n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() || @@ -2213,7 +2304,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { break; case Op_Opaque1: // Remove Opaque Nodes before matching case Op_Opaque2: // Remove Opaque Nodes before matching - n->subsume_by(n->in(1)); + n->subsume_by(n->in(1), this); break; case Op_CallStaticJava: case Op_CallJava: @@ -2337,8 +2428,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { int op = t->isa_oopptr() ? Op_ConN : Op_ConNKlass; // Look for existing ConN node of the same exact type. - Compile* C = Compile::current(); - Node* r = C->root(); + Node* r = root(); uint cnt = r->outcnt(); for (uint i = 0; i < cnt; i++) { Node* m = r->raw_out(i); @@ -2352,14 +2442,14 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { // Decode a narrow oop to match address // [R12 + narrow_oop_reg<<3 + offset] if (t->isa_oopptr()) { - nn = new (C) DecodeNNode(nn, t); + nn = new (this) DecodeNNode(nn, t); } else { - nn = new (C) DecodeNKlassNode(nn, t); + nn = new (this) DecodeNKlassNode(nn, t); } n->set_req(AddPNode::Base, nn); n->set_req(AddPNode::Address, nn); if (addp->outcnt() == 0) { - addp->disconnect_inputs(NULL); + addp->disconnect_inputs(NULL, this); } } } @@ -2371,7 +2461,6 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { #ifdef _LP64 case Op_CastPP: if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) { - Compile* C = Compile::current(); Node* in1 = n->in(1); const Type* t = n->bottom_type(); Node* new_in1 = in1->clone(); @@ -2400,9 +2489,9 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { new_in1->set_req(0, n->in(0)); } - n->subsume_by(new_in1); + n->subsume_by(new_in1, this); if (in1->outcnt() == 0) { - in1->disconnect_inputs(NULL); + in1->disconnect_inputs(NULL, this); } } break; @@ -2419,7 +2508,6 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { } assert(in1->is_DecodeNarrowPtr(), "sanity"); - Compile* C = Compile::current(); Node* new_in2 = NULL; if (in2->is_DecodeNarrowPtr()) { assert(in2->Opcode() == in1->Opcode(), "must be same node type"); @@ -2432,7 +2520,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { // oops implicit null check is not generated. // This will allow to generate normal oop implicit null check. if (Matcher::gen_narrow_oop_implicit_null_checks()) - new_in2 = ConNode::make(C, TypeNarrowOop::NULL_PTR); + new_in2 = ConNode::make(this, TypeNarrowOop::NULL_PTR); // // This transformation together with CastPP transformation above // will generated code for implicit NULL checks for compressed oops. @@ -2471,19 +2559,19 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { // NullCheck base_reg // } else if (t->isa_oopptr()) { - new_in2 = ConNode::make(C, t->make_narrowoop()); + new_in2 = ConNode::make(this, t->make_narrowoop()); } else if (t->isa_klassptr()) { - new_in2 = ConNode::make(C, t->make_narrowklass()); + new_in2 = ConNode::make(this, t->make_narrowklass()); } } if (new_in2 != NULL) { - Node* cmpN = new (C) CmpNNode(in1->in(1), new_in2); - n->subsume_by( cmpN ); + Node* cmpN = new (this) CmpNNode(in1->in(1), new_in2); + n->subsume_by(cmpN, this); if (in1->outcnt() == 0) { - in1->disconnect_inputs(NULL); + in1->disconnect_inputs(NULL, this); } if (in2->outcnt() == 0) { - in2->disconnect_inputs(NULL); + in2->disconnect_inputs(NULL, this); } } } @@ -2501,21 +2589,20 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { case Op_EncodePKlass: { Node* in1 = n->in(1); if (in1->is_DecodeNarrowPtr()) { - n->subsume_by(in1->in(1)); + n->subsume_by(in1->in(1), this); } else if (in1->Opcode() == Op_ConP) { - Compile* C = Compile::current(); const Type* t = in1->bottom_type(); if (t == TypePtr::NULL_PTR) { assert(t->isa_oopptr(), "null klass?"); - n->subsume_by(ConNode::make(C, TypeNarrowOop::NULL_PTR)); + n->subsume_by(ConNode::make(this, TypeNarrowOop::NULL_PTR), this); } else if (t->isa_oopptr()) { - n->subsume_by(ConNode::make(C, t->make_narrowoop())); + n->subsume_by(ConNode::make(this, t->make_narrowoop()), this); } else if (t->isa_klassptr()) { - n->subsume_by(ConNode::make(C, t->make_narrowklass())); + n->subsume_by(ConNode::make(this, t->make_narrowklass()), this); } } if (in1->outcnt() == 0) { - in1->disconnect_inputs(NULL); + in1->disconnect_inputs(NULL, this); } break; } @@ -2538,7 +2625,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { } } assert(proj != NULL, "must be found"); - p->subsume_by(proj); + p->subsume_by(proj, this); } } break; @@ -2558,7 +2645,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { unique_in = NULL; } if (unique_in != NULL) { - n->subsume_by(unique_in); + n->subsume_by(unique_in, this); } } break; @@ -2571,16 +2658,15 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { Node* d = n->find_similar(Op_DivI); if (d) { // Replace them with a fused divmod if supported - Compile* C = Compile::current(); if (Matcher::has_match_rule(Op_DivModI)) { - DivModINode* divmod = DivModINode::make(C, n); - d->subsume_by(divmod->div_proj()); - n->subsume_by(divmod->mod_proj()); + DivModINode* divmod = DivModINode::make(this, n); + d->subsume_by(divmod->div_proj(), this); + n->subsume_by(divmod->mod_proj(), this); } else { // replace a%b with a-((a/b)*b) - Node* mult = new (C) MulINode(d, d->in(2)); - Node* sub = new (C) SubINode(d->in(1), mult); - n->subsume_by( sub ); + Node* mult = new (this) MulINode(d, d->in(2)); + Node* sub = new (this) SubINode(d->in(1), mult); + n->subsume_by(sub, this); } } } @@ -2592,16 +2678,15 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { Node* d = n->find_similar(Op_DivL); if (d) { // Replace them with a fused divmod if supported - Compile* C = Compile::current(); if (Matcher::has_match_rule(Op_DivModL)) { - DivModLNode* divmod = DivModLNode::make(C, n); - d->subsume_by(divmod->div_proj()); - n->subsume_by(divmod->mod_proj()); + DivModLNode* divmod = DivModLNode::make(this, n); + d->subsume_by(divmod->div_proj(), this); + n->subsume_by(divmod->mod_proj(), this); } else { // replace a%b with a-((a/b)*b) - Node* mult = new (C) MulLNode(d, d->in(2)); - Node* sub = new (C) SubLNode(d->in(1), mult); - n->subsume_by( sub ); + Node* mult = new (this) MulLNode(d, d->in(2)); + Node* sub = new (this) SubLNode(d->in(1), mult); + n->subsume_by(sub, this); } } } @@ -2620,8 +2705,8 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { if (n->req()-1 > 2) { // Replace many operand PackNodes with a binary tree for matching PackNode* p = (PackNode*) n; - Node* btp = p->binary_tree_pack(Compile::current(), 1, n->req()); - n->subsume_by(btp); + Node* btp = p->binary_tree_pack(this, 1, n->req()); + n->subsume_by(btp, this); } break; case Op_Loop: @@ -2645,18 +2730,16 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { if (t != NULL && t->is_con()) { juint shift = t->get_con(); if (shift > mask) { // Unsigned cmp - Compile* C = Compile::current(); - n->set_req(2, ConNode::make(C, TypeInt::make(shift & mask))); + n->set_req(2, ConNode::make(this, TypeInt::make(shift & mask))); } } else { if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) { - Compile* C = Compile::current(); - Node* shift = new (C) AndINode(in2, ConNode::make(C, TypeInt::make(mask))); + Node* shift = new (this) AndINode(in2, ConNode::make(this, TypeInt::make(mask))); n->set_req(2, shift); } } if (in2->outcnt() == 0) { // Remove dead node - in2->disconnect_inputs(NULL); + in2->disconnect_inputs(NULL, this); } } break; @@ -2674,7 +2757,7 @@ static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) { //------------------------------final_graph_reshaping_walk--------------------- // Replacing Opaque nodes with their input in final_graph_reshaping_impl(), // requires that the walk visits a node's inputs before visiting the node. -static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) { +void Compile::final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) { ResourceArea *area = Thread::current()->resource_area(); Unique_Node_List sfpt(area); @@ -2741,7 +2824,7 @@ static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Re n->set_req(j, in->in(1)); } if (in->outcnt() == 0) { - in->disconnect_inputs(NULL); + in->disconnect_inputs(NULL, this); } } } @@ -3014,7 +3097,8 @@ void Compile::record_failure(const char* reason) { } Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog) - : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false) + : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false), + _phase_name(name), _dolog(dolog) { if (dolog) { C = Compile::current(); @@ -3024,15 +3108,34 @@ Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, boo _log = NULL; } if (_log != NULL) { - _log->begin_head("phase name='%s' nodes='%d'", name, C->unique()); + _log->begin_head("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes()); _log->stamp(); _log->end_head(); } } Compile::TracePhase::~TracePhase() { + + C = Compile::current(); + if (_dolog) { + _log = C->log(); + } else { + _log = NULL; + } + +#ifdef ASSERT + if (PrintIdealNodeCount) { + tty->print_cr("phase name='%s' nodes='%d' live='%d' live_graph_walk='%d'", + _phase_name, C->unique(), C->live_nodes(), C->count_live_nodes_by_graph_walk()); + } + + if (VerifyIdealNodeCount) { + Compile::current()->print_missing_nodes(); + } +#endif + if (_log != NULL) { - _log->done("phase nodes='%d'", C->unique()); + _log->done("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes()); } } diff --git a/hotspot/src/share/vm/opto/compile.hpp b/hotspot/src/share/vm/opto/compile.hpp index 44bf277689c..aacb72cbfb4 100644 --- a/hotspot/src/share/vm/opto/compile.hpp +++ b/hotspot/src/share/vm/opto/compile.hpp @@ -75,6 +75,8 @@ class TypeFunc; class Unique_Node_List; class nmethod; class WarmCallInfo; +class Node_Stack; +struct Final_Reshape_Counts; //------------------------------Compile---------------------------------------- // This class defines a top-level Compiler invocation. @@ -98,6 +100,8 @@ class Compile : public Phase { private: Compile* C; CompileLog* _log; + const char* _phase_name; + bool _dolog; public: TracePhase(const char* name, elapsedTimer* accumulator, bool dolog); ~TracePhase(); @@ -313,6 +317,9 @@ class Compile : public Phase { // Node management uint _unique; // Counter for unique Node indices + VectorSet _dead_node_list; // Set of dead nodes + uint _dead_node_count; // Number of dead nodes; VectorSet::Size() is O(N). + // So use this to keep count and make the call O(1). debug_only(static int _debug_idx;) // Monotonic counter (not reset), use -XX:BreakAtNode= Arena _node_arena; // Arena for new-space Nodes Arena _old_arena; // Arena for old-space Nodes, lifetime during xform @@ -534,7 +541,7 @@ class Compile : public Phase { ciEnv* env() const { return _env; } CompileLog* log() const { return _log; } bool failing() const { return _env->failing() || _failure_reason != NULL; } - const char* failure_reason() { return _failure_reason; } + const char* failure_reason() { return _failure_reason; } bool failure_reason_is(const char* r) { return (r==_failure_reason) || (r!=NULL && _failure_reason!=NULL && strcmp(r, _failure_reason)==0); } void record_failure(const char* reason); @@ -549,7 +556,7 @@ class Compile : public Phase { record_method_not_compilable(reason, true); } bool check_node_count(uint margin, const char* reason) { - if (unique() + margin > (uint)MaxNodeLimit) { + if (live_nodes() + margin > (uint)MaxNodeLimit) { record_method_not_compilable(reason); return true; } else { @@ -558,25 +565,41 @@ class Compile : public Phase { } // Node management - uint unique() const { return _unique; } - uint next_unique() { return _unique++; } - void set_unique(uint i) { _unique = i; } - static int debug_idx() { return debug_only(_debug_idx)+0; } - static void set_debug_idx(int i) { debug_only(_debug_idx = i); } - Arena* node_arena() { return &_node_arena; } - Arena* old_arena() { return &_old_arena; } - RootNode* root() const { return _root; } - void set_root(RootNode* r) { _root = r; } - StartNode* start() const; // (Derived from root.) + uint unique() const { return _unique; } + uint next_unique() { return _unique++; } + void set_unique(uint i) { _unique = i; } + static int debug_idx() { return debug_only(_debug_idx)+0; } + static void set_debug_idx(int i) { debug_only(_debug_idx = i); } + Arena* node_arena() { return &_node_arena; } + Arena* old_arena() { return &_old_arena; } + RootNode* root() const { return _root; } + void set_root(RootNode* r) { _root = r; } + StartNode* start() const; // (Derived from root.) void init_start(StartNode* s); - Node* immutable_memory(); + Node* immutable_memory(); - Node* recent_alloc_ctl() const { return _recent_alloc_ctl; } - Node* recent_alloc_obj() const { return _recent_alloc_obj; } - void set_recent_alloc(Node* ctl, Node* obj) { + Node* recent_alloc_ctl() const { return _recent_alloc_ctl; } + Node* recent_alloc_obj() const { return _recent_alloc_obj; } + void set_recent_alloc(Node* ctl, Node* obj) { _recent_alloc_ctl = ctl; _recent_alloc_obj = obj; - } + } + void record_dead_node(uint idx) { if (_dead_node_list.test_set(idx)) return; + _dead_node_count++; + } + uint dead_node_count() { return _dead_node_count; } + void reset_dead_node_list() { _dead_node_list.Reset(); + _dead_node_count = 0; + } + uint live_nodes() { + int val = _unique - _dead_node_count; + assert (val >= 0, err_msg_res("number of tracked dead nodes %d more than created nodes %d", _unique, _dead_node_count)); + return (uint) val; + } +#ifdef ASSERT + uint count_live_nodes_by_graph_walk(); + void print_missing_nodes(); +#endif // Constant table ConstantTable& constant_table() { return _constant_table; } @@ -678,6 +701,7 @@ class Compile : public Phase { void identify_useful_nodes(Unique_Node_List &useful); + void update_dead_node_list(Unique_Node_List &useful); void remove_useless_nodes (Unique_Node_List &useful); WarmCallInfo* warm_calls() const { return _warm_calls; } @@ -892,6 +916,11 @@ class Compile : public Phase { static juint _intrinsic_hist_count[vmIntrinsics::ID_LIMIT]; static jubyte _intrinsic_hist_flags[vmIntrinsics::ID_LIMIT]; #endif + // Function calls made by the public function final_graph_reshaping. + // No need to be made public as they are not called elsewhere. + void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc); + void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ); + void eliminate_redundant_card_marks(Node* n); public: diff --git a/hotspot/src/share/vm/opto/doCall.cpp b/hotspot/src/share/vm/opto/doCall.cpp index ba806d79c55..ca9a0a6d67e 100644 --- a/hotspot/src/share/vm/opto/doCall.cpp +++ b/hotspot/src/share/vm/opto/doCall.cpp @@ -350,7 +350,7 @@ void Parse::do_call() { // Set frequently used booleans const bool is_virtual = bc() == Bytecodes::_invokevirtual; const bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface; - const bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial; + const bool has_receiver = Bytecodes::has_receiver(bc()); // Find target being called bool will_link; @@ -380,6 +380,8 @@ void Parse::do_call() { // Note: In the absence of miranda methods, an abstract class K can perform // an invokevirtual directly on an interface method I.m if K implements I. + // orig_callee is the resolved callee which's signature includes the + // appendix argument. const int nargs = orig_callee->arg_size(); // Push appendix argument (MethodType, CallSite, etc.), if one. @@ -572,7 +574,7 @@ void Parse::do_call() { } // If there is going to be a trap, put it at the next bytecode: set_bci(iter().next_bci()); - do_null_assert(peek(), T_OBJECT); + null_assert(peek()); set_bci(iter().cur_bci()); // put it back } } diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp index 40acbdbe770..659828e509f 100644 --- a/hotspot/src/share/vm/opto/escape.cpp +++ b/hotspot/src/share/vm/opto/escape.cpp @@ -2320,7 +2320,7 @@ PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, Gro } } } - if ((int)C->unique() + 2*NodeLimitFudgeFactor > MaxNodeLimit) { + if ((int) (C->live_nodes() + 2*NodeLimitFudgeFactor) > MaxNodeLimit) { if (C->do_escape_analysis() == true && !C->failing()) { // Retry compilation without escape analysis. // If this is the first failure, the sentinel string will "stick" diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp index 6cd2ecd237e..4fffeeb80d3 100644 --- a/hotspot/src/share/vm/opto/gcm.cpp +++ b/hotspot/src/share/vm/opto/gcm.cpp @@ -1359,7 +1359,7 @@ void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_ // If we inserted any instructions between a Call and his CatchNode, // clone the instructions on all paths below the Catch. for( i=0; i < _num_blocks; i++ ) - _blocks[i]->call_catch_cleanup(_bbs); + _blocks[i]->call_catch_cleanup(_bbs, C); #ifndef PRODUCT if (trace_opto_pipelining()) { diff --git a/hotspot/src/share/vm/opto/graphKit.cpp b/hotspot/src/share/vm/opto/graphKit.cpp index 95d5357d7d3..ba7ec2caf77 100644 --- a/hotspot/src/share/vm/opto/graphKit.cpp +++ b/hotspot/src/share/vm/opto/graphKit.cpp @@ -93,6 +93,16 @@ JVMState* GraphKit::sync_jvms() const { return jvms; } +//--------------------------------sync_jvms_for_reexecute--------------------- +// Make sure our current jvms agrees with our parse state. This version +// uses the reexecute_sp for reexecuting bytecodes. +JVMState* GraphKit::sync_jvms_for_reexecute() { + JVMState* jvms = this->jvms(); + jvms->set_bci(bci()); // Record the new bci in the JVMState + jvms->set_sp(reexecute_sp()); // Record the new sp in the JVMState + return jvms; +} + #ifdef ASSERT bool GraphKit::jvms_in_sync() const { Parse* parse = is_Parse(); @@ -143,7 +153,7 @@ void GraphKit::verify_exception_state(SafePointNode* ex_map) { void GraphKit::stop_and_kill_map() { SafePointNode* dead_map = stop(); if (dead_map != NULL) { - dead_map->disconnect_inputs(NULL); // Mark the map as killed. + dead_map->disconnect_inputs(NULL, C); // Mark the map as killed. assert(dead_map->is_killed(), "must be so marked"); } } @@ -826,7 +836,16 @@ void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) { // Walk the inline list to fill in the correct set of JVMState's // Also fill in the associated edges for each JVMState. - JVMState* youngest_jvms = sync_jvms(); + // If the bytecode needs to be reexecuted we need to put + // the arguments back on the stack. + const bool should_reexecute = jvms()->should_reexecute(); + JVMState* youngest_jvms = should_reexecute ? sync_jvms_for_reexecute() : sync_jvms(); + + // NOTE: set_bci (called from sync_jvms) might reset the reexecute bit to + // undefined if the bci is different. This is normal for Parse but it + // should not happen for LibraryCallKit because only one bci is processed. + assert(!is_LibraryCallKit() || (jvms()->should_reexecute() == should_reexecute), + "in LibraryCallKit the reexecute bit should not change"); // If we are guaranteed to throw, we can prune everything but the // input to the current bytecode. @@ -860,7 +879,7 @@ void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) { } // Presize the call: - debug_only(uint non_debug_edges = call->req()); + DEBUG_ONLY(uint non_debug_edges = call->req()); call->add_req_batch(top(), youngest_jvms->debug_depth()); assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), ""); @@ -965,7 +984,7 @@ void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) { assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, ""); } -bool GraphKit::compute_stack_effects(int& inputs, int& depth, bool for_parse) { +bool GraphKit::compute_stack_effects(int& inputs, int& depth) { Bytecodes::Code code = java_bc(); if (code == Bytecodes::_wide) { code = method()->java_code_at_bci(bci() + 1); @@ -1005,14 +1024,11 @@ bool GraphKit::compute_stack_effects(int& inputs, int& depth, bool for_parse) { case Bytecodes::_getfield: case Bytecodes::_putfield: { - bool is_get = (depth >= 0), is_static = (depth & 1); - ciBytecodeStream iter(method()); - iter.reset_to_bci(bci()); - iter.next(); bool ignored_will_link; - ciField* field = iter.get_field(ignored_will_link); + ciField* field = method()->get_field_at_bci(bci(), ignored_will_link); int size = field->type()->size(); - inputs = (is_static ? 0 : 1); + bool is_get = (depth >= 0), is_static = (depth & 1); + inputs = (is_static ? 0 : 1); if (is_get) { depth = size - inputs; } else { @@ -1028,26 +1044,11 @@ bool GraphKit::compute_stack_effects(int& inputs, int& depth, bool for_parse) { case Bytecodes::_invokedynamic: case Bytecodes::_invokeinterface: { - ciBytecodeStream iter(method()); - iter.reset_to_bci(bci()); - iter.next(); bool ignored_will_link; ciSignature* declared_signature = NULL; - ciMethod* callee = iter.get_method(ignored_will_link, &declared_signature); + ciMethod* ignored_callee = method()->get_method_at_bci(bci(), ignored_will_link, &declared_signature); assert(declared_signature != NULL, "cannot be null"); - // (Do not use ciMethod::arg_size(), because - // it might be an unloaded method, which doesn't - // know whether it is static or not.) - if (for_parse) { - // Case 1: When called from parse we are *before* the invoke (in the - // caller) and need to to adjust the inputs by an appendix - // argument that will be pushed implicitly. - inputs = callee->invoke_arg_size(code) - (iter.has_appendix() ? 1 : 0); - } else { - // Case 2: Here we are *after* the invoke (in the callee) and need to - // remove any appendix arguments that were popped. - inputs = callee->invoke_arg_size(code) - (callee->has_member_arg() ? 1 : 0); - } + inputs = declared_signature->arg_size_for_bc(code); int size = declared_signature->return_type()->size(); depth = size - inputs; } @@ -1178,7 +1179,7 @@ Node* GraphKit::null_check_common(Node* value, BasicType type, Node *chk = NULL; switch(type) { case T_LONG : chk = new (C) CmpLNode(value, _gvn.zerocon(T_LONG)); break; - case T_INT : chk = new (C) CmpINode( value, _gvn.intcon(0)); break; + case T_INT : chk = new (C) CmpINode(value, _gvn.intcon(0)); break; case T_ARRAY : // fall through type = T_OBJECT; // simplify further tests case T_OBJECT : { @@ -1229,7 +1230,8 @@ Node* GraphKit::null_check_common(Node* value, BasicType type, break; } - default : ShouldNotReachHere(); + default: + fatal(err_msg_res("unexpected type: %s", type2name(type))); } assert(chk != NULL, "sanity check"); chk = _gvn.transform(chk); @@ -1809,7 +1811,7 @@ void GraphKit::replace_call(CallNode* call, Node* result) { } // Disconnect the call from the graph - call->disconnect_inputs(NULL); + call->disconnect_inputs(NULL, C); C->gvn_replace_by(call, C->top()); // Clean up any MergeMems that feed other MergeMems since the @@ -1861,15 +1863,17 @@ void GraphKit::uncommon_trap(int trap_request, // occurs here, the runtime will make sure an MDO exists. There is // no need to call method()->ensure_method_data() at this point. + // Set the stack pointer to the right value for reexecution: + set_sp(reexecute_sp()); + #ifdef ASSERT if (!must_throw) { // Make sure the stack has at least enough depth to execute // the current bytecode. - int inputs, ignore; - if (compute_stack_effects(inputs, ignore)) { - assert(sp() >= inputs, "must have enough JVMS stack to execute"); - // It is a frequent error in library_call.cpp to issue an - // uncommon trap with the _sp value already popped. + int inputs, ignored_depth; + if (compute_stack_effects(inputs, ignored_depth)) { + assert(sp() >= inputs, err_msg_res("must have enough JVMS stack to execute %s: sp=%d, inputs=%d", + Bytecodes::name(java_bc()), sp(), inputs)); } } #endif @@ -1900,7 +1904,8 @@ void GraphKit::uncommon_trap(int trap_request, case Deoptimization::Action_make_not_compilable: break; default: - assert(false, "bad action"); + fatal(err_msg_res("unknown action %d: %s", action, Deoptimization::trap_action_name(action))); + break; #endif } @@ -2667,7 +2672,7 @@ Node* GraphKit::gen_checkcast(Node *obj, Node* superklass, case SSC_always_false: // It needs a null check because a null will *pass* the cast check. // A non-null value will always produce an exception. - return do_null_assert(obj, T_OBJECT); + return null_assert(obj); } } } @@ -2786,7 +2791,7 @@ Node* GraphKit::insert_mem_bar(int opcode, Node* precedent) { mb->init_req(TypeFunc::Control, control()); mb->init_req(TypeFunc::Memory, reset_memory()); Node* membar = _gvn.transform(mb); - set_control(_gvn.transform(new (C) ProjNode(membar,TypeFunc::Control) )); + set_control(_gvn.transform(new (C) ProjNode(membar, TypeFunc::Control))); set_all_memory_call(membar); return membar; } @@ -3148,7 +3153,7 @@ Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable) Node* cmp_lh = _gvn.transform( new(C) CmpINode(layout_val, intcon(layout_con)) ); Node* bol_lh = _gvn.transform( new(C) BoolNode(cmp_lh, BoolTest::eq) ); { BuildCutout unless(this, bol_lh, PROB_MAX); - _sp += nargs; + inc_sp(nargs); uncommon_trap(Deoptimization::Reason_class_check, Deoptimization::Action_maybe_recompile); } @@ -3391,7 +3396,7 @@ void GraphKit::add_predicate_impl(Deoptimization::DeoptReason reason, int nargs) { PreserveJVMState pjvms(this); set_control(iffalse); - _sp += nargs; + inc_sp(nargs); uncommon_trap(reason, Deoptimization::Action_maybe_recompile); } Node* iftrue = _gvn.transform(new (C) IfTrueNode(iff)); diff --git a/hotspot/src/share/vm/opto/graphKit.hpp b/hotspot/src/share/vm/opto/graphKit.hpp index ede5b92e68e..4d7d96178ae 100644 --- a/hotspot/src/share/vm/opto/graphKit.hpp +++ b/hotspot/src/share/vm/opto/graphKit.hpp @@ -41,6 +41,7 @@ class FastLockNode; class FastUnlockNode; class IdealKit; +class LibraryCallKit; class Parse; class RootNode; @@ -60,10 +61,12 @@ class GraphKit : public Phase { PhaseGVN &_gvn; // Some optimizations while parsing SafePointNode* _map; // Parser map from JVM to Nodes SafePointNode* _exceptions;// Parser map(s) for exception state(s) - int _sp; // JVM Expression Stack Pointer int _bci; // JVM Bytecode Pointer ciMethod* _method; // JVM Current Method + private: + int _sp; // JVM Expression Stack Pointer; don't modify directly! + private: SafePointNode* map_not_null() const { assert(_map != NULL, "must call stopped() to test for reset compiler map"); @@ -80,7 +83,8 @@ class GraphKit : public Phase { } #endif - virtual Parse* is_Parse() const { return NULL; } + virtual Parse* is_Parse() const { return NULL; } + virtual LibraryCallKit* is_LibraryCallKit() const { return NULL; } ciEnv* env() const { return _env; } PhaseGVN& gvn() const { return _gvn; } @@ -141,7 +145,7 @@ class GraphKit : public Phase { _bci = jvms->bci(); _method = jvms->has_method() ? jvms->method() : NULL; } void set_map(SafePointNode* m) { _map = m; debug_only(verify_map()); } - void set_sp(int i) { assert(i >= 0, "must be non-negative"); _sp = i; } + void set_sp(int sp) { assert(sp >= 0, err_msg_res("sp must be non-negative: %d", sp)); _sp = sp; } void clean_stack(int from_sp); // clear garbage beyond from_sp to top void inc_sp(int i) { set_sp(sp() + i); } @@ -149,7 +153,9 @@ class GraphKit : public Phase { void set_bci(int bci) { _bci = bci; } // Make sure jvms has current bci & sp. - JVMState* sync_jvms() const; + JVMState* sync_jvms() const; + JVMState* sync_jvms_for_reexecute(); + #ifdef ASSERT // Make sure JVMS has an updated copy of bci and sp. // Also sanity-check method, depth, and monitor depth. @@ -286,7 +292,7 @@ class GraphKit : public Phase { // How many stack inputs does the current BC consume? // And, how does the stack change after the bytecode? // Returns false if unknown. - bool compute_stack_effects(int& inputs, int& depth, bool for_parse = false); + bool compute_stack_effects(int& inputs, int& depth); // Add a fixed offset to a pointer Node* basic_plus_adr(Node* base, Node* ptr, intptr_t offset) { @@ -337,20 +343,37 @@ class GraphKit : public Phase { Node* load_object_klass(Node* object); // Find out the length of an array. Node* load_array_length(Node* array); + + // Helper function to do a NULL pointer check or ZERO check based on type. - Node* null_check_common(Node* value, BasicType type, - bool assert_null, Node* *null_control); // Throw an exception if a given value is null. // Return the value cast to not-null. // Be clever about equivalent dominating null checks. - Node* do_null_check(Node* value, BasicType type) { - return null_check_common(value, type, false, NULL); + Node* null_check_common(Node* value, BasicType type, + bool assert_null = false, Node* *null_control = NULL); + Node* null_check(Node* value, BasicType type = T_OBJECT) { + return null_check_common(value, type); + } + Node* null_check_receiver() { + assert(argument(0)->bottom_type()->isa_ptr(), "must be"); + return null_check(argument(0)); + } + Node* zero_check_int(Node* value) { + assert(value->bottom_type()->basic_type() == T_INT, + err_msg_res("wrong type: %s", type2name(value->bottom_type()->basic_type()))); + return null_check_common(value, T_INT); + } + Node* zero_check_long(Node* value) { + assert(value->bottom_type()->basic_type() == T_LONG, + err_msg_res("wrong type: %s", type2name(value->bottom_type()->basic_type()))); + return null_check_common(value, T_LONG); } // Throw an uncommon trap if a given value is __not__ null. // Return the value cast to null, and be clever about dominating checks. - Node* do_null_assert(Node* value, BasicType type) { - return null_check_common(value, type, true, NULL); + Node* null_assert(Node* value, BasicType type = T_OBJECT) { + return null_check_common(value, type, true); } + // Null check oop. Return null-path control into (*null_control). // Return a cast-not-null node which depends on the not-null control. // If never_see_null, use an uncommon trap (*null_control sees a top). @@ -371,9 +394,9 @@ class GraphKit : public Phase { // Replace all occurrences of one node by another. void replace_in_map(Node* old, Node* neww); - void push(Node* n) { map_not_null(); _map->set_stack(_map->_jvms, _sp++, n); } - Node* pop() { map_not_null(); return _map->stack( _map->_jvms, --_sp); } - Node* peek(int off = 0) { map_not_null(); return _map->stack( _map->_jvms, _sp - off - 1); } + void push(Node* n) { map_not_null(); _map->set_stack(_map->_jvms, _sp++ , n); } + Node* pop() { map_not_null(); return _map->stack( _map->_jvms, --_sp ); } + Node* peek(int off = 0) { map_not_null(); return _map->stack( _map->_jvms, _sp - off - 1 ); } void push_pair(Node* ldval) { push(ldval); @@ -580,19 +603,15 @@ class GraphKit : public Phase { //---------- help for generating calls -------------- - // Do a null check on the receiver, which is in argument(0). - Node* null_check_receiver(ciMethod* callee) { + // Do a null check on the receiver as it would happen before the call to + // callee (with all arguments still on the stack). + Node* null_check_receiver_before_call(ciMethod* callee) { assert(!callee->is_static(), "must be a virtual method"); - int nargs = 1 + callee->signature()->size(); - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when the primitive is inlined into a method - // which handles NullPointerExceptions. - Node* receiver = argument(0); - _sp += nargs; - receiver = do_null_check(receiver, T_OBJECT); - _sp -= nargs; - return receiver; + const int nargs = callee->arg_size(); + inc_sp(nargs); + Node* n = null_check_receiver(); + dec_sp(nargs); + return n; } // Fill in argument edges for the call from argument(0), argument(1), ... @@ -645,6 +664,9 @@ class GraphKit : public Phase { klass, reason_string, must_throw, keep_exact_action); } + // SP when bytecode needs to be reexecuted. + virtual int reexecute_sp() { return sp(); } + // Report if there were too many traps at the current method and bci. // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded. // If there is no MDO at all, report no trap unless told to assume it. diff --git a/hotspot/src/share/vm/opto/ifg.cpp b/hotspot/src/share/vm/opto/ifg.cpp index 4827a17d8df..44828fa6ab8 100644 --- a/hotspot/src/share/vm/opto/ifg.cpp +++ b/hotspot/src/share/vm/opto/ifg.cpp @@ -573,7 +573,7 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { (n2lidx(def) && !liveout.member(n2lidx(def)) ) ) { b->_nodes.remove(j - 1); if( lrgs(r)._def == n ) lrgs(r)._def = 0; - n->disconnect_inputs(NULL); + n->disconnect_inputs(NULL, C); _cfg._bbs.map(n->_idx,NULL); n->replace_by(C->top()); // Since yanking a Node from block, high pressure moves up one diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp index 24a6a3182d0..5ca4bae13d7 100644 --- a/hotspot/src/share/vm/opto/lcm.cpp +++ b/hotspot/src/share/vm/opto/lcm.cpp @@ -1006,7 +1006,7 @@ static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Bloc //------------------------------call_catch_cleanup----------------------------- // If we inserted any instructions between a Call and his CatchNode, // clone the instructions on all paths below the Catch. -void Block::call_catch_cleanup(Block_Array &bbs) { +void Block::call_catch_cleanup(Block_Array &bbs, Compile* C) { // End of region to clone uint end = end_idx(); @@ -1068,7 +1068,7 @@ void Block::call_catch_cleanup(Block_Array &bbs) { // Remove the now-dead cloned ops for(uint i3 = beg; i3 < end; i3++ ) { - _nodes[beg]->disconnect_inputs(NULL); + _nodes[beg]->disconnect_inputs(NULL, C); _nodes.remove(beg); } @@ -1081,7 +1081,7 @@ void Block::call_catch_cleanup(Block_Array &bbs) { Node *n = sb->_nodes[j]; if (n->outcnt() == 0 && (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){ - n->disconnect_inputs(NULL); + n->disconnect_inputs(NULL, C); sb->_nodes.remove(j); new_cnt--; } diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp index 6b90061ff4f..43d4d05022e 100644 --- a/hotspot/src/share/vm/opto/library_call.cpp +++ b/hotspot/src/share/vm/opto/library_call.cpp @@ -67,30 +67,64 @@ class LibraryIntrinsic : public InlineCallGenerator { // Local helper class for LibraryIntrinsic: class LibraryCallKit : public GraphKit { private: - LibraryIntrinsic* _intrinsic; // the library intrinsic being called + LibraryIntrinsic* _intrinsic; // the library intrinsic being called + Node* _result; // the result node, if any + int _reexecute_sp; // the stack pointer when bytecode needs to be reexecuted const TypeOopPtr* sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr = false); public: - LibraryCallKit(JVMState* caller, LibraryIntrinsic* intrinsic) - : GraphKit(caller), - _intrinsic(intrinsic) + LibraryCallKit(JVMState* jvms, LibraryIntrinsic* intrinsic) + : GraphKit(jvms), + _intrinsic(intrinsic), + _result(NULL) { + // Check if this is a root compile. In that case we don't have a caller. + if (!jvms->has_method()) { + _reexecute_sp = sp(); + } else { + // Find out how many arguments the interpreter needs when deoptimizing + // and save the stack pointer value so it can used by uncommon_trap. + // We find the argument count by looking at the declared signature. + bool ignored_will_link; + ciSignature* declared_signature = NULL; + ciMethod* ignored_callee = caller()->get_method_at_bci(bci(), ignored_will_link, &declared_signature); + const int nargs = declared_signature->arg_size_for_bc(caller()->java_code_at_bci(bci())); + _reexecute_sp = sp() + nargs; // "push" arguments back on stack + } } + virtual LibraryCallKit* is_LibraryCallKit() const { return (LibraryCallKit*)this; } + ciMethod* caller() const { return jvms()->method(); } int bci() const { return jvms()->bci(); } LibraryIntrinsic* intrinsic() const { return _intrinsic; } vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); } ciMethod* callee() const { return _intrinsic->method(); } - ciSignature* signature() const { return callee()->signature(); } - int arg_size() const { return callee()->arg_size(); } bool try_to_inline(); Node* try_to_predicate(); + void push_result() { + // Push the result onto the stack. + if (!stopped() && result() != NULL) { + BasicType bt = result()->bottom_type()->basic_type(); + push_node(bt, result()); + } + } + + private: + void fatal_unexpected_iid(vmIntrinsics::ID iid) { + fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid))); + } + + void set_result(Node* n) { assert(_result == NULL, "only set once"); _result = n; } + void set_result(RegionNode* region, PhiNode* value); + Node* result() { return _result; } + + virtual int reexecute_sp() { return _reexecute_sp; } + // Helper functions to inline natives - void push_result(RegionNode* region, PhiNode* value); Node* generate_guard(Node* test, RegionNode* region, float true_prob); Node* generate_slow_guard(Node* test, RegionNode* region); Node* generate_fair_guard(Node* test, RegionNode* region); @@ -108,21 +142,19 @@ class LibraryCallKit : public GraphKit { bool disjoint_bases, const char* &name, bool dest_uninitialized); Node* load_mirror_from_klass(Node* klass); Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null, - int nargs, RegionNode* region, int null_path, int offset); - Node* load_klass_from_mirror(Node* mirror, bool never_see_null, int nargs, + Node* load_klass_from_mirror(Node* mirror, bool never_see_null, RegionNode* region, int null_path) { int offset = java_lang_Class::klass_offset_in_bytes(); - return load_klass_from_mirror_common(mirror, never_see_null, nargs, + return load_klass_from_mirror_common(mirror, never_see_null, region, null_path, offset); } Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null, - int nargs, RegionNode* region, int null_path) { int offset = java_lang_Class::array_klass_offset_in_bytes(); - return load_klass_from_mirror_common(mirror, never_see_null, nargs, + return load_klass_from_mirror_common(mirror, never_see_null, region, null_path, offset); } @@ -161,16 +193,14 @@ class LibraryCallKit : public GraphKit { bool inline_string_indexOf(); Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i); bool inline_string_equals(); - Node* pop_math_arg(); + Node* round_double_node(Node* n); bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName); bool inline_math_native(vmIntrinsics::ID id); bool inline_trig(vmIntrinsics::ID id); - bool inline_trans(vmIntrinsics::ID id); - bool inline_abs(vmIntrinsics::ID id); - bool inline_sqrt(vmIntrinsics::ID id); + bool inline_math(vmIntrinsics::ID id); + bool inline_exp(); + bool inline_pow(); void finish_pow_exp(Node* result, Node* x, Node* y, const TypeFunc* call_type, address funcAddr, const char* funcName); - bool inline_pow(vmIntrinsics::ID id); - bool inline_exp(vmIntrinsics::ID id); bool inline_min_max(vmIntrinsics::ID id); Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y); // This returns Type::AnyPtr, RawPtr, or OopPtr. @@ -179,7 +209,7 @@ class LibraryCallKit : public GraphKit { // Helper for inline_unsafe_access. // Generates the guards that check whether the result of // Unsafe.getObject should be recorded in an SATB log buffer. - void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, int nargs, bool need_mem_bar); + void insert_pre_barrier(Node* base_oop, Node* offset, Node* pre_val, bool need_mem_bar); bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile); bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static); bool inline_unsafe_allocate(); @@ -253,11 +283,7 @@ class LibraryCallKit : public GraphKit { bool inline_unsafe_load_store(BasicType type, LoadStoreKind kind); bool inline_unsafe_ordered_store(BasicType type); bool inline_fp_conversions(vmIntrinsics::ID id); - bool inline_numberOfLeadingZeros(vmIntrinsics::ID id); - bool inline_numberOfTrailingZeros(vmIntrinsics::ID id); - bool inline_bitCount(vmIntrinsics::ID id); - bool inline_reverseBytes(vmIntrinsics::ID id); - + bool inline_number_methods(vmIntrinsics::ID id); bool inline_reference_get(); bool inline_aescrypt_Block(vmIntrinsics::ID id); bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id); @@ -321,15 +347,18 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { switch (id) { case vmIntrinsics::_compareTo: if (!SpecialStringCompareTo) return NULL; + if (!Matcher::match_rule_supported(Op_StrComp)) return NULL; break; case vmIntrinsics::_indexOf: if (!SpecialStringIndexOf) return NULL; break; case vmIntrinsics::_equals: if (!SpecialStringEquals) return NULL; + if (!Matcher::match_rule_supported(Op_StrEquals)) return NULL; break; case vmIntrinsics::_equalsC: if (!SpecialArraysEquals) return NULL; + if (!Matcher::match_rule_supported(Op_AryEq)) return NULL; break; case vmIntrinsics::_arraycopy: if (!InlineArrayCopy) return NULL; @@ -382,6 +411,19 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL; break; + case vmIntrinsics::_reverseBytes_c: + if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return false; + break; + case vmIntrinsics::_reverseBytes_s: + if (!Matcher::match_rule_supported(Op_ReverseBytesS)) return false; + break; + case vmIntrinsics::_reverseBytes_i: + if (!Matcher::match_rule_supported(Op_ReverseBytesI)) return false; + break; + case vmIntrinsics::_reverseBytes_l: + if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return false; + break; + case vmIntrinsics::_Reference_get: // Use the intrinsic version of Reference.get() so that the value in // the referent field can be registered by the G1 pre-barrier code. @@ -488,10 +530,13 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) { tty->print_cr("Intrinsic %s", str); } #endif + ciMethod* callee = kit.callee(); + const int bci = kit.bci(); + // Try to inline the intrinsic. if (kit.try_to_inline()) { if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) { - CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); + CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked); if (C->log()) { @@ -500,6 +545,8 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) { (is_virtual() ? " virtual='1'" : ""), C->unique() - nodes); } + // Push the result from the inlined method onto the stack. + kit.push_result(); return kit.transfer_exceptions_into_jvms(); } @@ -508,12 +555,12 @@ JVMState* LibraryIntrinsic::generate(JVMState* jvms) { if (jvms->has_method()) { // Not a root compile. const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)"; - CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg); + CompileTask::print_inlining(callee, jvms->depth() - 1, bci, msg); } else { // Root compile tty->print("Did not generate intrinsic %s%s at bci:%d in", vmIntrinsics::name_at(intrinsic_id()), - (is_virtual() ? " (virtual)" : ""), kit.bci()); + (is_virtual() ? " (virtual)" : ""), bci); } } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed); @@ -532,9 +579,15 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) { tty->print_cr("Predicate for intrinsic %s", str); } #endif + ciMethod* callee = kit.callee(); + const int bci = kit.bci(); Node* slow_ctl = kit.try_to_predicate(); if (!kit.failing()) { + if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) { + CompileTask::print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)"); + } + C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked); if (C->log()) { C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'", vmIntrinsics::name_at(intrinsic_id()), @@ -549,12 +602,12 @@ Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) { if (jvms->has_method()) { // Not a root compile. const char* msg = "failed to generate predicate for intrinsic"; - CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg); + CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, bci, msg); } else { // Root compile tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in", vmIntrinsics::name_at(intrinsic_id()), - (is_virtual() ? " (virtual)" : ""), kit.bci()); + (is_virtual() ? " (virtual)" : ""), bci); } } C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed); @@ -566,6 +619,7 @@ bool LibraryCallKit::try_to_inline() { const bool is_store = true; const bool is_native_ptr = true; const bool is_static = true; + const bool is_volatile = true; if (!jvms()->has_method()) { // Root JVMState has a null method. @@ -575,13 +629,11 @@ bool LibraryCallKit::try_to_inline() { } assert(merged_memory(), ""); + switch (intrinsic_id()) { - case vmIntrinsics::_hashCode: - return inline_native_hashcode(intrinsic()->is_virtual(), !is_static); - case vmIntrinsics::_identityHashCode: - return inline_native_hashcode(/*!virtual*/ false, is_static); - case vmIntrinsics::_getClass: - return inline_native_getClass(); + case vmIntrinsics::_hashCode: return inline_native_hashcode(intrinsic()->is_virtual(), !is_static); + case vmIntrinsics::_identityHashCode: return inline_native_hashcode(/*!virtual*/ false, is_static); + case vmIntrinsics::_getClass: return inline_native_getClass(); case vmIntrinsics::_dsin: case vmIntrinsics::_dcos: @@ -592,203 +644,114 @@ bool LibraryCallKit::try_to_inline() { case vmIntrinsics::_dexp: case vmIntrinsics::_dlog: case vmIntrinsics::_dlog10: - case vmIntrinsics::_dpow: - return inline_math_native(intrinsic_id()); + case vmIntrinsics::_dpow: return inline_math_native(intrinsic_id()); case vmIntrinsics::_min: - case vmIntrinsics::_max: - return inline_min_max(intrinsic_id()); + case vmIntrinsics::_max: return inline_min_max(intrinsic_id()); - case vmIntrinsics::_arraycopy: - return inline_arraycopy(); + case vmIntrinsics::_arraycopy: return inline_arraycopy(); - case vmIntrinsics::_compareTo: - return inline_string_compareTo(); - case vmIntrinsics::_indexOf: - return inline_string_indexOf(); - case vmIntrinsics::_equals: - return inline_string_equals(); + case vmIntrinsics::_compareTo: return inline_string_compareTo(); + case vmIntrinsics::_indexOf: return inline_string_indexOf(); + case vmIntrinsics::_equals: return inline_string_equals(); - case vmIntrinsics::_getObject: - return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false); - case vmIntrinsics::_getBoolean: - return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, false); - case vmIntrinsics::_getByte: - return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, false); - case vmIntrinsics::_getShort: - return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, false); - case vmIntrinsics::_getChar: - return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, false); - case vmIntrinsics::_getInt: - return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, false); - case vmIntrinsics::_getLong: - return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, false); - case vmIntrinsics::_getFloat: - return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, false); - case vmIntrinsics::_getDouble: - return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, false); + case vmIntrinsics::_getObject: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, !is_volatile); + case vmIntrinsics::_getBoolean: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, !is_volatile); + case vmIntrinsics::_getByte: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, !is_volatile); + case vmIntrinsics::_getShort: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, !is_volatile); + case vmIntrinsics::_getChar: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, !is_volatile); + case vmIntrinsics::_getInt: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, !is_volatile); + case vmIntrinsics::_getLong: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, !is_volatile); + case vmIntrinsics::_getFloat: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, !is_volatile); + case vmIntrinsics::_getDouble: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, !is_volatile); - case vmIntrinsics::_putObject: - return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, false); - case vmIntrinsics::_putBoolean: - return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, false); - case vmIntrinsics::_putByte: - return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, false); - case vmIntrinsics::_putShort: - return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, false); - case vmIntrinsics::_putChar: - return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, false); - case vmIntrinsics::_putInt: - return inline_unsafe_access(!is_native_ptr, is_store, T_INT, false); - case vmIntrinsics::_putLong: - return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, false); - case vmIntrinsics::_putFloat: - return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, false); - case vmIntrinsics::_putDouble: - return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, false); + case vmIntrinsics::_putObject: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, !is_volatile); + case vmIntrinsics::_putBoolean: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, !is_volatile); + case vmIntrinsics::_putByte: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, !is_volatile); + case vmIntrinsics::_putShort: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, !is_volatile); + case vmIntrinsics::_putChar: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, !is_volatile); + case vmIntrinsics::_putInt: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, !is_volatile); + case vmIntrinsics::_putLong: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, !is_volatile); + case vmIntrinsics::_putFloat: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, !is_volatile); + case vmIntrinsics::_putDouble: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, !is_volatile); - case vmIntrinsics::_getByte_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_BYTE, false); - case vmIntrinsics::_getShort_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_SHORT, false); - case vmIntrinsics::_getChar_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_CHAR, false); - case vmIntrinsics::_getInt_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_INT, false); - case vmIntrinsics::_getLong_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_LONG, false); - case vmIntrinsics::_getFloat_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_FLOAT, false); - case vmIntrinsics::_getDouble_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_DOUBLE, false); - case vmIntrinsics::_getAddress_raw: - return inline_unsafe_access(is_native_ptr, !is_store, T_ADDRESS, false); + case vmIntrinsics::_getByte_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_BYTE, !is_volatile); + case vmIntrinsics::_getShort_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_SHORT, !is_volatile); + case vmIntrinsics::_getChar_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_CHAR, !is_volatile); + case vmIntrinsics::_getInt_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_INT, !is_volatile); + case vmIntrinsics::_getLong_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_LONG, !is_volatile); + case vmIntrinsics::_getFloat_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_FLOAT, !is_volatile); + case vmIntrinsics::_getDouble_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_DOUBLE, !is_volatile); + case vmIntrinsics::_getAddress_raw: return inline_unsafe_access( is_native_ptr, !is_store, T_ADDRESS, !is_volatile); - case vmIntrinsics::_putByte_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_BYTE, false); - case vmIntrinsics::_putShort_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_SHORT, false); - case vmIntrinsics::_putChar_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_CHAR, false); - case vmIntrinsics::_putInt_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_INT, false); - case vmIntrinsics::_putLong_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_LONG, false); - case vmIntrinsics::_putFloat_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_FLOAT, false); - case vmIntrinsics::_putDouble_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_DOUBLE, false); - case vmIntrinsics::_putAddress_raw: - return inline_unsafe_access(is_native_ptr, is_store, T_ADDRESS, false); + case vmIntrinsics::_putByte_raw: return inline_unsafe_access( is_native_ptr, is_store, T_BYTE, !is_volatile); + case vmIntrinsics::_putShort_raw: return inline_unsafe_access( is_native_ptr, is_store, T_SHORT, !is_volatile); + case vmIntrinsics::_putChar_raw: return inline_unsafe_access( is_native_ptr, is_store, T_CHAR, !is_volatile); + case vmIntrinsics::_putInt_raw: return inline_unsafe_access( is_native_ptr, is_store, T_INT, !is_volatile); + case vmIntrinsics::_putLong_raw: return inline_unsafe_access( is_native_ptr, is_store, T_LONG, !is_volatile); + case vmIntrinsics::_putFloat_raw: return inline_unsafe_access( is_native_ptr, is_store, T_FLOAT, !is_volatile); + case vmIntrinsics::_putDouble_raw: return inline_unsafe_access( is_native_ptr, is_store, T_DOUBLE, !is_volatile); + case vmIntrinsics::_putAddress_raw: return inline_unsafe_access( is_native_ptr, is_store, T_ADDRESS, !is_volatile); - case vmIntrinsics::_getObjectVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, true); - case vmIntrinsics::_getBooleanVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, true); - case vmIntrinsics::_getByteVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, true); - case vmIntrinsics::_getShortVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, true); - case vmIntrinsics::_getCharVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, true); - case vmIntrinsics::_getIntVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, true); - case vmIntrinsics::_getLongVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, true); - case vmIntrinsics::_getFloatVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, true); - case vmIntrinsics::_getDoubleVolatile: - return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, true); + case vmIntrinsics::_getObjectVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, is_volatile); + case vmIntrinsics::_getBooleanVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, is_volatile); + case vmIntrinsics::_getByteVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, is_volatile); + case vmIntrinsics::_getShortVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, is_volatile); + case vmIntrinsics::_getCharVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, is_volatile); + case vmIntrinsics::_getIntVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, is_volatile); + case vmIntrinsics::_getLongVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, is_volatile); + case vmIntrinsics::_getFloatVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, is_volatile); + case vmIntrinsics::_getDoubleVolatile: return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, is_volatile); - case vmIntrinsics::_putObjectVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, true); - case vmIntrinsics::_putBooleanVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, true); - case vmIntrinsics::_putByteVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, true); - case vmIntrinsics::_putShortVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, true); - case vmIntrinsics::_putCharVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, true); - case vmIntrinsics::_putIntVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_INT, true); - case vmIntrinsics::_putLongVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, true); - case vmIntrinsics::_putFloatVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, true); - case vmIntrinsics::_putDoubleVolatile: - return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, true); + case vmIntrinsics::_putObjectVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, is_volatile); + case vmIntrinsics::_putBooleanVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, is_volatile); + case vmIntrinsics::_putByteVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, is_volatile); + case vmIntrinsics::_putShortVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, is_volatile); + case vmIntrinsics::_putCharVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, is_volatile); + case vmIntrinsics::_putIntVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_INT, is_volatile); + case vmIntrinsics::_putLongVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, is_volatile); + case vmIntrinsics::_putFloatVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, is_volatile); + case vmIntrinsics::_putDoubleVolatile: return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, is_volatile); - case vmIntrinsics::_prefetchRead: - return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static); - case vmIntrinsics::_prefetchWrite: - return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static); - case vmIntrinsics::_prefetchReadStatic: - return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static); - case vmIntrinsics::_prefetchWriteStatic: - return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static); + case vmIntrinsics::_prefetchRead: return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static); + case vmIntrinsics::_prefetchWrite: return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static); + case vmIntrinsics::_prefetchReadStatic: return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static); + case vmIntrinsics::_prefetchWriteStatic: return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static); - case vmIntrinsics::_compareAndSwapObject: - return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg); - case vmIntrinsics::_compareAndSwapInt: - return inline_unsafe_load_store(T_INT, LS_cmpxchg); - case vmIntrinsics::_compareAndSwapLong: - return inline_unsafe_load_store(T_LONG, LS_cmpxchg); + case vmIntrinsics::_compareAndSwapObject: return inline_unsafe_load_store(T_OBJECT, LS_cmpxchg); + case vmIntrinsics::_compareAndSwapInt: return inline_unsafe_load_store(T_INT, LS_cmpxchg); + case vmIntrinsics::_compareAndSwapLong: return inline_unsafe_load_store(T_LONG, LS_cmpxchg); - case vmIntrinsics::_putOrderedObject: - return inline_unsafe_ordered_store(T_OBJECT); - case vmIntrinsics::_putOrderedInt: - return inline_unsafe_ordered_store(T_INT); - case vmIntrinsics::_putOrderedLong: - return inline_unsafe_ordered_store(T_LONG); + case vmIntrinsics::_putOrderedObject: return inline_unsafe_ordered_store(T_OBJECT); + case vmIntrinsics::_putOrderedInt: return inline_unsafe_ordered_store(T_INT); + case vmIntrinsics::_putOrderedLong: return inline_unsafe_ordered_store(T_LONG); - case vmIntrinsics::_getAndAddInt: - return inline_unsafe_load_store(T_INT, LS_xadd); - case vmIntrinsics::_getAndAddLong: - return inline_unsafe_load_store(T_LONG, LS_xadd); - case vmIntrinsics::_getAndSetInt: - return inline_unsafe_load_store(T_INT, LS_xchg); - case vmIntrinsics::_getAndSetLong: - return inline_unsafe_load_store(T_LONG, LS_xchg); - case vmIntrinsics::_getAndSetObject: - return inline_unsafe_load_store(T_OBJECT, LS_xchg); + case vmIntrinsics::_getAndAddInt: return inline_unsafe_load_store(T_INT, LS_xadd); + case vmIntrinsics::_getAndAddLong: return inline_unsafe_load_store(T_LONG, LS_xadd); + case vmIntrinsics::_getAndSetInt: return inline_unsafe_load_store(T_INT, LS_xchg); + case vmIntrinsics::_getAndSetLong: return inline_unsafe_load_store(T_LONG, LS_xchg); + case vmIntrinsics::_getAndSetObject: return inline_unsafe_load_store(T_OBJECT, LS_xchg); - case vmIntrinsics::_currentThread: - return inline_native_currentThread(); - case vmIntrinsics::_isInterrupted: - return inline_native_isInterrupted(); + case vmIntrinsics::_currentThread: return inline_native_currentThread(); + case vmIntrinsics::_isInterrupted: return inline_native_isInterrupted(); #ifdef TRACE_HAVE_INTRINSICS - case vmIntrinsics::_classID: - return inline_native_classID(); - case vmIntrinsics::_threadID: - return inline_native_threadID(); - case vmIntrinsics::_counterTime: - return inline_native_time_funcs(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), "counterTime"); + case vmIntrinsics::_classID: return inline_native_classID(); + case vmIntrinsics::_threadID: return inline_native_threadID(); + case vmIntrinsics::_counterTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), "counterTime"); #endif - case vmIntrinsics::_currentTimeMillis: - return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis"); - case vmIntrinsics::_nanoTime: - return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime"); - case vmIntrinsics::_allocateInstance: - return inline_unsafe_allocate(); - case vmIntrinsics::_copyMemory: - return inline_unsafe_copyMemory(); - case vmIntrinsics::_newArray: - return inline_native_newArray(); - case vmIntrinsics::_getLength: - return inline_native_getLength(); - case vmIntrinsics::_copyOf: - return inline_array_copyOf(false); - case vmIntrinsics::_copyOfRange: - return inline_array_copyOf(true); - case vmIntrinsics::_equalsC: - return inline_array_equals(); - case vmIntrinsics::_clone: - return inline_native_clone(intrinsic()->is_virtual()); + case vmIntrinsics::_currentTimeMillis: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeMillis), "currentTimeMillis"); + case vmIntrinsics::_nanoTime: return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime"); + case vmIntrinsics::_allocateInstance: return inline_unsafe_allocate(); + case vmIntrinsics::_copyMemory: return inline_unsafe_copyMemory(); + case vmIntrinsics::_newArray: return inline_native_newArray(); + case vmIntrinsics::_getLength: return inline_native_getLength(); + case vmIntrinsics::_copyOf: return inline_array_copyOf(false); + case vmIntrinsics::_copyOfRange: return inline_array_copyOf(true); + case vmIntrinsics::_equalsC: return inline_array_equals(); + case vmIntrinsics::_clone: return inline_native_clone(intrinsic()->is_virtual()); - case vmIntrinsics::_isAssignableFrom: - return inline_native_subtype_check(); + case vmIntrinsics::_isAssignableFrom: return inline_native_subtype_check(); case vmIntrinsics::_isInstance: case vmIntrinsics::_getModifiers: @@ -797,44 +760,32 @@ bool LibraryCallKit::try_to_inline() { case vmIntrinsics::_isPrimitive: case vmIntrinsics::_getSuperclass: case vmIntrinsics::_getComponentType: - case vmIntrinsics::_getClassAccessFlags: - return inline_native_Class_query(intrinsic_id()); + case vmIntrinsics::_getClassAccessFlags: return inline_native_Class_query(intrinsic_id()); case vmIntrinsics::_floatToRawIntBits: case vmIntrinsics::_floatToIntBits: case vmIntrinsics::_intBitsToFloat: case vmIntrinsics::_doubleToRawLongBits: case vmIntrinsics::_doubleToLongBits: - case vmIntrinsics::_longBitsToDouble: - return inline_fp_conversions(intrinsic_id()); + case vmIntrinsics::_longBitsToDouble: return inline_fp_conversions(intrinsic_id()); case vmIntrinsics::_numberOfLeadingZeros_i: case vmIntrinsics::_numberOfLeadingZeros_l: - return inline_numberOfLeadingZeros(intrinsic_id()); - case vmIntrinsics::_numberOfTrailingZeros_i: case vmIntrinsics::_numberOfTrailingZeros_l: - return inline_numberOfTrailingZeros(intrinsic_id()); - case vmIntrinsics::_bitCount_i: case vmIntrinsics::_bitCount_l: - return inline_bitCount(intrinsic_id()); - case vmIntrinsics::_reverseBytes_i: case vmIntrinsics::_reverseBytes_l: case vmIntrinsics::_reverseBytes_s: - case vmIntrinsics::_reverseBytes_c: - return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id()); + case vmIntrinsics::_reverseBytes_c: return inline_number_methods(intrinsic_id()); - case vmIntrinsics::_getCallerClass: - return inline_native_Reflection_getCallerClass(); + case vmIntrinsics::_getCallerClass: return inline_native_Reflection_getCallerClass(); - case vmIntrinsics::_Reference_get: - return inline_reference_get(); + case vmIntrinsics::_Reference_get: return inline_reference_get(); case vmIntrinsics::_aescrypt_encryptBlock: - case vmIntrinsics::_aescrypt_decryptBlock: - return inline_aescrypt_Block(intrinsic_id()); + case vmIntrinsics::_aescrypt_decryptBlock: return inline_aescrypt_Block(intrinsic_id()); case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt: case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt: @@ -883,13 +834,13 @@ Node* LibraryCallKit::try_to_predicate() { } } -//------------------------------push_result------------------------------ +//------------------------------set_result------------------------------- // Helper function for finishing intrinsics. -void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) { +void LibraryCallKit::set_result(RegionNode* region, PhiNode* value) { record_for_igvn(region); set_control(_gvn.transform(region)); - BasicType value_type = value->type()->basic_type(); - push_node(value_type, _gvn.transform(value)); + set_result( _gvn.transform(value)); + assert(value->type()->basic_type() == result()->bottom_type()->basic_type(), "sanity"); } //------------------------------generate_guard--------------------------- @@ -1078,7 +1029,6 @@ Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1, Node* str2 // to Int nodes containing the lenghts of str1 and str2. // Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2) { - Node* result = NULL; switch (opcode) { case Op_StrIndexOf: @@ -1105,51 +1055,23 @@ Node* LibraryCallKit::make_string_method_node(int opcode, Node* str1_start, Node } //------------------------------inline_string_compareTo------------------------ +// public int java.lang.String.compareTo(String anotherString); bool LibraryCallKit::inline_string_compareTo() { - - if (!Matcher::has_match_rule(Op_StrComp)) return false; - - _sp += 2; - Node *argument = pop(); // pop non-receiver first: it was pushed second - Node *receiver = pop(); - - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. - _sp += 2; - receiver = do_null_check(receiver, T_OBJECT); - argument = do_null_check(argument, T_OBJECT); - _sp -= 2; + Node* receiver = null_check(argument(0)); + Node* arg = null_check(argument(1)); if (stopped()) { return true; } - - Node* compare = make_string_method_node(Op_StrComp, receiver, argument); - push(compare); + set_result(make_string_method_node(Op_StrComp, receiver, arg)); return true; } //------------------------------inline_string_equals------------------------ bool LibraryCallKit::inline_string_equals() { - - if (!Matcher::has_match_rule(Op_StrEquals)) return false; - - int nargs = 2; - _sp += nargs; - Node* argument = pop(); // pop non-receiver first: it was pushed second - Node* receiver = pop(); - - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. - _sp += nargs; - receiver = do_null_check(receiver, T_OBJECT); - //should not do null check for argument for String.equals(), because spec - //allows to specify NULL as argument. - _sp -= nargs; - + Node* receiver = null_check_receiver(); + // NOTE: Do not null check argument for String.equals() because spec + // allows to specify NULL as argument. + Node* argument = this->argument(1); if (stopped()) { return true; } @@ -1173,9 +1095,7 @@ bool LibraryCallKit::inline_string_equals() { ciInstanceKlass* klass = env()->String_klass(); if (!stopped()) { - _sp += nargs; // gen_instanceof might do an uncommon trap Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass))); - _sp -= nargs; Node* cmp = _gvn.transform(new (C) CmpINode(inst, intcon(1))); Node* bol = _gvn.transform(new (C) BoolNode(cmp, BoolTest::ne)); @@ -1207,7 +1127,7 @@ bool LibraryCallKit::inline_string_equals() { Node* receiver_cnt = load_String_length(no_ctrl, receiver); // Get start addr of argument - Node* argument_val = load_String_value(no_ctrl, argument); + Node* argument_val = load_String_value(no_ctrl, argument); Node* argument_offset = load_String_offset(no_ctrl, argument); Node* argument_start = array_element_address(argument_val, argument_offset, T_CHAR); @@ -1236,24 +1156,15 @@ bool LibraryCallKit::inline_string_equals() { set_control(_gvn.transform(region)); record_for_igvn(region); - push(_gvn.transform(phi)); - + set_result(_gvn.transform(phi)); return true; } //------------------------------inline_array_equals---------------------------- bool LibraryCallKit::inline_array_equals() { - - if (!Matcher::has_match_rule(Op_AryEq)) return false; - - _sp += 2; - Node *argument2 = pop(); - Node *argument1 = pop(); - - Node* equals = - _gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS), - argument1, argument2) ); - push(equals); + Node* arg1 = argument(0); + Node* arg2 = argument(1); + set_result(_gvn.transform(new (C) AryEqNode(control(), memory(TypeAryPtr::CHARS), arg1, arg2))); return true; } @@ -1325,7 +1236,7 @@ Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_ar float likely = PROB_LIKELY(0.9); float unlikely = PROB_UNLIKELY(0.9); - const int nargs = 2; // number of arguments to push back for uncommon trap in predicate + const int nargs = 0; // no arguments to push back for uncommon trap in predicate Node* source = load_String_value(no_ctrl, string_object); Node* sourceOffset = load_String_offset(no_ctrl, string_object); @@ -1396,10 +1307,8 @@ Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_ar //------------------------------inline_string_indexOf------------------------ bool LibraryCallKit::inline_string_indexOf() { - - _sp += 2; - Node *argument = pop(); // pop non-receiver first: it was pushed second - Node *receiver = pop(); + Node* receiver = argument(0); + Node* arg = argument(1); Node* result; // Disable the use of pcmpestri until it can be guaranteed that @@ -1409,15 +1318,8 @@ bool LibraryCallKit::inline_string_indexOf() { // Generate SSE4.2 version of indexOf // We currently only have match rules that use SSE4.2 - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. - _sp += 2; - receiver = do_null_check(receiver, T_OBJECT); - argument = do_null_check(argument, T_OBJECT); - _sp -= 2; - + receiver = null_check(receiver); + arg = null_check(arg); if (stopped()) { return true; } @@ -1439,12 +1341,12 @@ bool LibraryCallKit::inline_string_indexOf() { Node* source_cnt = load_String_length(no_ctrl, receiver); // Get start addr of substring - Node* substr = load_String_value(no_ctrl, argument); - Node* substr_offset = load_String_offset(no_ctrl, argument); + Node* substr = load_String_value(no_ctrl, arg); + Node* substr_offset = load_String_offset(no_ctrl, arg); Node* substr_start = array_element_address(substr, substr_offset, T_CHAR); // Get length of source string - Node* substr_cnt = load_String_length(no_ctrl, argument); + Node* substr_cnt = load_String_length(no_ctrl, arg); // Check for substr count > string count Node* cmp = _gvn.transform( new(C) CmpINode(substr_cnt, source_cnt) ); @@ -1477,10 +1379,10 @@ bool LibraryCallKit::inline_string_indexOf() { } else { // Use LibraryCallKit::string_indexOf // don't intrinsify if argument isn't a constant string. - if (!argument->is_Con()) { + if (!arg->is_Con()) { return false; } - const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr(); + const TypeOopPtr* str_type = _gvn.type(arg)->isa_oopptr(); if (str_type == NULL) { return false; } @@ -1511,21 +1413,15 @@ bool LibraryCallKit::inline_string_indexOf() { return false; } - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. - _sp += 2; - receiver = do_null_check(receiver, T_OBJECT); - // No null check on the argument is needed since it's a constant String oop. - _sp -= 2; + receiver = null_check(receiver, T_OBJECT); + // NOTE: No null check on the argument is needed since it's a constant String oop. if (stopped()) { return true; } // The null string as a pattern always returns 0 (match at beginning of string) if (c == 0) { - push(intcon(0)); + set_result(intcon(0)); return true; } @@ -1548,47 +1444,54 @@ bool LibraryCallKit::inline_string_indexOf() { result = string_indexOf(receiver, pat, o, cache, md2); } - - push(result); + set_result(result); return true; } -//--------------------------pop_math_arg-------------------------------- -// Pop a double argument to a math function from the stack -// rounding it if necessary. -Node * LibraryCallKit::pop_math_arg() { - Node *arg = pop_pair(); - if( Matcher::strict_fp_requires_explicit_rounding && UseSSE<=1 ) - arg = _gvn.transform( new (C) RoundDoubleNode(0, arg) ); - return arg; +//--------------------------round_double_node-------------------------------- +// Round a double node if necessary. +Node* LibraryCallKit::round_double_node(Node* n) { + if (Matcher::strict_fp_requires_explicit_rounding && UseSSE <= 1) + n = _gvn.transform(new (C) RoundDoubleNode(0, n)); + return n; +} + +//------------------------------inline_math----------------------------------- +// public static double Math.abs(double) +// public static double Math.sqrt(double) +// public static double Math.log(double) +// public static double Math.log10(double) +bool LibraryCallKit::inline_math(vmIntrinsics::ID id) { + Node* arg = round_double_node(argument(0)); + Node* n; + switch (id) { + case vmIntrinsics::_dabs: n = new (C) AbsDNode( arg); break; + case vmIntrinsics::_dsqrt: n = new (C) SqrtDNode(0, arg); break; + case vmIntrinsics::_dlog: n = new (C) LogDNode( arg); break; + case vmIntrinsics::_dlog10: n = new (C) Log10DNode( arg); break; + default: fatal_unexpected_iid(id); break; + } + set_result(_gvn.transform(n)); + return true; } //------------------------------inline_trig---------------------------------- // Inline sin/cos/tan instructions, if possible. If rounding is required, do // argument reduction which will turn into a fast/slow diamond. bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) { - _sp += arg_size(); // restore stack pointer - Node* arg = pop_math_arg(); - Node* trig = NULL; + Node* arg = round_double_node(argument(0)); + Node* n = NULL; switch (id) { - case vmIntrinsics::_dsin: - trig = _gvn.transform((Node*)new (C) SinDNode(arg)); - break; - case vmIntrinsics::_dcos: - trig = _gvn.transform((Node*)new (C) CosDNode(arg)); - break; - case vmIntrinsics::_dtan: - trig = _gvn.transform((Node*)new (C) TanDNode(arg)); - break; - default: - assert(false, "bad intrinsic was passed in"); - return false; + case vmIntrinsics::_dsin: n = new (C) SinDNode(arg); break; + case vmIntrinsics::_dcos: n = new (C) CosDNode(arg); break; + case vmIntrinsics::_dtan: n = new (C) TanDNode(arg); break; + default: fatal_unexpected_iid(id); break; } + n = _gvn.transform(n); // Rounding required? Check for argument reduction! - if( Matcher::strict_fp_requires_explicit_rounding ) { - + if (Matcher::strict_fp_requires_explicit_rounding) { static const double pi_4 = 0.7853981633974483; static const double neg_pi_4 = -0.7853981633974483; // pi/2 in 80-bit extended precision @@ -1623,8 +1526,8 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) { // probably do the math inside the SIN encoding. // Make the merge point - RegionNode *r = new (C) RegionNode(3); - Node *phi = new (C) PhiNode(r,Type::DOUBLE); + RegionNode* r = new (C) RegionNode(3); + Node* phi = new (C) PhiNode(r, Type::DOUBLE); // Flatten arg so we need only 1 test Node *abs = _gvn.transform(new (C) AbsDNode(arg)); @@ -1639,7 +1542,7 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) { set_control(opt_iff(r,iff)); // Set fast path result - phi->init_req(2,trig); + phi->init_req(2, n); // Slow path - non-blocking leaf call Node* call = NULL; @@ -1661,37 +1564,18 @@ bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) { break; } assert(control()->in(0) == call, ""); - Node* slow_result = _gvn.transform(new (C) ProjNode(call,TypeFunc::Parms)); - r->init_req(1,control()); - phi->init_req(1,slow_result); + Node* slow_result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms)); + r->init_req(1, control()); + phi->init_req(1, slow_result); // Post-merge set_control(_gvn.transform(r)); record_for_igvn(r); - trig = _gvn.transform(phi); + n = _gvn.transform(phi); C->set_has_split_ifs(true); // Has chance for split-if optimization } - // Push result back on JVM stack - push_pair(trig); - return true; -} - -//------------------------------inline_sqrt------------------------------------- -// Inline square root instruction, if possible. -bool LibraryCallKit::inline_sqrt(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_dsqrt, "Not square root"); - _sp += arg_size(); // restore stack pointer - push_pair(_gvn.transform(new (C) SqrtDNode(0, pop_math_arg()))); - return true; -} - -//------------------------------inline_abs------------------------------------- -// Inline absolute value instruction, if possible. -bool LibraryCallKit::inline_abs(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_dabs, "Not absolute value"); - _sp += arg_size(); // restore stack pointer - push_pair(_gvn.transform(new (C) AbsDNode(pop_math_arg()))); + set_result(n); return true; } @@ -1700,24 +1584,18 @@ void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFu //result=(result.isNaN())? funcAddr():result; // Check: If isNaN() by checking result!=result? then either trap // or go to runtime - Node* cmpisnan = _gvn.transform(new (C) CmpDNode(result,result)); + Node* cmpisnan = _gvn.transform(new (C) CmpDNode(result, result)); // Build the boolean node - Node* bolisnum = _gvn.transform( new (C) BoolNode(cmpisnan, BoolTest::eq) ); + Node* bolisnum = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::eq)); if (!too_many_traps(Deoptimization::Reason_intrinsic)) { - { - BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT); - // End the current control-flow path - push_pair(x); - if (y != NULL) { - push_pair(y); - } + { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT); // The pow or exp intrinsic returned a NaN, which requires a call // to the runtime. Recompile with the runtime call. uncommon_trap(Deoptimization::Reason_intrinsic, Deoptimization::Action_make_not_entrant); } - push_pair(result); + set_result(result); } else { // If this inlining ever returned NaN in the past, we compile a call // to the runtime to properly handle corner cases @@ -1727,7 +1605,7 @@ void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFu Node* if_fast = _gvn.transform( new (C) IfTrueNode(iff) ); if (!if_slow->is_top()) { - RegionNode* result_region = new(C) RegionNode(3); + RegionNode* result_region = new (C) RegionNode(3); PhiNode* result_val = new (C) PhiNode(result_region, Type::DOUBLE); result_region->init_req(1, if_fast); @@ -1747,9 +1625,9 @@ void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFu result_region->init_req(2, control()); result_val->init_req(2, value); - push_result(result_region, result_val); + set_result(result_region, result_val); } else { - push_pair(result); + set_result(result); } } } @@ -1757,25 +1635,19 @@ void LibraryCallKit::finish_pow_exp(Node* result, Node* x, Node* y, const TypeFu //------------------------------inline_exp------------------------------------- // Inline exp instructions, if possible. The Intel hardware only misses // really odd corner cases (+/- Infinity). Just uncommon-trap them. -bool LibraryCallKit::inline_exp(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_dexp, "Not exp"); +bool LibraryCallKit::inline_exp() { + Node* arg = round_double_node(argument(0)); + Node* n = _gvn.transform(new (C) ExpDNode(0, arg)); - _sp += arg_size(); // restore stack pointer - Node *x = pop_math_arg(); - Node *result = _gvn.transform(new (C) ExpDNode(0,x)); - - finish_pow_exp(result, x, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); + finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); C->set_has_split_ifs(true); // Has chance for split-if optimization - return true; } //------------------------------inline_pow------------------------------------- // Inline power instructions, if possible. -bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_dpow, "Not pow"); - +bool LibraryCallKit::inline_pow() { // Pseudocode for pow // if (x <= 0.0) { // long longy = (long)y; @@ -1793,15 +1665,14 @@ bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) { // } // return result; - _sp += arg_size(); // restore stack pointer - Node* y = pop_math_arg(); - Node* x = pop_math_arg(); + Node* x = round_double_node(argument(0)); + Node* y = round_double_node(argument(2)); Node* result = NULL; if (!too_many_traps(Deoptimization::Reason_intrinsic)) { // Short form: skip the fancy tests and just check for NaN result. - result = _gvn.transform( new (C) PowDNode(0, x, y) ); + result = _gvn.transform(new (C) PowDNode(0, x, y)); } else { // If this inlining ever returned NaN in the past, include all // checks + call to the runtime. @@ -1919,55 +1790,23 @@ bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) { // Post merge set_control(_gvn.transform(r)); record_for_igvn(r); - result=_gvn.transform(phi); + result = _gvn.transform(phi); } finish_pow_exp(result, x, y, OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); C->set_has_split_ifs(true); // Has chance for split-if optimization - - return true; -} - -//------------------------------inline_trans------------------------------------- -// Inline transcendental instructions, if possible. The Intel hardware gets -// these right, no funny corner cases missed. -bool LibraryCallKit::inline_trans(vmIntrinsics::ID id) { - _sp += arg_size(); // restore stack pointer - Node* arg = pop_math_arg(); - Node* trans = NULL; - - switch (id) { - case vmIntrinsics::_dlog: - trans = _gvn.transform((Node*)new (C) LogDNode(arg)); - break; - case vmIntrinsics::_dlog10: - trans = _gvn.transform((Node*)new (C) Log10DNode(arg)); - break; - default: - assert(false, "bad intrinsic was passed in"); - return false; - } - - // Push result back on JVM stack - push_pair(trans); return true; } //------------------------------runtime_math----------------------------- bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) { - Node* a = NULL; - Node* b = NULL; - assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(), "must be (DD)D or (D)D type"); // Inputs - _sp += arg_size(); // restore stack pointer - if (call_type == OptoRuntime::Math_DD_D_Type()) { - b = pop_math_arg(); - } - a = pop_math_arg(); + Node* a = round_double_node(argument(0)); + Node* b = (call_type == OptoRuntime::Math_DD_D_Type()) ? round_double_node(argument(2)) : NULL; const TypePtr* no_memory_effects = NULL; Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName, @@ -1979,43 +1818,43 @@ bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, c assert(value_top == top(), "second value must be top"); #endif - push_pair(value); + set_result(value); return true; } //------------------------------inline_math_native----------------------------- bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) { +#define FN_PTR(f) CAST_FROM_FN_PTR(address, f) switch (id) { // These intrinsics are not properly supported on all hardware - case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dcos), "COS"); - case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dsin), "SIN"); - case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan), "TAN"); + case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dcos), "COS"); + case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dsin), "SIN"); + case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dtan), "TAN"); - case vmIntrinsics::_dlog: return Matcher::has_match_rule(Op_LogD) ? inline_trans(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog), "LOG"); - case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_trans(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), "LOG10"); + case vmIntrinsics::_dlog: return Matcher::has_match_rule(Op_LogD) ? inline_math(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog), "LOG"); + case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_math(id) : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10"); // These intrinsics are supported on all hardware - case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false; - case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_abs(id) : false; + case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_math(id) : false; + case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false; - case vmIntrinsics::_dexp: return - Matcher::has_match_rule(Op_ExpD) ? inline_exp(id) : - runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP"); - case vmIntrinsics::_dpow: return - Matcher::has_match_rule(Op_PowD) ? inline_pow(id) : - runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW"); + case vmIntrinsics::_dexp: return Matcher::has_match_rule(Op_ExpD) ? inline_exp() : + runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dexp), "EXP"); + case vmIntrinsics::_dpow: return Matcher::has_match_rule(Op_PowD) ? inline_pow() : + runtime_math(OptoRuntime::Math_DD_D_Type(), FN_PTR(SharedRuntime::dpow), "POW"); +#undef FN_PTR // These intrinsics are not yet correctly implemented case vmIntrinsics::_datan2: return false; default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); return false; } } @@ -2030,8 +1869,7 @@ static bool is_simple_name(Node* n) { //----------------------------inline_min_max----------------------------------- bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) { - push(generate_min_max(id, argument(0), argument(1))); - + set_result(generate_min_max(id, argument(0), argument(1))); return true; } @@ -2254,99 +2092,37 @@ inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) { } } -//-------------------inline_numberOfLeadingZeros_int/long----------------------- -// inline int Integer.numberOfLeadingZeros(int) -// inline int Long.numberOfLeadingZeros(long) -bool LibraryCallKit::inline_numberOfLeadingZeros(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_numberOfLeadingZeros_i || id == vmIntrinsics::_numberOfLeadingZeros_l, "not numberOfLeadingZeros"); - if (id == vmIntrinsics::_numberOfLeadingZeros_i && !Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false; - if (id == vmIntrinsics::_numberOfLeadingZeros_l && !Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false; - _sp += arg_size(); // restore stack pointer +//--------------------------inline_number_methods----------------------------- +// inline int Integer.numberOfLeadingZeros(int) +// inline int Long.numberOfLeadingZeros(long) +// +// inline int Integer.numberOfTrailingZeros(int) +// inline int Long.numberOfTrailingZeros(long) +// +// inline int Integer.bitCount(int) +// inline int Long.bitCount(long) +// +// inline char Character.reverseBytes(char) +// inline short Short.reverseBytes(short) +// inline int Integer.reverseBytes(int) +// inline long Long.reverseBytes(long) +bool LibraryCallKit::inline_number_methods(vmIntrinsics::ID id) { + Node* arg = argument(0); + Node* n; switch (id) { - case vmIntrinsics::_numberOfLeadingZeros_i: - push(_gvn.transform(new (C) CountLeadingZerosINode(pop()))); - break; - case vmIntrinsics::_numberOfLeadingZeros_l: - push(_gvn.transform(new (C) CountLeadingZerosLNode(pop_pair()))); - break; - default: - ShouldNotReachHere(); - } - return true; -} - -//-------------------inline_numberOfTrailingZeros_int/long---------------------- -// inline int Integer.numberOfTrailingZeros(int) -// inline int Long.numberOfTrailingZeros(long) -bool LibraryCallKit::inline_numberOfTrailingZeros(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_numberOfTrailingZeros_i || id == vmIntrinsics::_numberOfTrailingZeros_l, "not numberOfTrailingZeros"); - if (id == vmIntrinsics::_numberOfTrailingZeros_i && !Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false; - if (id == vmIntrinsics::_numberOfTrailingZeros_l && !Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false; - _sp += arg_size(); // restore stack pointer - switch (id) { - case vmIntrinsics::_numberOfTrailingZeros_i: - push(_gvn.transform(new (C) CountTrailingZerosINode(pop()))); - break; - case vmIntrinsics::_numberOfTrailingZeros_l: - push(_gvn.transform(new (C) CountTrailingZerosLNode(pop_pair()))); - break; - default: - ShouldNotReachHere(); - } - return true; -} - -//----------------------------inline_bitCount_int/long----------------------- -// inline int Integer.bitCount(int) -// inline int Long.bitCount(long) -bool LibraryCallKit::inline_bitCount(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_bitCount_i || id == vmIntrinsics::_bitCount_l, "not bitCount"); - if (id == vmIntrinsics::_bitCount_i && !Matcher::has_match_rule(Op_PopCountI)) return false; - if (id == vmIntrinsics::_bitCount_l && !Matcher::has_match_rule(Op_PopCountL)) return false; - _sp += arg_size(); // restore stack pointer - switch (id) { - case vmIntrinsics::_bitCount_i: - push(_gvn.transform(new (C) PopCountINode(pop()))); - break; - case vmIntrinsics::_bitCount_l: - push(_gvn.transform(new (C) PopCountLNode(pop_pair()))); - break; - default: - ShouldNotReachHere(); - } - return true; -} - -//----------------------------inline_reverseBytes_int/long/char/short------------------- -// inline Integer.reverseBytes(int) -// inline Long.reverseBytes(long) -// inline Character.reverseBytes(char) -// inline Short.reverseBytes(short) -bool LibraryCallKit::inline_reverseBytes(vmIntrinsics::ID id) { - assert(id == vmIntrinsics::_reverseBytes_i || id == vmIntrinsics::_reverseBytes_l || - id == vmIntrinsics::_reverseBytes_c || id == vmIntrinsics::_reverseBytes_s, - "not reverse Bytes"); - if (id == vmIntrinsics::_reverseBytes_i && !Matcher::has_match_rule(Op_ReverseBytesI)) return false; - if (id == vmIntrinsics::_reverseBytes_l && !Matcher::has_match_rule(Op_ReverseBytesL)) return false; - if (id == vmIntrinsics::_reverseBytes_c && !Matcher::has_match_rule(Op_ReverseBytesUS)) return false; - if (id == vmIntrinsics::_reverseBytes_s && !Matcher::has_match_rule(Op_ReverseBytesS)) return false; - _sp += arg_size(); // restore stack pointer - switch (id) { - case vmIntrinsics::_reverseBytes_i: - push(_gvn.transform(new (C) ReverseBytesINode(0, pop()))); - break; - case vmIntrinsics::_reverseBytes_l: - push_pair(_gvn.transform(new (C) ReverseBytesLNode(0, pop_pair()))); - break; - case vmIntrinsics::_reverseBytes_c: - push(_gvn.transform(new (C) ReverseBytesUSNode(0, pop()))); - break; - case vmIntrinsics::_reverseBytes_s: - push(_gvn.transform(new (C) ReverseBytesSNode(0, pop()))); - break; - default: - ; + case vmIntrinsics::_numberOfLeadingZeros_i: n = new (C) CountLeadingZerosINode( arg); break; + case vmIntrinsics::_numberOfLeadingZeros_l: n = new (C) CountLeadingZerosLNode( arg); break; + case vmIntrinsics::_numberOfTrailingZeros_i: n = new (C) CountTrailingZerosINode(arg); break; + case vmIntrinsics::_numberOfTrailingZeros_l: n = new (C) CountTrailingZerosLNode(arg); break; + case vmIntrinsics::_bitCount_i: n = new (C) PopCountINode( arg); break; + case vmIntrinsics::_bitCount_l: n = new (C) PopCountLNode( arg); break; + case vmIntrinsics::_reverseBytes_c: n = new (C) ReverseBytesUSNode(0, arg); break; + case vmIntrinsics::_reverseBytes_s: n = new (C) ReverseBytesSNode( 0, arg); break; + case vmIntrinsics::_reverseBytes_i: n = new (C) ReverseBytesINode( 0, arg); break; + case vmIntrinsics::_reverseBytes_l: n = new (C) ReverseBytesLNode( 0, arg); break; + default: fatal_unexpected_iid(id); break; } + set_result(_gvn.transform(n)); return true; } @@ -2356,7 +2132,7 @@ const static BasicType T_ADDRESS_HOLDER = T_LONG; // Helper that guards and inserts a pre-barrier. void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset, - Node* pre_val, int nargs, bool need_mem_bar) { + Node* pre_val, bool need_mem_bar) { // We could be accessing the referent field of a reference object. If so, when G1 // is enabled, we need to log the value in the referent field in an SATB buffer. // This routine performs some compile time filters and generates suitable @@ -2406,8 +2182,8 @@ void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset, // } // } - float likely = PROB_LIKELY(0.999); - float unlikely = PROB_UNLIKELY(0.999); + float likely = PROB_LIKELY( 0.999); + float unlikely = PROB_UNLIKELY(0.999); IdealKit ideal(this); #define __ ideal. @@ -2419,9 +2195,7 @@ void LibraryCallKit::insert_pre_barrier(Node* base_oop, Node* offset, sync_kit(ideal); Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass())); - _sp += nargs; // gen_instanceof might do an uncommon trap Node* is_instof = gen_instanceof(base_oop, ref_klass_con); - _sp -= nargs; // Update IdealKit memory and control from graphKit. __ sync_kit(this); @@ -2505,7 +2279,7 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas { ResourceMark rm; // Check the signatures. - ciSignature* sig = signature(); + ciSignature* sig = callee()->signature(); #ifdef ASSERT if (!is_store) { // Object getObject(Object base, int/long offset), etc. @@ -2543,42 +2317,19 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - int type_words = type2size[ (type == T_ADDRESS) ? T_LONG : type ]; - - // Argument words: "this" plus (oop/offset) or (lo/hi) args plus maybe 1 or 2 value words - int nargs = 1 + (is_native_ptr ? 2 : 3) + (is_store ? type_words : 0); - assert(callee()->arg_size() == nargs, "must be"); - - debug_only(int saved_sp = _sp); - _sp += nargs; - - Node* val; - debug_only(val = (Node*)(uintptr_t)-1); - - - if (is_store) { - // Get the value being stored. (Pop it first; it was pushed last.) - switch (type) { - case T_DOUBLE: - case T_LONG: - case T_ADDRESS: - val = pop_pair(); - break; - default: - val = pop(); - } - } + Node* receiver = argument(0); // type: oop // Build address expression. See the code in inline_unsafe_prefetch. - Node *adr; - Node *heap_base_oop = top(); + Node* adr; + Node* heap_base_oop = top(); Node* offset = top(); + Node* val; if (!is_native_ptr) { - // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset - offset = pop_pair(); // The base is either a Java object or a value produced by Unsafe.staticFieldBase - Node* base = pop(); + Node* base = argument(1); // type: oop + // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset + offset = argument(2); // type: long // We currently rely on the cookies produced by Unsafe.xxxFieldOffset // to be plain byte offsets, which are also the same as those accepted // by oopDesc::field_base. @@ -2588,18 +2339,14 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas offset = ConvL2X(offset); adr = make_unsafe_address(base, offset); heap_base_oop = base; + val = is_store ? argument(4) : NULL; } else { - Node* ptr = pop_pair(); - // Adjust Java long to machine word: - ptr = ConvL2X(ptr); + Node* ptr = argument(1); // type: long + ptr = ConvL2X(ptr); // adjust Java long to machine word adr = make_unsafe_address(NULL, ptr); + val = is_store ? argument(3) : NULL; } - // Pop receiver last: it was pushed first. - Node *receiver = pop(); - - assert(saved_sp == _sp, "must have correct argument count"); - const TypePtr *adr_type = _gvn.type(adr)->isa_ptr(); // First guess at the value type. @@ -2633,13 +2380,7 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas } } - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when the primitive is inlined into a method - // which handles NullPointerExceptions. - _sp += nargs; - do_null_check(receiver, T_OBJECT); - _sp -= nargs; + receiver = null_check(receiver); if (stopped()) { return true; } @@ -2671,34 +2412,36 @@ bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, Bas if (!is_store) { Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile); - // load value and push onto stack + // load value switch (type) { case T_BOOLEAN: case T_CHAR: case T_BYTE: case T_SHORT: case T_INT: + case T_LONG: case T_FLOAT: - push(p); + case T_DOUBLE: break; case T_OBJECT: if (need_read_barrier) { - insert_pre_barrier(heap_base_oop, offset, p, nargs, !(is_volatile || need_mem_bar)); + insert_pre_barrier(heap_base_oop, offset, p, !(is_volatile || need_mem_bar)); } - push(p); break; case T_ADDRESS: // Cast to an int type. - p = _gvn.transform( new (C) CastP2XNode(NULL,p) ); + p = _gvn.transform(new (C) CastP2XNode(NULL, p)); p = ConvX2L(p); - push_pair(p); break; - case T_DOUBLE: - case T_LONG: - push_pair( p ); + default: + fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); break; - default: ShouldNotReachHere(); } + // The load node has the control of the preceding MemBarCPUOrder. All + // following nodes will have the control of the MemBarCPUOrder inserted at + // the end of this method. So, pushing the load onto the stack at a later + // point is fine. + set_result(p); } else { // place effect of store into memory switch (type) { @@ -2762,7 +2505,7 @@ bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, b { ResourceMark rm; // Check the signatures. - ciSignature* sig = signature(); + ciSignature* sig = callee()->signature(); #ifdef ASSERT // Object getObject(Object base, int/long offset), etc. BasicType rtype = sig->return_type()->basic_type(); @@ -2780,19 +2523,21 @@ bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, b C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - // Argument words: "this" if not static, plus (oop/offset) or (lo/hi) args - int nargs = (is_static ? 0 : 1) + (is_native_ptr ? 2 : 3); - - debug_only(int saved_sp = _sp); - _sp += nargs; + const int idx = is_static ? 0 : 1; + if (!is_static) { + null_check_receiver(); + if (stopped()) { + return true; + } + } // Build address expression. See the code in inline_unsafe_access. Node *adr; if (!is_native_ptr) { - // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset - Node* offset = pop_pair(); // The base is either a Java object or a value produced by Unsafe.staticFieldBase - Node* base = pop(); + Node* base = argument(idx + 0); // type: oop + // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset + Node* offset = argument(idx + 1); // type: long // We currently rely on the cookies produced by Unsafe.xxxFieldOffset // to be plain byte offsets, which are also the same as those accepted // by oopDesc::field_base. @@ -2802,31 +2547,11 @@ bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, b offset = ConvL2X(offset); adr = make_unsafe_address(base, offset); } else { - Node* ptr = pop_pair(); - // Adjust Java long to machine word: - ptr = ConvL2X(ptr); + Node* ptr = argument(idx + 0); // type: long + ptr = ConvL2X(ptr); // adjust Java long to machine word adr = make_unsafe_address(NULL, ptr); } - if (is_static) { - assert(saved_sp == _sp, "must have correct argument count"); - } else { - // Pop receiver last: it was pushed first. - Node *receiver = pop(); - assert(saved_sp == _sp, "must have correct argument count"); - - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when the primitive is inlined into a method - // which handles NullPointerExceptions. - _sp += nargs; - do_null_check(receiver, T_OBJECT); - _sp -= nargs; - if (stopped()) { - return true; - } - } - // Generate the read or write prefetch Node *prefetch; if (is_store) { @@ -2841,7 +2566,22 @@ bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, b } //----------------------------inline_unsafe_load_store---------------------------- - +// This method serves a couple of different customers (depending on LoadStoreKind): +// +// LS_cmpxchg: +// public final native boolean compareAndSwapObject(Object o, long offset, Object expected, Object x); +// public final native boolean compareAndSwapInt( Object o, long offset, int expected, int x); +// public final native boolean compareAndSwapLong( Object o, long offset, long expected, long x); +// +// LS_xadd: +// public int getAndAddInt( Object o, long offset, int delta) +// public long getAndAddLong(Object o, long offset, long delta) +// +// LS_xchg: +// int getAndSet(Object o, long offset, int newValue) +// long getAndSet(Object o, long offset, long newValue) +// Object getAndSet(Object o, long offset, Object newValue) +// bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind) { // This basic scheme here is the same as inline_unsafe_access, but // differs in enough details that combining them would make the code @@ -2856,7 +2596,8 @@ bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind BasicType rtype; { ResourceMark rm; - ciSignature* sig = signature(); + // Check the signatures. + ciSignature* sig = callee()->signature(); rtype = sig->return_type()->basic_type(); if (kind == LS_xadd || kind == LS_xchg) { // Check the signatures. @@ -2881,28 +2622,31 @@ bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind } #endif //PRODUCT - // number of stack slots per value argument (1 or 2) - int type_words = type2size[type]; - C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - // Argument words: "this" plus oop plus offset (plus oldvalue) plus newvalue/delta; - int nargs = 1 + 1 + 2 + ((kind == LS_cmpxchg) ? type_words : 0) + type_words; + // Get arguments: + Node* receiver = NULL; + Node* base = NULL; + Node* offset = NULL; + Node* oldval = NULL; + Node* newval = NULL; + if (kind == LS_cmpxchg) { + const bool two_slot_type = type2size[type] == 2; + receiver = argument(0); // type: oop + base = argument(1); // type: oop + offset = argument(2); // type: long + oldval = argument(4); // type: oop, int, or long + newval = argument(two_slot_type ? 6 : 5); // type: oop, int, or long + } else if (kind == LS_xadd || kind == LS_xchg){ + receiver = argument(0); // type: oop + base = argument(1); // type: oop + offset = argument(2); // type: long + oldval = NULL; + newval = argument(4); // type: oop, int, or long + } - // pop arguments: newval, offset, base, and receiver - debug_only(int saved_sp = _sp); - _sp += nargs; - Node* newval = (type_words == 1) ? pop() : pop_pair(); - Node* oldval = (kind == LS_cmpxchg) ? ((type_words == 1) ? pop() : pop_pair()) : NULL; - Node *offset = pop_pair(); - Node *base = pop(); - Node *receiver = pop(); - assert(saved_sp == _sp, "must have correct argument count"); - - // Null check receiver. - _sp += nargs; - do_null_check(receiver, T_OBJECT); - _sp -= nargs; + // Null check receiver. + receiver = null_check(receiver); if (stopped()) { return true; } @@ -3008,7 +2752,7 @@ bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind post_barrier(control(), load_store, base, adr, alias_idx, newval, T_OBJECT, true); break; default: - ShouldNotReachHere(); + fatal(err_msg_res("unexpected type %d: %s", type, type2name(type))); break; } @@ -3029,10 +2773,14 @@ bool LibraryCallKit::inline_unsafe_load_store(BasicType type, LoadStoreKind kind #endif assert(type2size[load_store->bottom_type()->basic_type()] == type2size[rtype], "result type should match"); - push_node(load_store->bottom_type()->basic_type(), load_store); + set_result(load_store); return true; } +//----------------------------inline_unsafe_ordered_store---------------------- +// public native void sun.misc.Unsafe.putOrderedObject(Object o, long offset, Object x); +// public native void sun.misc.Unsafe.putOrderedInt(Object o, long offset, int x); +// public native void sun.misc.Unsafe.putOrderedLong(Object o, long offset, long x); bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) { // This is another variant of inline_unsafe_access, differing in // that it always issues store-store ("release") barrier and ensures @@ -3044,7 +2792,7 @@ bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) { { ResourceMark rm; // Check the signatures. - ciSignature* sig = signature(); + ciSignature* sig = callee()->signature(); #ifdef ASSERT BasicType rtype = sig->return_type()->basic_type(); assert(rtype == T_VOID, "must return void"); @@ -3055,27 +2803,16 @@ bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) { } #endif //PRODUCT - // number of stack slots per value argument (1 or 2) - int type_words = type2size[type]; - C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - // Argument words: "this" plus oop plus offset plus value; - int nargs = 1 + 1 + 2 + type_words; + // Get arguments: + Node* receiver = argument(0); // type: oop + Node* base = argument(1); // type: oop + Node* offset = argument(2); // type: long + Node* val = argument(4); // type: oop, int, or long - // pop arguments: val, offset, base, and receiver - debug_only(int saved_sp = _sp); - _sp += nargs; - Node* val = (type_words == 1) ? pop() : pop_pair(); - Node *offset = pop_pair(); - Node *base = pop(); - Node *receiver = pop(); - assert(saved_sp == _sp, "must have correct argument count"); - - // Null check receiver. - _sp += nargs; - do_null_check(receiver, T_OBJECT); - _sp -= nargs; + // Null check receiver. + receiver = null_check(receiver); if (stopped()) { return true; } @@ -3092,7 +2829,7 @@ bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) { insert_mem_bar(Op_MemBarRelease); insert_mem_bar(Op_MemBarCPUOrder); // Ensure that the store is atomic for longs: - bool require_atomic_access = true; + const bool require_atomic_access = true; Node* store; if (type == T_OBJECT) // reference stores need a store barrier. store = store_oop_to_unknown(control(), base, adr, adr_type, val, type); @@ -3103,20 +2840,17 @@ bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) { return true; } +//----------------------------inline_unsafe_allocate--------------------------- +// public native Object sun.mics.Unsafe.allocateInstance(Class cls); bool LibraryCallKit::inline_unsafe_allocate() { if (callee()->is_static()) return false; // caller must have the capability! - int nargs = 1 + 1; - assert(signature()->size() == nargs-1, "alloc has 1 argument"); - null_check_receiver(callee()); // check then ignore argument(0) - _sp += nargs; // set original stack for use by uncommon_trap - Node* cls = do_null_check(argument(1), T_OBJECT); - _sp -= nargs; + + null_check_receiver(); // null-check, then ignore + Node* cls = null_check(argument(1)); if (stopped()) return true; - Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0); - _sp += nargs; // set original stack for use by uncommon_trap - kls = do_null_check(kls, T_OBJECT); - _sp -= nargs; + Node* kls = load_klass_from_mirror(cls, false, NULL, 0); + kls = null_check(kls); if (stopped()) return true; // argument was like int.class // Note: The argument might still be an illegal value like @@ -3127,12 +2861,11 @@ bool LibraryCallKit::inline_unsafe_allocate() { // can generate code to load it as unsigned byte. Node* inst = make_load(NULL, insp, TypeInt::UBYTE, T_BOOLEAN); Node* bits = intcon(InstanceKlass::fully_initialized); - Node* test = _gvn.transform( new (C) SubINode(inst, bits) ); + Node* test = _gvn.transform(new (C) SubINode(inst, bits)); // The 'test' is non-zero if we need to take a slow path. Node* obj = new_instance(kls, test); - push(obj); - + set_result(obj); return true; } @@ -3143,15 +2876,10 @@ bool LibraryCallKit::inline_unsafe_allocate() { * return myklass->trace_id & ~0x3 */ bool LibraryCallKit::inline_native_classID() { - int nargs = 1 + 1; - null_check_receiver(callee()); // check then ignore argument(0) - _sp += nargs; - Node* cls = do_null_check(argument(1), T_OBJECT); - _sp -= nargs; - Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0); - _sp += nargs; - kls = do_null_check(kls, T_OBJECT); - _sp -= nargs; + null_check_receiver(); // null-check, then ignore + Node* cls = null_check(argument(1), T_OBJECT); + Node* kls = load_klass_from_mirror(cls, false, NULL, 0); + kls = null_check(kls, T_OBJECT); ByteSize offset = TRACE_ID_OFFSET; Node* insp = basic_plus_adr(kls, in_bytes(offset)); Node* tvalue = make_load(NULL, insp, TypeLong::LONG, T_LONG); @@ -3162,7 +2890,7 @@ bool LibraryCallKit::inline_native_classID() { const TypePtr *adr_type = _gvn.type(insp)->isa_ptr(); store_to_memory(control(), insp, orl, T_LONG, adr_type); - push_pair(andl); + set_result(andl); return true; } @@ -3177,13 +2905,12 @@ bool LibraryCallKit::inline_native_threadID() { size_t thread_id_size = OSThread::thread_id_size(); if (thread_id_size == (size_t) BytesPerLong) { threadid = ConvL2I(make_load(control(), p, TypeLong::LONG, T_LONG)); - push(threadid); } else if (thread_id_size == (size_t) BytesPerInt) { threadid = make_load(control(), p, TypeInt::INT, T_INT); - push(threadid); } else { ShouldNotReachHere(); } + set_result(threadid); return true; } #endif @@ -3192,29 +2919,28 @@ bool LibraryCallKit::inline_native_threadID() { // inline code for System.currentTimeMillis() and System.nanoTime() // these have the same type and signature bool LibraryCallKit::inline_native_time_funcs(address funcAddr, const char* funcName) { - const TypeFunc *tf = OptoRuntime::void_long_Type(); + const TypeFunc* tf = OptoRuntime::void_long_Type(); const TypePtr* no_memory_effects = NULL; Node* time = make_runtime_call(RC_LEAF, tf, funcAddr, funcName, no_memory_effects); Node* value = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms+0)); #ifdef ASSERT - Node* value_top = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms + 1)); + Node* value_top = _gvn.transform(new (C) ProjNode(time, TypeFunc::Parms+1)); assert(value_top == top(), "second value must be top"); #endif - push_pair(value); + set_result(value); return true; } //------------------------inline_native_currentThread------------------ bool LibraryCallKit::inline_native_currentThread() { Node* junk = NULL; - push(generate_current_thread(junk)); + set_result(generate_current_thread(junk)); return true; } //------------------------inline_native_isInterrupted------------------ +// private native boolean java.lang.Thread.isInterrupted(boolean ClearInterrupted); bool LibraryCallKit::inline_native_isInterrupted() { - const int nargs = 1+1; // receiver + boolean - assert(nargs == arg_size(), "sanity"); // Add a fast path to t.isInterrupted(clear_int): // (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int)) // ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int) @@ -3312,9 +3038,8 @@ bool LibraryCallKit::inline_native_isInterrupted() { set_i_o( _gvn.transform(io_phi) ); } - push_result(result_rgn, result_val); C->set_has_split_ifs(true); // Has chance for split-if optimization - + set_result(result_rgn, result_val); return true; } @@ -3334,7 +3059,6 @@ Node* LibraryCallKit::load_mirror_from_klass(Node* klass) { // If the region is NULL, force never_see_null = true. Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror, bool never_see_null, - int nargs, RegionNode* region, int null_path, int offset) { @@ -3342,7 +3066,6 @@ Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror, Node* p = basic_plus_adr(mirror, offset); const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL; Node* kls = _gvn.transform( LoadKlassNode::make(_gvn, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type) ); - _sp += nargs; // any deopt will start just before call to enclosing method Node* null_ctl = top(); kls = null_check_oop(kls, &null_ctl, never_see_null); if (region != NULL) { @@ -3351,7 +3074,6 @@ Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror, } else { assert(null_ctl == top(), "no loose ends"); } - _sp -= nargs; return kls; } @@ -3376,7 +3098,6 @@ Node* LibraryCallKit::generate_interface_guard(Node* kls, RegionNode* region) { //-------------------------inline_native_Class_query------------------- bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { - int nargs = 1+0; // just the Class mirror, in most cases const Type* return_type = TypeInt::BOOL; Node* prim_return_value = top(); // what happens if it's a primitive class? bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check); @@ -3384,11 +3105,14 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { enum { _normal_path = 1, _prim_path = 2, PATH_LIMIT }; + Node* mirror = argument(0); + Node* obj = top(); + switch (id) { case vmIntrinsics::_isInstance: - nargs = 1+1; // the Class mirror, plus the object getting queried about // nothing is an instance of a primitive type prim_return_value = intcon(0); + obj = argument(1); break; case vmIntrinsics::_getModifiers: prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC); @@ -3419,12 +3143,10 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { return_type = TypeInt::INT; // not bool! 6297094 break; default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); + break; } - Node* mirror = argument(0); - Node* obj = (nargs <= 1)? top(): argument(1); - const TypeInstPtr* mirror_con = _gvn.type(mirror)->isa_instptr(); if (mirror_con == NULL) return false; // cannot happen? @@ -3451,9 +3173,7 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { // For Reflection.getClassAccessFlags(), the null check occurs in // the wrong place; see inline_unsafe_access(), above, for a similar // situation. - _sp += nargs; // set original stack for use by uncommon_trap - mirror = do_null_check(mirror, T_OBJECT); - _sp -= nargs; + mirror = null_check(mirror); // If mirror or obj is dead, only null-path is taken. if (stopped()) return true; @@ -3461,11 +3181,10 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { // Now load the mirror's klass metaobject, and null-check it. // Side-effects region with the control path if the klass is null. - Node* kls = load_klass_from_mirror(mirror, never_see_null, nargs, - region, _prim_path); + Node* kls = load_klass_from_mirror(mirror, never_see_null, region, _prim_path); // If kls is null, we have a primitive mirror. phi->init_req(_prim_path, prim_return_value); - if (stopped()) { push_result(region, phi); return true; } + if (stopped()) { set_result(region, phi); return true; } Node* p; // handy temp Node* null_ctl; @@ -3476,9 +3195,7 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { switch (id) { case vmIntrinsics::_isInstance: // nothing is an instance of a primitive type - _sp += nargs; // gen_instanceof might do an uncommon trap query_value = gen_instanceof(obj, kls); - _sp -= nargs; break; case vmIntrinsics::_getModifiers: @@ -3553,16 +3270,16 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { break; default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); + break; } // Fall-through is the normal case of a query to a real class. phi->init_req(1, query_value); region->init_req(1, control()); - push_result(region, phi); C->set_has_split_ifs(true); // Has chance for split-if optimization - + set_result(region, phi); return true; } @@ -3570,8 +3287,6 @@ bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) { // This intrinsic takes the JNI calls out of the heart of // UnsafeFieldAccessorImpl.set, which improves Field.set, readObject, etc. bool LibraryCallKit::inline_native_subtype_check() { - int nargs = 1+1; // the Class mirror, plus the other class getting examined - // Pull both arguments off the stack. Node* args[2]; // two java.lang.Class mirrors: superc, subc args[0] = argument(0); @@ -3602,9 +3317,7 @@ bool LibraryCallKit::inline_native_subtype_check() { int which_arg; for (which_arg = 0; which_arg <= 1; which_arg++) { Node* arg = args[which_arg]; - _sp += nargs; // set original stack for use by uncommon_trap - arg = do_null_check(arg, T_OBJECT); - _sp -= nargs; + arg = null_check(arg); if (stopped()) break; args[which_arg] = _gvn.transform(arg); @@ -3618,9 +3331,7 @@ bool LibraryCallKit::inline_native_subtype_check() { for (which_arg = 0; which_arg <= 1; which_arg++) { Node* kls = klasses[which_arg]; Node* null_ctl = top(); - _sp += nargs; // set original stack for use by uncommon_trap kls = null_check_oop(kls, &null_ctl, never_see_null); - _sp -= nargs; int prim_path = (which_arg == 0 ? _prim_0_path : _prim_1_path); region->init_req(prim_path, null_ctl); if (stopped()) break; @@ -3670,8 +3381,7 @@ bool LibraryCallKit::inline_native_subtype_check() { } set_control(_gvn.transform(region)); - push(_gvn.transform(phi)); - + set_result(_gvn.transform(phi)); return true; } @@ -3719,14 +3429,12 @@ Node* LibraryCallKit::generate_array_guard_common(Node* kls, RegionNode* region, //-----------------------inline_native_newArray-------------------------- +// private static native Object java.lang.reflect.newArray(Class componentType, int length); bool LibraryCallKit::inline_native_newArray() { - int nargs = 2; Node* mirror = argument(0); Node* count_val = argument(1); - _sp += nargs; // set original stack for use by uncommon_trap - mirror = do_null_check(mirror, T_OBJECT); - _sp -= nargs; + mirror = null_check(mirror); // If mirror or obj is dead, only null-path is taken. if (stopped()) return true; @@ -3740,7 +3448,6 @@ bool LibraryCallKit::inline_native_newArray() { bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check); Node* klass_node = load_array_klass_from_mirror(mirror, never_see_null, - nargs, result_reg, _slow_path); Node* normal_ctl = control(); Node* no_array_ctl = result_reg->in(_slow_path); @@ -3767,7 +3474,7 @@ bool LibraryCallKit::inline_native_newArray() { // Normal case: The array type has been cached in the java.lang.Class. // The following call works fine even if the array type is polymorphic. // It could be a dynamic mix of int[], boolean[], Object[], etc. - Node* obj = new_array(klass_node, count_val, nargs); + Node* obj = new_array(klass_node, count_val, 0); // no arguments to push result_reg->init_req(_normal_path, control()); result_val->init_req(_normal_path, obj); result_io ->init_req(_normal_path, i_o()); @@ -3777,23 +3484,18 @@ bool LibraryCallKit::inline_native_newArray() { // Return the combined state. set_i_o( _gvn.transform(result_io) ); set_all_memory( _gvn.transform(result_mem) ); - push_result(result_reg, result_val); - C->set_has_split_ifs(true); // Has chance for split-if optimization + C->set_has_split_ifs(true); // Has chance for split-if optimization + set_result(result_reg, result_val); return true; } //----------------------inline_native_getLength-------------------------- +// public static native int java.lang.reflect.Array.getLength(Object array); bool LibraryCallKit::inline_native_getLength() { if (too_many_traps(Deoptimization::Reason_intrinsic)) return false; - int nargs = 1; - Node* array = argument(0); - - _sp += nargs; // set original stack for use by uncommon_trap - array = do_null_check(array, T_OBJECT); - _sp -= nargs; - + Node* array = null_check(argument(0)); // If array is dead, only null-path is taken. if (stopped()) return true; @@ -3803,7 +3505,6 @@ bool LibraryCallKit::inline_native_getLength() { if (non_array != NULL) { PreserveJVMState pjvms(this); set_control(non_array); - _sp += nargs; // push the arguments back on the stack uncommon_trap(Deoptimization::Reason_intrinsic, Deoptimization::Action_maybe_recompile); } @@ -3813,19 +3514,21 @@ bool LibraryCallKit::inline_native_getLength() { // The works fine even if the array type is polymorphic. // It could be a dynamic mix of int[], boolean[], Object[], etc. - push( load_array_length(array) ); - - C->set_has_split_ifs(true); // Has chance for split-if optimization + Node* result = load_array_length(array); + C->set_has_split_ifs(true); // Has chance for split-if optimization + set_result(result); return true; } //------------------------inline_array_copyOf---------------------------- +// public static T[] java.util.Arrays.copyOf( U[] original, int newLength, Class newType); +// public static T[] java.util.Arrays.copyOfRange(U[] original, int from, int to, Class newType); bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) { + return false; if (too_many_traps(Deoptimization::Reason_intrinsic)) return false; - // Restore the stack and pop off the arguments. - int nargs = 3 + (is_copyOfRange? 1: 0); + // Get the arguments. Node* original = argument(0); Node* start = is_copyOfRange? argument(1): intcon(0); Node* end = is_copyOfRange? argument(2): argument(1); @@ -3833,23 +3536,21 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) { Node* newcopy; - //set the original stack and the reexecute bit for the interpreter to reexecute - //the bytecode that invokes Arrays.copyOf if deoptimization happens + // Set the original stack and the reexecute bit for the interpreter to reexecute + // the bytecode that invokes Arrays.copyOf if deoptimization happens. { PreserveReexecuteState preexecs(this); - _sp += nargs; jvms()->set_should_reexecute(true); - array_type_mirror = do_null_check(array_type_mirror, T_OBJECT); - original = do_null_check(original, T_OBJECT); + array_type_mirror = null_check(array_type_mirror); + original = null_check(original); // Check if a null path was taken unconditionally. if (stopped()) return true; Node* orig_length = load_array_length(original); - Node* klass_node = load_klass_from_mirror(array_type_mirror, false, 0, - NULL, 0); - klass_node = do_null_check(klass_node, T_OBJECT); + Node* klass_node = load_klass_from_mirror(array_type_mirror, false, NULL, 0); + klass_node = null_check(klass_node); RegionNode* bailout = new (C) RegionNode(1); record_for_igvn(bailout); @@ -3872,7 +3573,7 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) { Node* length = end; if (_gvn.type(start) != TypeInt::ZERO) { - length = _gvn.transform( new (C) SubINode(end, start) ); + length = _gvn.transform(new (C) SubINode(end, start)); } // Bail out if length is negative. @@ -3883,19 +3584,18 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) { if (bailout->req() > 1) { PreserveJVMState pjvms(this); - set_control( _gvn.transform(bailout) ); + set_control(_gvn.transform(bailout)); uncommon_trap(Deoptimization::Reason_intrinsic, Deoptimization::Action_maybe_recompile); } if (!stopped()) { - // How many elements will we copy from the original? // The answer is MinI(orig_length - start, length). - Node* orig_tail = _gvn.transform( new(C) SubINode(orig_length, start) ); + Node* orig_tail = _gvn.transform(new (C) SubINode(orig_length, start)); Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length); - newcopy = new_array(klass_node, length, 0); + newcopy = new_array(klass_node, length, 0); // no argments to push // Generate a direct call to the right arraycopy function(s). // We know the copy is disjoint but we might not know if the @@ -3910,14 +3610,12 @@ bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) { original, start, newcopy, intcon(0), moved, disjoint_bases, length_never_negative); } - } //original reexecute and sp are set back here - - if(!stopped()) { - push(newcopy); - } + } // original reexecute is set back here C->set_has_split_ifs(true); // Has chance for split-if optimization - + if (!stopped()) { + set_result(newcopy); + } return true; } @@ -3969,7 +3667,7 @@ LibraryCallKit::generate_method_call(vmIntrinsics::ID method_id, bool is_virtual SharedRuntime::get_resolve_static_call_stub(), method, bci()); } else if (is_virtual) { - null_check_receiver(method); + null_check_receiver(); int vtable_index = Method::invalid_vtable_index; if (UseInlineCaches) { // Suppress the vtable call @@ -3983,7 +3681,7 @@ LibraryCallKit::generate_method_call(vmIntrinsics::ID method_id, bool is_virtual SharedRuntime::get_resolve_virtual_call_stub(), method, vtable_index, bci()); } else { // neither virtual nor static: opt_virtual - null_check_receiver(method); + null_check_receiver(); slow_call = new(C) CallStaticJavaNode(tf, SharedRuntime::get_resolve_opt_virtual_call_stub(), method, bci()); @@ -4012,7 +3710,7 @@ bool LibraryCallKit::inline_native_hashcode(bool is_virtual, bool is_static) { Node* obj = NULL; if (!is_static) { // Check for hashing null object - obj = null_check_receiver(callee()); + obj = null_check_receiver(); if (stopped()) return true; // unconditionally null result_reg->init_req(_null_path, top()); result_val->init_req(_null_path, top()); @@ -4028,9 +3726,9 @@ bool LibraryCallKit::inline_native_hashcode(bool is_virtual, bool is_static) { // Unconditionally null? Then return right away. if (stopped()) { - set_control( result_reg->in(_null_path) ); + set_control( result_reg->in(_null_path)); if (!stopped()) - push( result_val ->in(_null_path) ); + set_result(result_val->in(_null_path)); return true; } @@ -4103,8 +3801,7 @@ bool LibraryCallKit::inline_native_hashcode(bool is_virtual, bool is_static) { if (!stopped()) { // No need for PreserveJVMState, because we're using up the present state. set_all_memory(init_mem); - vmIntrinsics::ID hashCode_id = vmIntrinsics::_hashCode; - if (is_static) hashCode_id = vmIntrinsics::_identityHashCode; + vmIntrinsics::ID hashCode_id = is_static ? vmIntrinsics::_identityHashCode : vmIntrinsics::_hashCode; CallJavaNode* slow_call = generate_method_call(hashCode_id, is_virtual, is_static); Node* slow_result = set_results_for_java_call(slow_call); // this->control() comes from set_results_for_java_call @@ -4117,48 +3814,38 @@ bool LibraryCallKit::inline_native_hashcode(bool is_virtual, bool is_static) { // Return the combined state. set_i_o( _gvn.transform(result_io) ); set_all_memory( _gvn.transform(result_mem) ); - push_result(result_reg, result_val); + set_result(result_reg, result_val); return true; } //---------------------------inline_native_getClass---------------------------- +// public final native Class java.lang.Object.getClass(); +// // Build special case code for calls to getClass on an object. bool LibraryCallKit::inline_native_getClass() { - Node* obj = null_check_receiver(callee()); + Node* obj = null_check_receiver(); if (stopped()) return true; - push( load_mirror_from_klass(load_object_klass(obj)) ); + set_result(load_mirror_from_klass(load_object_klass(obj))); return true; } //-----------------inline_native_Reflection_getCallerClass--------------------- +// public static native Class sun.reflect.Reflection.getCallerClass(int realFramesToSkip); +// // In the presence of deep enough inlining, getCallerClass() becomes a no-op. // // NOTE that this code must perform the same logic as // vframeStream::security_get_caller_frame in that it must skip // Method.invoke() and auxiliary frames. - - - - bool LibraryCallKit::inline_native_Reflection_getCallerClass() { - ciMethod* method = callee(); - #ifndef PRODUCT if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) { tty->print_cr("Attempting to inline sun.reflect.Reflection.getCallerClass"); } #endif - debug_only(int saved_sp = _sp); - - // Argument words: (int depth) - int nargs = 1; - - _sp += nargs; - Node* caller_depth_node = pop(); - - assert(saved_sp == _sp, "must have correct argument count"); + Node* caller_depth_node = argument(0); // The depth value must be a constant in order for the runtime call // to be eliminated. @@ -4230,7 +3917,8 @@ bool LibraryCallKit::inline_native_Reflection_getCallerClass() { tty->print_cr(" Bailing out because caller depth (%d) exceeded inlining depth (%d)", caller_depth_type->get_con(), _depth); tty->print_cr(" JVM state at this point:"); for (int i = _depth; i >= 1; i--) { - tty->print_cr(" %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8()); + ciMethod* m = jvms()->of_depth(i)->method(); + tty->print_cr(" %d) %s.%s", i, m->holder()->name()->as_utf8(), m->name()->as_utf8()); } } #endif @@ -4240,14 +3928,17 @@ bool LibraryCallKit::inline_native_Reflection_getCallerClass() { // Acquire method holder as java.lang.Class ciInstanceKlass* caller_klass = caller_jvms->method()->holder(); ciInstance* caller_mirror = caller_klass->java_mirror(); + // Push this as a constant - push(makecon(TypeInstPtr::make(caller_mirror))); + set_result(makecon(TypeInstPtr::make(caller_mirror))); + #ifndef PRODUCT if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) { tty->print_cr(" Succeeded: caller = %s.%s, caller depth = %d, depth = %d", caller_klass->name()->as_utf8(), caller_jvms->method()->name()->as_utf8(), caller_depth_type->get_con(), _depth); tty->print_cr(" JVM state at this point:"); for (int i = _depth; i >= 1; i--) { - tty->print_cr(" %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8()); + ciMethod* m = jvms()->of_depth(i)->method(); + tty->print_cr(" %d) %s.%s", i, m->holder()->name()->as_utf8(), m->name()->as_utf8()); } } #endif @@ -4283,36 +3974,23 @@ bool LibraryCallKit::is_method_invoke_or_aux_frame(JVMState* jvms) { } bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) { - // restore the arguments - _sp += arg_size(); + Node* arg = argument(0); + Node* result; switch (id) { - case vmIntrinsics::_floatToRawIntBits: - push(_gvn.transform( new (C) MoveF2INode(pop()))); - break; - - case vmIntrinsics::_intBitsToFloat: - push(_gvn.transform( new (C) MoveI2FNode(pop()))); - break; - - case vmIntrinsics::_doubleToRawLongBits: - push_pair(_gvn.transform( new (C) MoveD2LNode(pop_pair()))); - break; - - case vmIntrinsics::_longBitsToDouble: - push_pair(_gvn.transform( new (C) MoveL2DNode(pop_pair()))); - break; + case vmIntrinsics::_floatToRawIntBits: result = new (C) MoveF2INode(arg); break; + case vmIntrinsics::_intBitsToFloat: result = new (C) MoveI2FNode(arg); break; + case vmIntrinsics::_doubleToRawLongBits: result = new (C) MoveD2LNode(arg); break; + case vmIntrinsics::_longBitsToDouble: result = new (C) MoveL2DNode(arg); break; case vmIntrinsics::_doubleToLongBits: { - Node* value = pop_pair(); - // two paths (plus control) merge in a wood RegionNode *r = new (C) RegionNode(3); Node *phi = new (C) PhiNode(r, TypeLong::LONG); - Node *cmpisnan = _gvn.transform( new (C) CmpDNode(value, value)); + Node *cmpisnan = _gvn.transform(new (C) CmpDNode(arg, arg)); // Build the boolean node - Node *bolisnan = _gvn.transform( new (C) BoolNode( cmpisnan, BoolTest::ne ) ); + Node *bolisnan = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::ne)); // Branch either way. // NaN case is less traveled, which makes all the difference. @@ -4330,35 +4008,30 @@ bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) { r->init_req(1, iftrue); // Else fall through - Node *iffalse = _gvn.transform( new (C) IfFalseNode(opt_ifisnan) ); + Node *iffalse = _gvn.transform(new (C) IfFalseNode(opt_ifisnan)); set_control(iffalse); - phi->init_req(2, _gvn.transform( new (C) MoveD2LNode(value))); + phi->init_req(2, _gvn.transform(new (C) MoveD2LNode(arg))); r->init_req(2, iffalse); // Post merge set_control(_gvn.transform(r)); record_for_igvn(r); - Node* result = _gvn.transform(phi); - assert(result->bottom_type()->isa_long(), "must be"); - push_pair(result); - C->set_has_split_ifs(true); // Has chance for split-if optimization - + result = phi; + assert(result->bottom_type()->isa_long(), "must be"); break; } case vmIntrinsics::_floatToIntBits: { - Node* value = pop(); - // two paths (plus control) merge in a wood RegionNode *r = new (C) RegionNode(3); Node *phi = new (C) PhiNode(r, TypeInt::INT); - Node *cmpisnan = _gvn.transform( new (C) CmpFNode(value, value)); + Node *cmpisnan = _gvn.transform(new (C) CmpFNode(arg, arg)); // Build the boolean node - Node *bolisnan = _gvn.transform( new (C) BoolNode( cmpisnan, BoolTest::ne ) ); + Node *bolisnan = _gvn.transform(new (C) BoolNode(cmpisnan, BoolTest::ne)); // Branch either way. // NaN case is less traveled, which makes all the difference. @@ -4376,29 +4049,27 @@ bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) { r->init_req(1, iftrue); // Else fall through - Node *iffalse = _gvn.transform( new (C) IfFalseNode(opt_ifisnan) ); + Node *iffalse = _gvn.transform(new (C) IfFalseNode(opt_ifisnan)); set_control(iffalse); - phi->init_req(2, _gvn.transform( new (C) MoveF2INode(value))); + phi->init_req(2, _gvn.transform(new (C) MoveF2INode(arg))); r->init_req(2, iffalse); // Post merge set_control(_gvn.transform(r)); record_for_igvn(r); - Node* result = _gvn.transform(phi); - assert(result->bottom_type()->isa_int(), "must be"); - push(result); - C->set_has_split_ifs(true); // Has chance for split-if optimization - + result = phi; + assert(result->bottom_type()->isa_int(), "must be"); break; } default: - ShouldNotReachHere(); + fatal_unexpected_iid(id); + break; } - + set_result(_gvn.transform(result)); return true; } @@ -4409,23 +4080,19 @@ bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) { #endif //_LP64 //----------------------inline_unsafe_copyMemory------------------------- +// public native void sun.misc.Unsafe.copyMemory(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes); bool LibraryCallKit::inline_unsafe_copyMemory() { if (callee()->is_static()) return false; // caller must have the capability! - int nargs = 1 + 5 + 3; // 5 args: (src: ptr,off, dst: ptr,off, size) - assert(signature()->size() == nargs-1, "copy has 5 arguments"); - null_check_receiver(callee()); // check then ignore argument(0) + null_check_receiver(); // null-check receiver if (stopped()) return true; C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe". - Node* src_ptr = argument(1); - Node* src_off = ConvL2X(argument(2)); - assert(argument(3)->is_top(), "2nd half of long"); - Node* dst_ptr = argument(4); - Node* dst_off = ConvL2X(argument(5)); - assert(argument(6)->is_top(), "2nd half of long"); - Node* size = ConvL2X(argument(7)); - assert(argument(8)->is_top(), "2nd half of long"); + Node* src_ptr = argument(1); // type: oop + Node* src_off = ConvL2X(argument(2)); // type: long + Node* dst_ptr = argument(4); // type: oop + Node* dst_off = ConvL2X(argument(5)); // type: long + Node* size = ConvL2X(argument(7)); // type: long assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled"); @@ -4545,6 +4212,8 @@ void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, b } //------------------------inline_native_clone---------------------------- +// protected native Object java.lang.Object.clone(); +// // Here are the simple edge cases: // null receiver => normal trap // virtual and clone was overridden => slow path to out-of-line clone @@ -4561,20 +4230,16 @@ void LibraryCallKit::copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, b // can be sharply typed as an object array, a type array, or an instance. // bool LibraryCallKit::inline_native_clone(bool is_virtual) { - int nargs = 1; PhiNode* result_val; - //set the original stack and the reexecute bit for the interpreter to reexecute - //the bytecode that invokes Object.clone if deoptimization happens + // Set the reexecute bit for the interpreter to reexecute + // the bytecode that invokes Object.clone if deoptimization happens. { PreserveReexecuteState preexecs(this); jvms()->set_should_reexecute(true); - //null_check_receiver will adjust _sp (push and pop) - Node* obj = null_check_receiver(callee()); + Node* obj = null_check_receiver(); if (stopped()) return true; - _sp += nargs; - Node* obj_klass = load_object_klass(obj); const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr(); const TypeOopPtr* toop = ((tklass != NULL) @@ -4611,7 +4276,7 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) { set_control(array_ctl); Node* obj_length = load_array_length(obj); Node* obj_size = NULL; - Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); + Node* alloc_obj = new_array(obj_klass, obj_length, 0, &obj_size); // no arguments to push if (!use_ReduceInitialCardMarks()) { // If it is an oop array, it requires very special treatment, @@ -4711,10 +4376,9 @@ bool LibraryCallKit::inline_native_clone(bool is_virtual) { set_control( _gvn.transform(result_reg) ); set_i_o( _gvn.transform(result_i_o) ); set_all_memory( _gvn.transform(result_mem) ); - } //original reexecute and sp are set back here - - push(_gvn.transform(result_val)); + } // original reexecute is set back here + set_result(_gvn.transform(result_val)); return true; } @@ -4755,25 +4419,25 @@ address LibraryCallKit::basictype2arraycopy(BasicType t, //------------------------------inline_arraycopy----------------------- +// public static native void java.lang.System.arraycopy(Object src, int srcPos, +// Object dest, int destPos, +// int length); bool LibraryCallKit::inline_arraycopy() { - // Restore the stack and pop off the arguments. - int nargs = 5; // 2 oops, 3 ints, no size_t or long - assert(callee()->signature()->size() == nargs, "copy has 5 arguments"); - - Node *src = argument(0); - Node *src_offset = argument(1); - Node *dest = argument(2); - Node *dest_offset = argument(3); - Node *length = argument(4); + // Get the arguments. + Node* src = argument(0); // type: oop + Node* src_offset = argument(1); // type: int + Node* dest = argument(2); // type: oop + Node* dest_offset = argument(3); // type: int + Node* length = argument(4); // type: int // Compile time checks. If any of these checks cannot be verified at compile time, // we do not make a fast path for this call. Instead, we let the call remain as it // is. The checks we choose to mandate at compile time are: // // (1) src and dest are arrays. - const Type* src_type = src->Value(&_gvn); + const Type* src_type = src->Value(&_gvn); const Type* dest_type = dest->Value(&_gvn); - const TypeAryPtr* top_src = src_type->isa_aryptr(); + const TypeAryPtr* top_src = src_type->isa_aryptr(); const TypeAryPtr* top_dest = dest_type->isa_aryptr(); if (top_src == NULL || top_src->klass() == NULL || top_dest == NULL || top_dest->klass() == NULL) { @@ -4828,15 +4492,13 @@ bool LibraryCallKit::inline_arraycopy() { record_for_igvn(slow_region); // (3) operands must not be null - // We currently perform our null checks with the do_null_check routine. + // We currently perform our null checks with the null_check routine. // This means that the null exceptions will be reported in the caller // rather than (correctly) reported inside of the native arraycopy call. // This should be corrected, given time. We do our null check with the // stack pointer restored. - _sp += nargs; - src = do_null_check(src, T_ARRAY); - dest = do_null_check(dest, T_ARRAY); - _sp -= nargs; + src = null_check(src, T_ARRAY); + dest = null_check(dest, T_ARRAY); // (4) src_offset must not be negative. generate_negative_guard(src_offset, slow_region); @@ -5179,7 +4841,7 @@ LibraryCallKit::generate_arraycopy(const TypePtr* adr_type, slow_control = top(); if (slow_region != NULL) slow_control = _gvn.transform(slow_region); - debug_only(slow_region = (RegionNode*)badAddress); + DEBUG_ONLY(slow_region = (RegionNode*)badAddress); set_control(checked_control); if (!stopped()) { @@ -5674,33 +5336,22 @@ LibraryCallKit::generate_unchecked_arraycopy(const TypePtr* adr_type, } //----------------------------inline_reference_get---------------------------- - +// public T java.lang.ref.Reference.get(); bool LibraryCallKit::inline_reference_get() { - const int nargs = 1; // self - - guarantee(java_lang_ref_Reference::referent_offset > 0, - "should have already been set"); - - int referent_offset = java_lang_ref_Reference::referent_offset; - - // Restore the stack and pop off the argument - _sp += nargs; - Node *reference_obj = pop(); - - // Null check on self without removing any arguments. - _sp += nargs; - reference_obj = do_null_check(reference_obj, T_OBJECT); - _sp -= nargs;; + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "should have already been set"); + // Get the argument: + Node* reference_obj = null_check_receiver(); if (stopped()) return true; - Node *adr = basic_plus_adr(reference_obj, reference_obj, referent_offset); + Node* adr = basic_plus_adr(reference_obj, reference_obj, referent_offset); ciInstanceKlass* klass = env()->Object_klass(); const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass); Node* no_ctrl = NULL; - Node *result = make_load(no_ctrl, adr, object_type, T_OBJECT); + Node* result = make_load(no_ctrl, adr, object_type, T_OBJECT); // Use the pre-barrier to record the value in the referent field pre_barrier(false /* do_load */, @@ -5713,7 +5364,7 @@ bool LibraryCallKit::inline_reference_get() { // across safepoint since GC can change its value. insert_mem_bar(Op_MemBarCPUOrder); - push(result); + set_result(result); return true; } @@ -5770,15 +5421,11 @@ bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) { } if (stubAddr == NULL) return false; - // Restore the stack and pop off the arguments. - int nargs = 5; // this + 2 oop/offset combos - assert(callee()->signature()->size() == nargs-1, "encryptBlock has 4 arguments"); - - Node *aescrypt_object = argument(0); - Node *src = argument(1); - Node *src_offset = argument(2); - Node *dest = argument(3); - Node *dest_offset = argument(4); + Node* aescrypt_object = argument(0); + Node* src = argument(1); + Node* src_offset = argument(2); + Node* dest = argument(3); + Node* dest_offset = argument(4); // (1) src and dest are arrays. const Type* src_type = src->Value(&_gvn); @@ -5829,16 +5476,12 @@ bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) { } if (stubAddr == NULL) return false; - - // Restore the stack and pop off the arguments. - int nargs = 6; // this + oop/offset + len + oop/offset - assert(callee()->signature()->size() == nargs-1, "wrong number of arguments"); - Node *cipherBlockChaining_object = argument(0); - Node *src = argument(1); - Node *src_offset = argument(2); - Node *len = argument(3); - Node *dest = argument(4); - Node *dest_offset = argument(5); + Node* cipherBlockChaining_object = argument(0); + Node* src = argument(1); + Node* src_offset = argument(2); + Node* len = argument(3); + Node* dest = argument(4); + Node* dest_offset = argument(5); // (1) src and dest are arrays. const Type* src_type = src->Value(&_gvn); @@ -5920,11 +5563,8 @@ Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) // Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) { // First, check receiver for NULL since it is virtual method. - int nargs = arg_size(); Node* objCBC = argument(0); - _sp += nargs; - objCBC = do_null_check(objCBC, T_OBJECT); - _sp -= nargs; + objCBC = null_check(objCBC); if (stopped()) return NULL; // Always NULL @@ -5948,9 +5588,7 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt } ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass(); - _sp += nargs; // gen_instanceof might do an uncommon trap Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt))); - _sp -= nargs; Node* cmp_instof = _gvn.transform(new (C) CmpINode(instof, intcon(1))); Node* bool_instof = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne)); @@ -5966,7 +5604,7 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt RegionNode* region = new(C) RegionNode(3); region->init_req(1, instof_false); Node* src = argument(1); - Node *dest = argument(4); + Node* dest = argument(4); Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest)); Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq)); Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN); @@ -5974,7 +5612,4 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt record_for_igvn(region); return _gvn.transform(region); - } - - diff --git a/hotspot/src/share/vm/opto/locknode.cpp b/hotspot/src/share/vm/opto/locknode.cpp index 1e6a6c20c84..26c6291686b 100644 --- a/hotspot/src/share/vm/opto/locknode.cpp +++ b/hotspot/src/share/vm/opto/locknode.cpp @@ -165,7 +165,7 @@ void Parse::do_monitor_enter() { kill_dead_locals(); // Null check; get casted pointer. - Node *obj = do_null_check(peek(), T_OBJECT); + Node* obj = null_check(peek()); // Check for locking null object if (stopped()) return; diff --git a/hotspot/src/share/vm/opto/loopTransform.cpp b/hotspot/src/share/vm/opto/loopTransform.cpp index 53d0fffdc31..259e10a56df 100644 --- a/hotspot/src/share/vm/opto/loopTransform.cpp +++ b/hotspot/src/share/vm/opto/loopTransform.cpp @@ -269,10 +269,10 @@ void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) { bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const { Node *test = ((IdealLoopTree*)this)->tail(); int body_size = ((IdealLoopTree*)this)->_body.size(); - int uniq = phase->C->unique(); + int live_node_count = phase->C->live_nodes(); // Peeling does loop cloning which can result in O(N^2) node construction if( body_size > 255 /* Prevent overflow for large body_size */ - || (body_size * body_size + uniq > MaxNodeLimit) ) { + || (body_size * body_size + live_node_count > MaxNodeLimit) ) { return false; // too large to safely clone } while( test != _head ) { // Scan till run off top of loop @@ -601,7 +601,7 @@ bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const { return false; if (new_body_size > unroll_limit || // Unrolling can result in a large amount of node construction - new_body_size >= MaxNodeLimit - phase->C->unique()) { + new_body_size >= MaxNodeLimit - (uint) phase->C->live_nodes()) { return false; } @@ -2268,7 +2268,7 @@ bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_ // Skip next optimizations if running low on nodes. Note that // policy_unswitching and policy_maximally_unroll have this check. - uint nodes_left = MaxNodeLimit - phase->C->unique(); + uint nodes_left = MaxNodeLimit - (uint) phase->C->live_nodes(); if ((2 * _body.size()) > nodes_left) { return true; } diff --git a/hotspot/src/share/vm/opto/loopUnswitch.cpp b/hotspot/src/share/vm/opto/loopUnswitch.cpp index fc3380f4909..047c00e8021 100644 --- a/hotspot/src/share/vm/opto/loopUnswitch.cpp +++ b/hotspot/src/share/vm/opto/loopUnswitch.cpp @@ -59,7 +59,7 @@ bool IdealLoopTree::policy_unswitching( PhaseIdealLoop *phase ) const { if (!_head->is_Loop()) { return false; } - uint nodes_left = MaxNodeLimit - phase->C->unique(); + uint nodes_left = MaxNodeLimit - phase->C->live_nodes(); if (2 * _body.size() > nodes_left) { return false; // Too speculative if running low on nodes. } diff --git a/hotspot/src/share/vm/opto/loopopts.cpp b/hotspot/src/share/vm/opto/loopopts.cpp index 015a063a815..31f080dbf7b 100644 --- a/hotspot/src/share/vm/opto/loopopts.cpp +++ b/hotspot/src/share/vm/opto/loopopts.cpp @@ -729,7 +729,7 @@ static bool merge_point_too_heavy(Compile* C, Node* region) { for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) { weight += region->fast_out(i)->outcnt(); } - int nodes_left = MaxNodeLimit - C->unique(); + int nodes_left = MaxNodeLimit - C->live_nodes(); if (weight * 8 > nodes_left) { #ifndef PRODUCT if (PrintOpto) diff --git a/hotspot/src/share/vm/opto/macro.cpp b/hotspot/src/share/vm/opto/macro.cpp index 16987d97468..2c45140816e 100644 --- a/hotspot/src/share/vm/opto/macro.cpp +++ b/hotspot/src/share/vm/opto/macro.cpp @@ -2262,7 +2262,7 @@ void PhaseMacroExpand::expand_lock_node(LockNode *lock) { Node *slow_ctrl = _fallthroughproj->clone(); transform_later(slow_ctrl); _igvn.hash_delete(_fallthroughproj); - _fallthroughproj->disconnect_inputs(NULL); + _fallthroughproj->disconnect_inputs(NULL, C); region->init_req(1, slow_ctrl); // region inputs are now complete transform_later(region); @@ -2327,7 +2327,7 @@ void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) { Node *slow_ctrl = _fallthroughproj->clone(); transform_later(slow_ctrl); _igvn.hash_delete(_fallthroughproj); - _fallthroughproj->disconnect_inputs(NULL); + _fallthroughproj->disconnect_inputs(NULL, C); region->init_req(1, slow_ctrl); // region inputs are now complete transform_later(region); diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp index 53924d0c9a0..0c6f5ea8ec2 100644 --- a/hotspot/src/share/vm/opto/matcher.cpp +++ b/hotspot/src/share/vm/opto/matcher.cpp @@ -342,6 +342,7 @@ void Matcher::match( ) { // Reset node counter so MachNodes start with _idx at 0 int nodes = C->unique(); // save value C->set_unique(0); + C->reset_dead_node_list(); // Recursively match trees from old space into new space. // Correct leaves of new-space Nodes; they point to old-space. diff --git a/hotspot/src/share/vm/opto/node.cpp b/hotspot/src/share/vm/opto/node.cpp index cc35098e823..4c83d906003 100644 --- a/hotspot/src/share/vm/opto/node.cpp +++ b/hotspot/src/share/vm/opto/node.cpp @@ -57,7 +57,7 @@ void Node::verify_construction() { int new_debug_idx = old_debug_idx+1; if (new_debug_idx > 0) { // Arrange that the lowest five decimal digits of _debug_idx - // will repeat thos of _idx. In case this is somehow pathological, + // will repeat those of _idx. In case this is somehow pathological, // we continue to assign negative numbers (!) consecutively. const int mod = 100000; int bump = (int)(_idx - new_debug_idx) % mod; @@ -67,7 +67,7 @@ void Node::verify_construction() { } Compile::set_debug_idx(new_debug_idx); set_debug_idx( new_debug_idx ); - assert(Compile::current()->unique() < (uint)MaxNodeLimit, "Node limit exceeded"); + assert(Compile::current()->unique() < (UINT_MAX - 1), "Node limit exceeded UINT_MAX"); if (BreakAtNode != 0 && (_debug_idx == BreakAtNode || (int)_idx == BreakAtNode)) { tty->print_cr("BreakAtNode: _idx=%d _debug_idx=%d", _idx, _debug_idx); BREAKPOINT; @@ -802,7 +802,7 @@ int Node::replace_edge(Node* old, Node* neww) { //-------------------------disconnect_inputs----------------------------------- // NULL out all inputs to eliminate incoming Def-Use edges. // Return the number of edges between 'n' and 'this' -int Node::disconnect_inputs(Node *n) { +int Node::disconnect_inputs(Node *n, Compile* C) { int edges_to_n = 0; uint cnt = req(); @@ -824,6 +824,9 @@ int Node::disconnect_inputs(Node *n) { // Node::destruct requires all out edges be deleted first // debug_only(destruct();) // no reuse benefit expected + if (edges_to_n == 0) { + C->record_dead_node(_idx); + } return edges_to_n; } diff --git a/hotspot/src/share/vm/opto/node.hpp b/hotspot/src/share/vm/opto/node.hpp index f746f2e42af..f0c1df3a8b0 100644 --- a/hotspot/src/share/vm/opto/node.hpp +++ b/hotspot/src/share/vm/opto/node.hpp @@ -410,7 +410,7 @@ protected: int replace_edge(Node* old, Node* neww); // NULL out all inputs to eliminate incoming Def-Use edges. // Return the number of edges between 'n' and 'this' - int disconnect_inputs(Node *n); + int disconnect_inputs(Node *n, Compile *c); // Quickly, return true if and only if I am Compile::current()->top(). bool is_top() const { @@ -458,9 +458,9 @@ public: void replace_by(Node* new_node); // Globally replace this node by a given new node, updating all uses // and cutting input edges of old node. - void subsume_by(Node* new_node) { + void subsume_by(Node* new_node, Compile* c) { replace_by(new_node); - disconnect_inputs(NULL); + disconnect_inputs(NULL, c); } void set_req_X( uint i, Node *n, PhaseIterGVN *igvn ); // Find the one non-null required input. RegionNode only diff --git a/hotspot/src/share/vm/opto/output.cpp b/hotspot/src/share/vm/opto/output.cpp index f16bbc6fb55..b0d7a9948ea 100644 --- a/hotspot/src/share/vm/opto/output.cpp +++ b/hotspot/src/share/vm/opto/output.cpp @@ -513,7 +513,7 @@ void Compile::shorten_branches(uint* blk_starts, int& code_size, int& reloc_size } adjust_block_start += diff; b->_nodes.map(idx, replacement); - mach->subsume_by(replacement); + mach->subsume_by(replacement, C); mach = replacement; progress = true; @@ -1425,7 +1425,7 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { jmp_rule[i] = mach->rule(); #endif b->_nodes.map(j, replacement); - mach->subsume_by(replacement); + mach->subsume_by(replacement, C); n = replacement; mach = replacement; } diff --git a/hotspot/src/share/vm/opto/parse1.cpp b/hotspot/src/share/vm/opto/parse1.cpp index d0b52f554ec..7009352917f 100644 --- a/hotspot/src/share/vm/opto/parse1.cpp +++ b/hotspot/src/share/vm/opto/parse1.cpp @@ -601,8 +601,8 @@ Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses) set_map(entry_map); do_exits(); - if (log) log->done("parse nodes='%d' memory='%d'", - C->unique(), C->node_arena()->used()); + if (log) log->done("parse nodes='%d' live='%d' memory='%d'", + C->unique(), C->live_nodes(), C->node_arena()->used()); } //---------------------------do_all_blocks------------------------------------- @@ -1008,7 +1008,7 @@ SafePointNode* Parse::create_entry_map() { // If this is an inlined method, we may have to do a receiver null check. if (_caller->has_method() && is_normal_parse() && !method()->is_static()) { GraphKit kit(_caller); - kit.null_check_receiver(method()); + kit.null_check_receiver_before_call(method()); _caller = kit.transfer_exceptions_into_jvms(); if (kit.stopped()) { _exits.add_exception_states_from(_caller); @@ -1398,7 +1398,7 @@ void Parse::do_one_block() { #ifdef ASSERT int pre_bc_sp = sp(); int inputs, depth; - bool have_se = !stopped() && compute_stack_effects(inputs, depth, /*for_parse*/ true); + bool have_se = !stopped() && compute_stack_effects(inputs, depth); assert(!have_se || pre_bc_sp >= inputs, err_msg_res("have enough stack to execute this BC: pre_bc_sp=%d, inputs=%d", pre_bc_sp, inputs)); #endif //ASSERT diff --git a/hotspot/src/share/vm/opto/parse2.cpp b/hotspot/src/share/vm/opto/parse2.cpp index af91156dad3..9a14c9a58f9 100644 --- a/hotspot/src/share/vm/opto/parse2.cpp +++ b/hotspot/src/share/vm/opto/parse2.cpp @@ -48,7 +48,7 @@ void Parse::array_load(BasicType elem_type) { const Type* elem = Type::TOP; Node* adr = array_addressing(elem_type, 0, &elem); if (stopped()) return; // guaranteed null or range check - _sp -= 2; // Pop array and index + dec_sp(2); // Pop array and index const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); Node* ld = make_load(control(), adr, elem, elem_type, adr_type); push(ld); @@ -60,7 +60,7 @@ void Parse::array_store(BasicType elem_type) { Node* adr = array_addressing(elem_type, 1); if (stopped()) return; // guaranteed null or range check Node* val = pop(); - _sp -= 2; // Pop array and index + dec_sp(2); // Pop array and index const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type); store_to_memory(control(), adr, val, elem_type, adr_type); } @@ -73,7 +73,7 @@ Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) { Node *ary = peek(1+vals); // in case of exception // Null check the array base, with correct stack contents - ary = do_null_check(ary, T_ARRAY); + ary = null_check(ary, T_ARRAY); // Compile-time detect of null-exception? if (stopped()) return top(); @@ -681,7 +681,7 @@ void Parse::l2f() { void Parse::do_irem() { // Must keep both values on the expression-stack during null-check - do_null_check(peek(), T_INT); + zero_check_int(peek()); // Compile-time detect of null-exception? if (stopped()) return; @@ -958,7 +958,7 @@ inline int Parse::repush_if_args() { DEBUG_ONLY(sync_jvms()); // argument(n) requires a synced jvms assert(argument(0) != NULL, "must exist"); assert(bc_depth == 1 || argument(1) != NULL, "two must exist"); - _sp += bc_depth; + inc_sp(bc_depth); return bc_depth; } @@ -1581,8 +1581,8 @@ void Parse::do_one_bytecode() { set_pair_local( iter().get_index(), dstore_rounding(pop_pair()) ); break; - case Bytecodes::_pop: _sp -= 1; break; - case Bytecodes::_pop2: _sp -= 2; break; + case Bytecodes::_pop: dec_sp(1); break; + case Bytecodes::_pop2: dec_sp(2); break; case Bytecodes::_swap: a = pop(); b = pop(); @@ -1650,7 +1650,7 @@ void Parse::do_one_bytecode() { case Bytecodes::_arraylength: { // Must do null-check with value on expression stack - Node *ary = do_null_check(peek(), T_ARRAY); + Node *ary = null_check(peek(), T_ARRAY); // Compile-time detect of null-exception? if (stopped()) return; a = pop(); @@ -1667,15 +1667,15 @@ void Parse::do_one_bytecode() { case Bytecodes::_laload: { a = array_addressing(T_LONG, 0); if (stopped()) return; // guaranteed null or range check - _sp -= 2; // Pop array and index - push_pair( make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS)); + dec_sp(2); // Pop array and index + push_pair(make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS)); break; } case Bytecodes::_daload: { a = array_addressing(T_DOUBLE, 0); if (stopped()) return; // guaranteed null or range check - _sp -= 2; // Pop array and index - push_pair( make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES)); + dec_sp(2); // Pop array and index + push_pair(make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES)); break; } case Bytecodes::_bastore: array_store(T_BYTE); break; @@ -1699,7 +1699,7 @@ void Parse::do_one_bytecode() { a = array_addressing(T_LONG, 2); if (stopped()) return; // guaranteed null or range check c = pop_pair(); - _sp -= 2; // Pop array and index + dec_sp(2); // Pop array and index store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS); break; } @@ -1707,7 +1707,7 @@ void Parse::do_one_bytecode() { a = array_addressing(T_DOUBLE, 2); if (stopped()) return; // guaranteed null or range check c = pop_pair(); - _sp -= 2; // Pop array and index + dec_sp(2); // Pop array and index c = dstore_rounding(c); store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES); break; @@ -1733,7 +1733,7 @@ void Parse::do_one_bytecode() { break; case Bytecodes::_idiv: // Must keep both values on the expression-stack during null-check - do_null_check(peek(), T_INT); + zero_check_int(peek()); // Compile-time detect of null-exception? if (stopped()) return; b = pop(); @@ -2041,7 +2041,7 @@ void Parse::do_one_bytecode() { case Bytecodes::_lrem: // Must keep both values on the expression-stack during null-check assert(peek(0) == top(), "long word order"); - do_null_check(peek(1), T_LONG); + zero_check_long(peek(1)); // Compile-time detect of null-exception? if (stopped()) return; b = pop_pair(); @@ -2053,7 +2053,7 @@ void Parse::do_one_bytecode() { case Bytecodes::_ldiv: // Must keep both values on the expression-stack during null-check assert(peek(0) == top(), "long word order"); - do_null_check(peek(1), T_LONG); + zero_check_long(peek(1)); // Compile-time detect of null-exception? if (stopped()) return; b = pop_pair(); @@ -2175,7 +2175,7 @@ void Parse::do_one_bytecode() { case Bytecodes::_athrow: // null exception oop throws NULL pointer exception - do_null_check(peek(), T_OBJECT); + null_check(peek()); if (stopped()) return; // Hook the thrown exception directly to subsequent handlers. if (BailoutToInterpreterForThrows) { diff --git a/hotspot/src/share/vm/opto/parse3.cpp b/hotspot/src/share/vm/opto/parse3.cpp index 119fe7d1922..0f92b53f135 100644 --- a/hotspot/src/share/vm/opto/parse3.cpp +++ b/hotspot/src/share/vm/opto/parse3.cpp @@ -116,7 +116,7 @@ void Parse::do_field_access(bool is_get, bool is_field) { Node* obj; if (is_field) { int obj_depth = is_get ? 0 : field->type()->size(); - obj = do_null_check(peek(obj_depth), T_OBJECT); + obj = null_check(peek(obj_depth)); // Compile-time detect of null-exception? if (stopped()) return; @@ -126,11 +126,11 @@ void Parse::do_field_access(bool is_get, bool is_field) { #endif if (is_get) { - --_sp; // pop receiver before getting + (void) pop(); // pop receiver before getting do_get_xxx(obj, field, is_field); } else { do_put_xxx(obj, field, is_field); - --_sp; // pop receiver after putting + (void) pop(); // pop receiver after putting } } else { const TypeInstPtr* tip = TypeInstPtr::make(field_holder->java_mirror()); @@ -230,7 +230,7 @@ void Parse::do_get_xxx(Node* obj, ciField* field, bool is_field) { } // If there is going to be a trap, put it at the next bytecode: set_bci(iter().next_bci()); - do_null_assert(peek(), T_OBJECT); + null_assert(peek()); set_bci(iter().cur_bci()); // put it back } @@ -463,7 +463,7 @@ void Parse::do_multianewarray() { // Note: the reexecute bit will be set in GraphKit::add_safepoint_edges() // when AllocateArray node for newarray is created. { PreserveReexecuteState preexecs(this); - _sp += ndimensions; + inc_sp(ndimensions); // Pass 0 as nargs since uncommon trap code does not need to restore stack. obj = expand_multianewarray(array_klass, &length[0], ndimensions, 0); } //original reexecute and sp are set back here @@ -492,7 +492,7 @@ void Parse::do_multianewarray() { // Create a java array for dimension sizes Node* dims = NULL; { PreserveReexecuteState preexecs(this); - _sp += ndimensions; + inc_sp(ndimensions); Node* dims_array_klass = makecon(TypeKlassPtr::make(ciArrayKlass::make(ciType::make(T_INT)))); dims = new_array(dims_array_klass, intcon(ndimensions), 0); diff --git a/hotspot/src/share/vm/opto/parseHelper.cpp b/hotspot/src/share/vm/opto/parseHelper.cpp index 19b0f513366..f2a1bd2bef4 100644 --- a/hotspot/src/share/vm/opto/parseHelper.cpp +++ b/hotspot/src/share/vm/opto/parseHelper.cpp @@ -84,7 +84,7 @@ void Parse::do_checkcast() { C->log()->identify(tp->klass())); } } - do_null_assert(obj, T_OBJECT); + null_assert(obj); assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" ); if (!stopped()) { profile_null_checkcast(); @@ -116,7 +116,7 @@ void Parse::do_instanceof() { C->log()->elem("assert_null reason='instanceof' klass='%d'", C->log()->identify(klass)); } - do_null_assert(peek(), T_OBJECT); + null_assert(peek()); assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" ); if (!stopped()) { // The object is now known to be null. @@ -139,10 +139,10 @@ void Parse::do_instanceof() { // pull array from stack and check that the store is valid void Parse::array_store_check() { - // Shorthand access to array store elements - Node *obj = stack(_sp-1); - Node *idx = stack(_sp-2); - Node *ary = stack(_sp-3); + // Shorthand access to array store elements without popping them. + Node *obj = peek(0); + Node *idx = peek(1); + Node *ary = peek(2); if (_gvn.type(obj) == TypePtr::NULL_PTR) { // There's never a type check on null values. diff --git a/hotspot/src/share/vm/opto/phaseX.cpp b/hotspot/src/share/vm/opto/phaseX.cpp index 849327cf8bb..f2a44f0e7a7 100644 --- a/hotspot/src/share/vm/opto/phaseX.cpp +++ b/hotspot/src/share/vm/opto/phaseX.cpp @@ -383,6 +383,8 @@ PhaseRemoveUseless::PhaseRemoveUseless( PhaseGVN *gvn, Unique_Node_List *worklis // Identify nodes that are reachable from below, useful. C->identify_useful_nodes(_useful); + // Update dead node list + C->update_dead_node_list(_useful); // Remove all useless nodes from PhaseValues' recorded types // Must be done before disconnecting nodes to preserve hash-table-invariant @@ -1190,7 +1192,7 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) { } } } - + C->record_dead_node(dead->_idx); if (dead->is_macro()) { C->remove_macro_node(dead); } @@ -1199,6 +1201,11 @@ void PhaseIterGVN::remove_globally_dead_node( Node *dead ) { continue; } } + // Constant node that has no out-edges and has only one in-edge from + // root is usually dead. However, sometimes reshaping walk makes + // it reachable by adding use edges. So, we will NOT count Con nodes + // as dead to be conservative about the dead node count at any + // given time. } // Aggressively kill globally dead uses diff --git a/hotspot/src/share/vm/opto/postaloc.cpp b/hotspot/src/share/vm/opto/postaloc.cpp index 912e07704e7..a27145b5c7b 100644 --- a/hotspot/src/share/vm/opto/postaloc.cpp +++ b/hotspot/src/share/vm/opto/postaloc.cpp @@ -146,7 +146,7 @@ int PhaseChaitin::yank_if_dead_recurse(Node *old, Node *orig_old, Block *current } } // Disconnect control and remove precedence edges if any exist - old->disconnect_inputs(NULL); + old->disconnect_inputs(NULL, C); } return blk_adjust; } @@ -513,7 +513,7 @@ void PhaseChaitin::post_allocate_copy_removal() { b->_nodes.remove(j--); phi_dex--; _cfg._bbs.map(phi->_idx,NULL); phi->replace_by(u); - phi->disconnect_inputs(NULL); + phi->disconnect_inputs(NULL, C); continue; } // Note that if value[pidx] exists, then we merged no new values here diff --git a/hotspot/src/share/vm/opto/reg_split.cpp b/hotspot/src/share/vm/opto/reg_split.cpp index fe720e89334..1695dee6008 100644 --- a/hotspot/src/share/vm/opto/reg_split.cpp +++ b/hotspot/src/share/vm/opto/reg_split.cpp @@ -747,7 +747,7 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) { if( i >= cnt ) { // Found one unique input assert(Find_id(n) == Find_id(u), "should be the same lrg"); n->replace_by(u); // Then replace with unique input - n->disconnect_inputs(NULL); + n->disconnect_inputs(NULL, C); b->_nodes.remove(insidx); insidx--; b->_ihrp_index--; diff --git a/hotspot/src/share/vm/opto/stringopts.cpp b/hotspot/src/share/vm/opto/stringopts.cpp index 6be806acc12..c471887d881 100644 --- a/hotspot/src/share/vm/opto/stringopts.cpp +++ b/hotspot/src/share/vm/opto/stringopts.cpp @@ -241,13 +241,13 @@ class StringConcat : public ResourceObj { _stringopts->gvn()->transform(call); C->gvn_replace_by(uct, call); - uct->disconnect_inputs(NULL); + uct->disconnect_inputs(NULL, C); } } void cleanup() { // disconnect the hook node - _arguments->disconnect_inputs(NULL); + _arguments->disconnect_inputs(NULL, _stringopts->C); } }; @@ -358,7 +358,7 @@ void StringConcat::eliminate_initialize(InitializeNode* init) { C->gvn_replace_by(mem_proj, mem); } C->gvn_replace_by(init, C->top()); - init->disconnect_inputs(NULL); + init->disconnect_inputs(NULL, C); } Node_List PhaseStringOpts::collect_toString_calls() { @@ -1477,6 +1477,6 @@ void PhaseStringOpts::replace_string_concat(StringConcat* sc) { kit.replace_call(sc->end(), result); // Unhook any hook nodes - string_sizes->disconnect_inputs(NULL); + string_sizes->disconnect_inputs(NULL, C); sc->cleanup(); } diff --git a/hotspot/src/share/vm/opto/type.hpp b/hotspot/src/share/vm/opto/type.hpp index 39bbf0306a9..7868b2f7856 100644 --- a/hotspot/src/share/vm/opto/type.hpp +++ b/hotspot/src/share/vm/opto/type.hpp @@ -242,8 +242,10 @@ public: const TypeInt *isa_int() const; // Returns NULL if not an Int const TypeLong *is_long() const; const TypeLong *isa_long() const; // Returns NULL if not a Long + const TypeD *isa_double() const; // Returns NULL if not a Double{Top,Con,Bot} const TypeD *is_double_constant() const; // Asserts it is a DoubleCon const TypeD *isa_double_constant() const; // Returns NULL if not a DoubleCon + const TypeF *isa_float() const; // Returns NULL if not a Float{Top,Con,Bot} const TypeF *is_float_constant() const; // Asserts it is a FloatCon const TypeF *isa_float_constant() const; // Returns NULL if not a FloatCon const TypeTuple *is_tuple() const; // Collection of fields, NOT a pointer @@ -1320,24 +1322,6 @@ inline double Type::getd() const { return ((TypeD*)this)->_d; } -inline const TypeF *Type::is_float_constant() const { - assert( _base == FloatCon, "Not a Float" ); - return (TypeF*)this; -} - -inline const TypeF *Type::isa_float_constant() const { - return ( _base == FloatCon ? (TypeF*)this : NULL); -} - -inline const TypeD *Type::is_double_constant() const { - assert( _base == DoubleCon, "Not a Double" ); - return (TypeD*)this; -} - -inline const TypeD *Type::isa_double_constant() const { - return ( _base == DoubleCon ? (TypeD*)this : NULL); -} - inline const TypeInt *Type::is_int() const { assert( _base == Int, "Not an Int" ); return (TypeInt*)this; @@ -1356,6 +1340,36 @@ inline const TypeLong *Type::isa_long() const { return ( _base == Long ? (TypeLong*)this : NULL); } +inline const TypeF *Type::isa_float() const { + return ((_base == FloatTop || + _base == FloatCon || + _base == FloatBot) ? (TypeF*)this : NULL); +} + +inline const TypeF *Type::is_float_constant() const { + assert( _base == FloatCon, "Not a Float" ); + return (TypeF*)this; +} + +inline const TypeF *Type::isa_float_constant() const { + return ( _base == FloatCon ? (TypeF*)this : NULL); +} + +inline const TypeD *Type::isa_double() const { + return ((_base == DoubleTop || + _base == DoubleCon || + _base == DoubleBot) ? (TypeD*)this : NULL); +} + +inline const TypeD *Type::is_double_constant() const { + assert( _base == DoubleCon, "Not a Double" ); + return (TypeD*)this; +} + +inline const TypeD *Type::isa_double_constant() const { + return ( _base == DoubleCon ? (TypeD*)this : NULL); +} + inline const TypeTuple *Type::is_tuple() const { assert( _base == Tuple, "Not a Tuple" ); return (TypeTuple*)this; diff --git a/hotspot/src/share/vm/runtime/atomic.cpp b/hotspot/src/share/vm/runtime/atomic.cpp index 80780d78066..dbb66b2f63f 100644 --- a/hotspot/src/share/vm/runtime/atomic.cpp +++ b/hotspot/src/share/vm/runtime/atomic.cpp @@ -36,36 +36,8 @@ #ifdef TARGET_OS_FAMILY_bsd # include "os_bsd.inline.hpp" #endif -#ifdef TARGET_OS_ARCH_linux_x86 -# include "atomic_linux_x86.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_linux_sparc -# include "atomic_linux_sparc.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_linux_zero -# include "atomic_linux_zero.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_solaris_x86 -# include "atomic_solaris_x86.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_solaris_sparc -# include "atomic_solaris_sparc.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_windows_x86 -# include "atomic_windows_x86.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_linux_arm -# include "atomic_linux_arm.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_linux_ppc -# include "atomic_linux_ppc.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_bsd_x86 -# include "atomic_bsd_x86.inline.hpp" -#endif -#ifdef TARGET_OS_ARCH_bsd_zero -# include "atomic_bsd_zero.inline.hpp" -#endif + +#include "runtime/atomic.inline.hpp" jbyte Atomic::cmpxchg(jbyte exchange_value, volatile jbyte* dest, jbyte compare_value) { assert(sizeof(jbyte) == 1, "assumption."); diff --git a/hotspot/src/share/vm/runtime/atomic.hpp b/hotspot/src/share/vm/runtime/atomic.hpp index 1d5d721f4b1..0c96816d794 100644 --- a/hotspot/src/share/vm/runtime/atomic.hpp +++ b/hotspot/src/share/vm/runtime/atomic.hpp @@ -30,60 +30,59 @@ class Atomic : AllStatic { public: // Atomically store to a location - static void store (jbyte store_value, jbyte* dest); - static void store (jshort store_value, jshort* dest); - static void store (jint store_value, jint* dest); - static void store (jlong store_value, jlong* dest); - static void store_ptr(intptr_t store_value, intptr_t* dest); - static void store_ptr(void* store_value, void* dest); + inline static void store (jbyte store_value, jbyte* dest); + inline static void store (jshort store_value, jshort* dest); + inline static void store (jint store_value, jint* dest); + inline static void store (jlong store_value, jlong* dest); + inline static void store_ptr(intptr_t store_value, intptr_t* dest); + inline static void store_ptr(void* store_value, void* dest); - static void store (jbyte store_value, volatile jbyte* dest); - static void store (jshort store_value, volatile jshort* dest); - static void store (jint store_value, volatile jint* dest); - static void store (jlong store_value, volatile jlong* dest); - static void store_ptr(intptr_t store_value, volatile intptr_t* dest); - static void store_ptr(void* store_value, volatile void* dest); + inline static void store (jbyte store_value, volatile jbyte* dest); + inline static void store (jshort store_value, volatile jshort* dest); + inline static void store (jint store_value, volatile jint* dest); + inline static void store (jlong store_value, volatile jlong* dest); + inline static void store_ptr(intptr_t store_value, volatile intptr_t* dest); + inline static void store_ptr(void* store_value, volatile void* dest); - static jlong load(volatile jlong* src); + inline static jlong load(volatile jlong* src); // Atomically add to a location, return updated value - static jint add (jint add_value, volatile jint* dest); - static intptr_t add_ptr(intptr_t add_value, volatile intptr_t* dest); - static void* add_ptr(intptr_t add_value, volatile void* dest); + inline static jint add (jint add_value, volatile jint* dest); + inline static intptr_t add_ptr(intptr_t add_value, volatile intptr_t* dest); + inline static void* add_ptr(intptr_t add_value, volatile void* dest); - static jlong add (jlong add_value, volatile jlong* dest); + static jlong add (jlong add_value, volatile jlong* dest); // Atomically increment location - static void inc (volatile jint* dest); - static void inc_ptr(volatile intptr_t* dest); - static void inc_ptr(volatile void* dest); + inline static void inc (volatile jint* dest); + inline static void inc_ptr(volatile intptr_t* dest); + inline static void inc_ptr(volatile void* dest); // Atomically decrement a location - static void dec (volatile jint* dest); - static void dec_ptr(volatile intptr_t* dest); - static void dec_ptr(volatile void* dest); + inline static void dec (volatile jint* dest); + inline static void dec_ptr(volatile intptr_t* dest); + inline static void dec_ptr(volatile void* dest); // Performs atomic exchange of *dest with exchange_value. Returns old prior value of *dest. - static jint xchg(jint exchange_value, volatile jint* dest); - static unsigned int xchg(unsigned int exchange_value, - volatile unsigned int* dest); + inline static jint xchg(jint exchange_value, volatile jint* dest); + static unsigned int xchg(unsigned int exchange_value, volatile unsigned int* dest); - static intptr_t xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest); - static void* xchg_ptr(void* exchange_value, volatile void* dest); + inline static intptr_t xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest); + inline static void* xchg_ptr(void* exchange_value, volatile void* dest); // Performs atomic compare of *dest and compare_value, and exchanges *dest with exchange_value // if the comparison succeeded. Returns prior value of *dest. Guarantees a two-way memory // barrier across the cmpxchg. I.e., it's really a 'fence_cmpxchg_acquire'. - static jbyte cmpxchg (jbyte exchange_value, volatile jbyte* dest, jbyte compare_value); - static jint cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value); - static jlong cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value); + static jbyte cmpxchg (jbyte exchange_value, volatile jbyte* dest, jbyte compare_value); + inline static jint cmpxchg (jint exchange_value, volatile jint* dest, jint compare_value); + inline static jlong cmpxchg (jlong exchange_value, volatile jlong* dest, jlong compare_value); - static unsigned int cmpxchg(unsigned int exchange_value, - volatile unsigned int* dest, - unsigned int compare_value); + static unsigned int cmpxchg(unsigned int exchange_value, + volatile unsigned int* dest, + unsigned int compare_value); - static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value); - static void* cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value); + inline static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value); + inline static void* cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value); }; #endif // SHARE_VM_RUNTIME_ATOMIC_HPP diff --git a/hotspot/src/share/vm/runtime/atomic.inline.hpp b/hotspot/src/share/vm/runtime/atomic.inline.hpp new file mode 100644 index 00000000000..e89bf0c0887 --- /dev/null +++ b/hotspot/src/share/vm/runtime/atomic.inline.hpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP +#define SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP + +#include "runtime/atomic.hpp" + +// Linux +#ifdef TARGET_OS_ARCH_linux_x86 +# include "atomic_linux_x86.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_sparc +# include "atomic_linux_sparc.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_zero +# include "atomic_linux_zero.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_arm +# include "atomic_linux_arm.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_linux_ppc +# include "atomic_linux_ppc.inline.hpp" +#endif + +// Solaris +#ifdef TARGET_OS_ARCH_solaris_x86 +# include "atomic_solaris_x86.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_solaris_sparc +# include "atomic_solaris_sparc.inline.hpp" +#endif + +// Windows +#ifdef TARGET_OS_ARCH_windows_x86 +# include "atomic_windows_x86.inline.hpp" +#endif + +// BSD +#ifdef TARGET_OS_ARCH_bsd_x86 +# include "atomic_bsd_x86.inline.hpp" +#endif +#ifdef TARGET_OS_ARCH_bsd_zero +# include "atomic_bsd_zero.inline.hpp" +#endif + +#endif // SHARE_VM_RUNTIME_ATOMIC_INLINE_HPP diff --git a/hotspot/src/share/vm/runtime/deoptimization.cpp b/hotspot/src/share/vm/runtime/deoptimization.cpp index c761c0b337d..63df5aa5f88 100644 --- a/hotspot/src/share/vm/runtime/deoptimization.cpp +++ b/hotspot/src/share/vm/runtime/deoptimization.cpp @@ -1242,8 +1242,8 @@ JRT_ENTRY(void, Deoptimization::uncommon_trap_inner(JavaThread* thread, jint tra nmethodLocker nl(fr.pc()); // Log a message - Events::log_deopt_message(thread, "Uncommon trap %d fr.pc " INTPTR_FORMAT, - trap_request, fr.pc()); + Events::log(thread, "Uncommon trap: trap_request=" PTR32_FORMAT " fr.pc=" INTPTR_FORMAT, + trap_request, fr.pc()); { ResourceMark rm; @@ -1274,6 +1274,11 @@ JRT_ENTRY(void, Deoptimization::uncommon_trap_inner(JavaThread* thread, jint tra MethodData* trap_mdo = get_method_data(thread, trap_method, create_if_missing); + // Log a message + Events::log_deopt_message(thread, "Uncommon trap: reason=%s action=%s pc=" INTPTR_FORMAT " method=%s @ %d", + trap_reason_name(reason), trap_action_name(action), fr.pc(), + trap_method->name_and_sig_as_C_string(), trap_bci); + // Print a bunch of diagnostics, if requested. if (TraceDeoptimization || LogCompilation) { ResourceMark rm; diff --git a/hotspot/src/share/vm/runtime/frame.cpp b/hotspot/src/share/vm/runtime/frame.cpp index 1788d3d0de4..f20fca0154c 100644 --- a/hotspot/src/share/vm/runtime/frame.cpp +++ b/hotspot/src/share/vm/runtime/frame.cpp @@ -23,6 +23,7 @@ */ #include "precompiled.hpp" +#include "compiler/disassembler.hpp" #include "gc_interface/collectedHeap.inline.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/oopMapCache.hpp" diff --git a/hotspot/src/share/vm/runtime/frame.hpp b/hotspot/src/share/vm/runtime/frame.hpp index 6e990dce73d..2e27430f7fb 100644 --- a/hotspot/src/share/vm/runtime/frame.hpp +++ b/hotspot/src/share/vm/runtime/frame.hpp @@ -25,7 +25,6 @@ #ifndef SHARE_VM_RUNTIME_FRAME_HPP #define SHARE_VM_RUNTIME_FRAME_HPP -#include "asm/assembler.hpp" #include "oops/method.hpp" #include "runtime/basicLock.hpp" #include "runtime/monitorChunk.hpp" diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index 363f065bcd8..539235422d6 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -31,6 +31,7 @@ #include "compiler/abstractCompiler.hpp" #include "compiler/compileBroker.hpp" #include "compiler/compilerOracle.hpp" +#include "compiler/disassembler.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterRuntime.hpp" #include "memory/gcLocker.inline.hpp" diff --git a/hotspot/src/share/vm/runtime/stubCodeGenerator.cpp b/hotspot/src/share/vm/runtime/stubCodeGenerator.cpp index b6068a5d9c7..2ba8e159fec 100644 --- a/hotspot/src/share/vm/runtime/stubCodeGenerator.cpp +++ b/hotspot/src/share/vm/runtime/stubCodeGenerator.cpp @@ -23,25 +23,13 @@ */ #include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "code/codeCache.hpp" #include "compiler/disassembler.hpp" #include "oops/oop.inline.hpp" #include "prims/forte.hpp" #include "runtime/stubCodeGenerator.hpp" -#ifdef TARGET_ARCH_x86 -# include "assembler_x86.inline.hpp" -#endif -#ifdef TARGET_ARCH_sparc -# include "assembler_sparc.inline.hpp" -#endif -#ifdef TARGET_ARCH_zero -# include "assembler_zero.inline.hpp" -#endif -#ifdef TARGET_ARCH_arm -# include "assembler_arm.inline.hpp" -#endif -#ifdef TARGET_ARCH_ppc -# include "assembler_ppc.inline.hpp" -#endif // Implementation of StubCodeDesc diff --git a/hotspot/src/share/vm/runtime/thread.cpp b/hotspot/src/share/vm/runtime/thread.cpp index 23b0bbf32ad..9ca52bec2cf 100644 --- a/hotspot/src/share/vm/runtime/thread.cpp +++ b/hotspot/src/share/vm/runtime/thread.cpp @@ -3667,7 +3667,7 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { } // initialize compiler(s) -#if defined(COMPILER1) || defined(COMPILER2) +#if defined(COMPILER1) || defined(COMPILER2) || defined(SHARK) CompileBroker::compilation_init(); #endif diff --git a/hotspot/src/share/vm/shark/llvmHeaders.hpp b/hotspot/src/share/vm/shark/llvmHeaders.hpp index ab05e027274..1f2c2c79778 100644 --- a/hotspot/src/share/vm/shark/llvmHeaders.hpp +++ b/hotspot/src/share/vm/shark/llvmHeaders.hpp @@ -35,6 +35,7 @@ #undef DEBUG #endif +#include #include #include #include @@ -42,29 +43,21 @@ #include #include #include -#if SHARK_LLVM_VERSION < 27 -#include -#endif +#if SHARK_LLVM_VERSION <= 31 #include -#if SHARK_LLVM_VERSION >= 29 -#include #else -#include +#include #endif -#include +#include +#include #include #include #include -#if SHARK_LLVM_VERSION >= 27 +#include #include #include #include -#if SHARK_LLVM_VERSION >= 29 #include -#else -#include -#endif -#endif #include diff --git a/hotspot/src/share/vm/shark/llvmValue.hpp b/hotspot/src/share/vm/shark/llvmValue.hpp index f7451ac715e..4eef6ef8142 100644 --- a/hotspot/src/share/vm/shark/llvmValue.hpp +++ b/hotspot/src/share/vm/shark/llvmValue.hpp @@ -56,6 +56,10 @@ class LLVMValue : public AllStatic { { return llvm::ConstantPointerNull::get(SharkType::oop_type()); } + static llvm::ConstantPointerNull* nullKlass() + { + return llvm::ConstantPointerNull::get(SharkType::klass_type()); + } public: static llvm::ConstantInt* bit_constant(int value) diff --git a/hotspot/src/share/vm/shark/sharkBlock.cpp b/hotspot/src/share/vm/shark/sharkBlock.cpp index 6c8bee378ba..b4e98365efc 100644 --- a/hotspot/src/share/vm/shark/sharkBlock.cpp +++ b/hotspot/src/share/vm/shark/sharkBlock.cpp @@ -170,10 +170,12 @@ void SharkBlock::parse_bytecode(int start, int limit) { case Bytecodes::_ldc: case Bytecodes::_ldc_w: - case Bytecodes::_ldc2_w: - push(SharkConstant::for_ldc(iter())->value(builder())); + case Bytecodes::_ldc2_w: { + SharkConstant* constant = SharkConstant::for_ldc(iter()); + assert(constant->is_loaded(), "trap should handle unloaded classes"); + push(constant->value(builder())); break; - + } case Bytecodes::_iload_0: case Bytecodes::_lload_0: case Bytecodes::_fload_0: @@ -1000,9 +1002,9 @@ void SharkBlock::do_div_or_rem(bool is_long, bool is_rem) { builder()->SetInsertPoint(done); PHINode *result; if (is_long) - result = builder()->CreatePHI(SharkType::jlong_type(), "result"); + result = builder()->CreatePHI(SharkType::jlong_type(), 0, "result"); else - result = builder()->CreatePHI(SharkType::jint_type(), "result"); + result = builder()->CreatePHI(SharkType::jint_type(), 0, "result"); result->addIncoming(special_result, special_case); result->addIncoming(general_result, general_case); @@ -1036,12 +1038,12 @@ void SharkBlock::do_field_access(bool is_get, bool is_field) { value = constant->value(builder()); } if (!is_get || value == NULL) { - if (!is_field) - object = builder()->CreateInlineOop(field->holder()); - + if (!is_field) { + object = builder()->CreateInlineOop(field->holder()->java_mirror()); + } BasicType basic_type = field->type()->basic_type(); - const Type *stack_type = SharkType::to_stackType(basic_type); - const Type *field_type = SharkType::to_arrayType(basic_type); + Type *stack_type = SharkType::to_stackType(basic_type); + Type *field_type = SharkType::to_arrayType(basic_type); Value *addr = builder()->CreateAddressOfStructEntry( object, in_ByteSize(field->offset_in_bytes()), @@ -1050,8 +1052,12 @@ void SharkBlock::do_field_access(bool is_get, bool is_field) { // Do the access if (is_get) { - Value *field_value = builder()->CreateLoad(addr); - + Value* field_value; + if (field->is_volatile()) { + field_value = builder()->CreateAtomicLoad(addr); + } else { + field_value = builder()->CreateLoad(addr); + } if (field_type != stack_type) { field_value = builder()->CreateIntCast( field_value, stack_type, basic_type != T_CHAR); @@ -1067,13 +1073,15 @@ void SharkBlock::do_field_access(bool is_get, bool is_field) { field_value, field_type, basic_type != T_CHAR); } - builder()->CreateStore(field_value, addr); + if (field->is_volatile()) { + builder()->CreateAtomicStore(field_value, addr); + } else { + builder()->CreateStore(field_value, addr); + } - if (!field->type()->is_primitive_type()) + if (!field->type()->is_primitive_type()) { builder()->CreateUpdateBarrierSet(oopDesc::bs(), addr); - - if (field->is_volatile()) - builder()->CreateMemoryBarrier(SharkBuilder::BARRIER_STORELOAD); + } } } @@ -1105,7 +1113,7 @@ void SharkBlock::do_lcmp() { builder()->CreateBr(done); builder()->SetInsertPoint(done); - PHINode *result = builder()->CreatePHI(SharkType::jint_type(), "result"); + PHINode *result = builder()->CreatePHI(SharkType::jint_type(), 0, "result"); result->addIncoming(LLVMValue::jint_constant(-1), lt); result->addIncoming(LLVMValue::jint_constant(0), eq); result->addIncoming(LLVMValue::jint_constant(1), gt); @@ -1152,7 +1160,7 @@ void SharkBlock::do_fcmp(bool is_double, bool unordered_is_greater) { builder()->CreateBr(done); builder()->SetInsertPoint(done); - PHINode *result = builder()->CreatePHI(SharkType::jint_type(), "result"); + PHINode *result = builder()->CreatePHI(SharkType::jint_type(), 0, "result"); result->addIncoming(LLVMValue::jint_constant(-1), lt); result->addIncoming(LLVMValue::jint_constant(0), eq); result->addIncoming(LLVMValue::jint_constant(1), gt); diff --git a/hotspot/src/share/vm/shark/sharkBuilder.cpp b/hotspot/src/share/vm/shark/sharkBuilder.cpp index f4908ff26fb..f9c22bd1696 100644 --- a/hotspot/src/share/vm/shark/sharkBuilder.cpp +++ b/hotspot/src/share/vm/shark/sharkBuilder.cpp @@ -47,14 +47,14 @@ SharkBuilder::SharkBuilder(SharkCodeBuffer* code_buffer) // Helpers for accessing structures Value* SharkBuilder::CreateAddressOfStructEntry(Value* base, ByteSize offset, - const Type* type, + Type* type, const char* name) { return CreateBitCast(CreateStructGEP(base, in_bytes(offset)), type, name); } LoadInst* SharkBuilder::CreateValueOfStructEntry(Value* base, ByteSize offset, - const Type* type, + Type* type, const char* name) { return CreateLoad( CreateAddressOfStructEntry( @@ -71,7 +71,7 @@ LoadInst* SharkBuilder::CreateArrayLength(Value* arrayoop) { } Value* SharkBuilder::CreateArrayAddress(Value* arrayoop, - const Type* element_type, + Type* element_type, int element_bytes, ByteSize base_offset, Value* index, @@ -114,7 +114,7 @@ Value* SharkBuilder::CreateArrayAddress(Value* arrayoop, // Helpers for creating intrinsics and external functions. -const Type* SharkBuilder::make_type(char type, bool void_ok) { +Type* SharkBuilder::make_type(char type, bool void_ok) { switch (type) { // Primitive types case 'c': @@ -146,6 +146,8 @@ const Type* SharkBuilder::make_type(char type, bool void_ok) { return PointerType::getUnqual(SharkType::monitor_type()); case 'O': return SharkType::oop_type(); + case 'K': + return SharkType::klass_type(); // Miscellaneous case 'v': @@ -159,14 +161,14 @@ const Type* SharkBuilder::make_type(char type, bool void_ok) { } } -const FunctionType* SharkBuilder::make_ftype(const char* params, +FunctionType* SharkBuilder::make_ftype(const char* params, const char* ret) { - std::vector param_types; + std::vector param_types; for (const char* c = params; *c; c++) param_types.push_back(make_type(*c, false)); assert(strlen(ret) == 1, "should be"); - const Type *return_type = make_type(*ret, true); + Type *return_type = make_type(*ret, true); return FunctionType::get(return_type, param_types, false); } @@ -274,7 +276,7 @@ Value* SharkBuilder::d2l() { } Value* SharkBuilder::is_subtype_of() { - return make_function((address) SharkRuntime::is_subtype_of, "OO", "c"); + return make_function((address) SharkRuntime::is_subtype_of, "KK", "c"); } Value* SharkBuilder::current_time_millis() { @@ -352,79 +354,14 @@ Value* SharkBuilder::check_special_condition_for_native_trans() { "T", "v"); } -// Low-level non-VM calls - -// The ARM-specific code here is to work around unimplemented -// atomic exchange and memory barrier intrinsics in LLVM. -// -// Delegating to external functions for these would normally -// incur a speed penalty, but Linux on ARM is a special case -// in that atomic operations on that platform are handled by -// external functions anyway. It would be *preferable* for -// the calls to be hidden away in LLVM, but it's not hurting -// performance so having the calls here is acceptable. -// -// If you are building Shark on a platform without atomic -// exchange and/or memory barrier intrinsics then it is only -// acceptable to mimic this approach if your platform cannot -// perform these operations without delegating to a function. - -#ifdef ARM -static jint zero_cmpxchg_int(volatile jint *ptr, jint oldval, jint newval) { - return Atomic::cmpxchg(newval, ptr, oldval); -} -#endif // ARM - -Value* SharkBuilder::cmpxchg_int() { - return make_function( -#ifdef ARM - (address) zero_cmpxchg_int, -#else - "llvm.atomic.cmp.swap.i32.p0i32", -#endif // ARM - "Iii", "i"); -} - -#ifdef ARM -static intptr_t zero_cmpxchg_ptr(volatile intptr_t* ptr, - intptr_t oldval, - intptr_t newval) { - return Atomic::cmpxchg_ptr(newval, ptr, oldval); -} -#endif // ARM - -Value* SharkBuilder::cmpxchg_ptr() { - return make_function( -#ifdef ARM - (address) zero_cmpxchg_ptr, -#else - "llvm.atomic.cmp.swap.i" LP64_ONLY("64") NOT_LP64("32") ".p0i" LP64_ONLY("64") NOT_LP64("32"), -#endif // ARM - "Xxx", "x"); -} - Value* SharkBuilder::frame_address() { return make_function("llvm.frameaddress", "i", "C"); } -Value* SharkBuilder::memory_barrier() { - return make_function( -#ifdef ARM - (address) 0xffff0fa0, // __kernel_dmb -#else - "llvm.memory.barrier", -#endif // ARM - "11111", "v"); -} - Value* SharkBuilder::memset() { -#if SHARK_LLVM_VERSION >= 28 // LLVM 2.8 added a fifth isVolatile field for memset // introduced with LLVM r100304 - return make_function("llvm.memset.i32", "Cciii", "v"); -#else - return make_function("llvm.memset.i32", "Ccii", "v"); -#endif + return make_function("llvm.memset.p0i8.i32", "Cciii", "v"); } Value* SharkBuilder::unimplemented() { @@ -441,43 +378,16 @@ Value* SharkBuilder::dump() { // Public interface to low-level non-VM calls -CallInst* SharkBuilder::CreateCmpxchgInt(Value* exchange_value, - Value* dst, - Value* compare_value) { - return CreateCall3(cmpxchg_int(), dst, compare_value, exchange_value); -} - -CallInst* SharkBuilder::CreateCmpxchgPtr(Value* exchange_value, - Value* dst, - Value* compare_value) { - return CreateCall3(cmpxchg_ptr(), dst, compare_value, exchange_value); -} - CallInst* SharkBuilder::CreateGetFrameAddress() { return CreateCall(frame_address(), LLVMValue::jint_constant(0)); } -CallInst *SharkBuilder::CreateMemoryBarrier(int flags) { - Value *args[] = { - LLVMValue::bit_constant((flags & BARRIER_LOADLOAD) ? 1 : 0), - LLVMValue::bit_constant((flags & BARRIER_LOADSTORE) ? 1 : 0), - LLVMValue::bit_constant((flags & BARRIER_STORELOAD) ? 1 : 0), - LLVMValue::bit_constant((flags & BARRIER_STORESTORE) ? 1 : 0), - LLVMValue::bit_constant(1)}; - - return CreateCall(memory_barrier(), args, args + 5); -} - CallInst* SharkBuilder::CreateMemset(Value* dst, Value* value, Value* len, Value* align) { -#if SHARK_LLVM_VERSION >= 28 return CreateCall5(memset(), dst, value, len, align, LLVMValue::jint_constant(0)); -#else - return CreateCall4(memset(), dst, value, len, align); -#endif } CallInst* SharkBuilder::CreateUnimplemented(const char* file, int line) { @@ -510,11 +420,7 @@ CallInst* SharkBuilder::CreateDump(Value* value) { if (isa(value->getType())) value = CreatePtrToInt(value, SharkType::intptr_type()); else if (value->getType()-> -#if SHARK_LLVM_VERSION >= 27 isIntegerTy() -#else - isInteger() -#endif ) value = CreateIntCast(value, SharkType::intptr_type(), false); else @@ -563,9 +469,19 @@ Value* SharkBuilder::CreateInlineOop(jobject object, const char* name) { name); } +Value* SharkBuilder::CreateInlineMetadata(Metadata* metadata, llvm::PointerType* type, const char* name) { + assert(metadata != NULL, "inlined metadata must not be NULL"); + assert(metadata->is_metadata(), "sanity check"); + return CreateLoad( + CreateIntToPtr( + code_buffer_address(code_buffer()->inline_Metadata(metadata)), + PointerType::getUnqual(type)), + name); +} + Value* SharkBuilder::CreateInlineData(void* data, size_t size, - const Type* type, + Type* type, const char* name) { return CreateIntToPtr( code_buffer_address(code_buffer()->inline_data(data, size)), @@ -600,3 +516,11 @@ BasicBlock* SharkBuilder::CreateBlock(BasicBlock* ip, const char* name) const { return BasicBlock::Create( SharkContext::current(), name, GetInsertBlock()->getParent(), ip); } + +LoadInst* SharkBuilder::CreateAtomicLoad(Value* ptr, unsigned align, AtomicOrdering ordering, SynchronizationScope synchScope, bool isVolatile, const char* name) { + return Insert(new LoadInst(ptr, name, isVolatile, align, ordering, synchScope), name); +} + +StoreInst* SharkBuilder::CreateAtomicStore(Value* val, Value* ptr, unsigned align, AtomicOrdering ordering, SynchronizationScope synchScope, bool isVolatile, const char* name) { + return Insert(new StoreInst(val, ptr, isVolatile, align, ordering, synchScope), name); +} diff --git a/hotspot/src/share/vm/shark/sharkBuilder.hpp b/hotspot/src/share/vm/shark/sharkBuilder.hpp index 226e23b242d..e594a196026 100644 --- a/hotspot/src/share/vm/shark/sharkBuilder.hpp +++ b/hotspot/src/share/vm/shark/sharkBuilder.hpp @@ -53,22 +53,37 @@ class SharkBuilder : public llvm::IRBuilder<> { return _code_buffer; } + public: + llvm::LoadInst* CreateAtomicLoad(llvm::Value* ptr, + unsigned align = HeapWordSize, + llvm::AtomicOrdering ordering = llvm::SequentiallyConsistent, + llvm::SynchronizationScope synchScope = llvm::CrossThread, + bool isVolatile = true, + const char *name = ""); + llvm::StoreInst* CreateAtomicStore(llvm::Value *val, + llvm::Value *ptr, + unsigned align = HeapWordSize, + llvm::AtomicOrdering ordering = llvm::SequentiallyConsistent, + llvm::SynchronizationScope SynchScope = llvm::CrossThread, + bool isVolatile = true, + const char *name = ""); + // Helpers for accessing structures. public: llvm::Value* CreateAddressOfStructEntry(llvm::Value* base, ByteSize offset, - const llvm::Type* type, + llvm::Type* type, const char *name = ""); llvm::LoadInst* CreateValueOfStructEntry(llvm::Value* base, ByteSize offset, - const llvm::Type* type, + llvm::Type* type, const char *name = ""); // Helpers for accessing arrays. public: llvm::LoadInst* CreateArrayLength(llvm::Value* arrayoop); llvm::Value* CreateArrayAddress(llvm::Value* arrayoop, - const llvm::Type* element_type, + llvm::Type* element_type, int element_bytes, ByteSize base_offset, llvm::Value* index, @@ -85,8 +100,8 @@ class SharkBuilder : public llvm::IRBuilder<> { // Helpers for creating intrinsics and external functions. private: - static const llvm::Type* make_type(char type, bool void_ok); - static const llvm::FunctionType* make_ftype(const char* params, + static llvm::Type* make_type(char type, bool void_ok); + static llvm::FunctionType* make_ftype(const char* params, const char* ret); llvm::Value* make_function(const char* name, const char* params, @@ -165,7 +180,6 @@ class SharkBuilder : public llvm::IRBuilder<> { llvm::Value* cmpxchg_int(); llvm::Value* cmpxchg_ptr(); llvm::Value* frame_address(); - llvm::Value* memory_barrier(); llvm::Value* memset(); llvm::Value* unimplemented(); llvm::Value* should_not_reach_here(); @@ -173,14 +187,7 @@ class SharkBuilder : public llvm::IRBuilder<> { // Public interface to low-level non-VM calls. public: - llvm::CallInst* CreateCmpxchgInt(llvm::Value* exchange_value, - llvm::Value* dst, - llvm::Value* compare_value); - llvm::CallInst* CreateCmpxchgPtr(llvm::Value* exchange_value, - llvm::Value* dst, - llvm::Value* compare_value); llvm::CallInst* CreateGetFrameAddress(); - llvm::CallInst* CreateMemoryBarrier(int flags); llvm::CallInst* CreateMemset(llvm::Value* dst, llvm::Value* value, llvm::Value* len, @@ -189,15 +196,6 @@ class SharkBuilder : public llvm::IRBuilder<> { llvm::CallInst* CreateShouldNotReachHere(const char* file, int line); NOT_PRODUCT(llvm::CallInst* CreateDump(llvm::Value* value)); - // Flags for CreateMemoryBarrier. - public: - enum BarrierFlags { - BARRIER_LOADLOAD = 1, - BARRIER_LOADSTORE = 2, - BARRIER_STORELOAD = 4, - BARRIER_STORESTORE = 8 - }; - // HotSpot memory barriers public: void CreateUpdateBarrierSet(BarrierSet* bs, llvm::Value* field); @@ -209,9 +207,14 @@ class SharkBuilder : public llvm::IRBuilder<> { llvm::Value* CreateInlineOop(ciObject* object, const char* name = "") { return CreateInlineOop(object->constant_encoding(), name); } + + llvm::Value* CreateInlineMetadata(Metadata* metadata, llvm::PointerType* type, const char* name = ""); + llvm::Value* CreateInlineMetadata(ciMetadata* metadata, llvm::PointerType* type, const char* name = "") { + return CreateInlineMetadata(metadata->constant_encoding(), type, name); + } llvm::Value* CreateInlineData(void* data, size_t size, - const llvm::Type* type, + llvm::Type* type, const char* name = ""); // Helpers for creating basic blocks. @@ -222,5 +225,4 @@ class SharkBuilder : public llvm::IRBuilder<> { llvm::BasicBlock* CreateBlock(llvm::BasicBlock* ip, const char* name="") const; }; - -#endif // SHARE_VM_SHARK_SHARKBUILDER_HPP + #endif // SHARE_VM_SHARK_SHARKBUILDER_HPP diff --git a/hotspot/src/share/vm/shark/sharkCacheDecache.cpp b/hotspot/src/share/vm/shark/sharkCacheDecache.cpp index 98c86a6c16a..809322f9d40 100644 --- a/hotspot/src/share/vm/shark/sharkCacheDecache.cpp +++ b/hotspot/src/share/vm/shark/sharkCacheDecache.cpp @@ -107,11 +107,10 @@ void SharkDecacher::process_oop_tmp_slot(Value** value, int offset) { void SharkDecacher::process_method_slot(Value** value, int offset) { // Decache the method pointer write_value_to_frame( - SharkType::Method*_type(), + SharkType::Method_type(), *value, offset); - oopmap()->set_oop(slot2reg(offset)); } void SharkDecacher::process_pc_slot(int offset) { @@ -205,7 +204,7 @@ void SharkCacher::process_oop_tmp_slot(Value** value, int offset) { void SharkCacher::process_method_slot(Value** value, int offset) { // Cache the method pointer - *value = read_value_from_frame(SharkType::Method*_type(), offset); + *value = read_value_from_frame(SharkType::Method_type(), offset); } void SharkFunctionEntryCacher::process_method_slot(Value** value, int offset) { @@ -230,7 +229,7 @@ void SharkCacher::process_local_slot(int index, } Value* SharkOSREntryCacher::CreateAddressOfOSRBufEntry(int offset, - const Type* type) { + Type* type) { Value *result = builder()->CreateStructGEP(osr_buf(), offset); if (type != SharkType::intptr_type()) result = builder()->CreateBitCast(result, PointerType::getUnqual(type)); @@ -254,12 +253,12 @@ void SharkOSREntryCacher::process_local_slot(int index, } } -void SharkDecacher::write_value_to_frame(const Type* type, +void SharkDecacher::write_value_to_frame(Type* type, Value* value, int offset) { builder()->CreateStore(value, stack()->slot_addr(offset, type)); } -Value* SharkCacher::read_value_from_frame(const Type* type, int offset) { +Value* SharkCacher::read_value_from_frame(Type* type, int offset) { return builder()->CreateLoad(stack()->slot_addr(offset, type)); } diff --git a/hotspot/src/share/vm/shark/sharkCacheDecache.hpp b/hotspot/src/share/vm/shark/sharkCacheDecache.hpp index 6432a58c299..bdc77a55dcc 100644 --- a/hotspot/src/share/vm/shark/sharkCacheDecache.hpp +++ b/hotspot/src/share/vm/shark/sharkCacheDecache.hpp @@ -192,7 +192,7 @@ class SharkDecacher : public SharkCacherDecacher { // Writer helper protected: - void write_value_to_frame(const llvm::Type* type, + void write_value_to_frame(llvm::Type* type, llvm::Value* value, int offset); }; @@ -321,7 +321,7 @@ class SharkCacher : public SharkCacherDecacher { // Writer helper protected: - llvm::Value* read_value_from_frame(const llvm::Type* type, int offset); + llvm::Value* read_value_from_frame(llvm::Type* type, int offset); }; class SharkJavaCallCacher : public SharkCacher { @@ -422,7 +422,7 @@ class SharkOSREntryCacher : public SharkFunctionEntryCacher { // Helper private: - llvm::Value* CreateAddressOfOSRBufEntry(int offset, const llvm::Type* type); + llvm::Value* CreateAddressOfOSRBufEntry(int offset, llvm::Type* type); }; #endif // SHARE_VM_SHARK_SHARKCACHEDECACHE_HPP diff --git a/hotspot/src/share/vm/shark/sharkCodeBuffer.hpp b/hotspot/src/share/vm/shark/sharkCodeBuffer.hpp index 745112012a6..68b2506dd75 100644 --- a/hotspot/src/share/vm/shark/sharkCodeBuffer.hpp +++ b/hotspot/src/share/vm/shark/sharkCodeBuffer.hpp @@ -81,6 +81,13 @@ class SharkCodeBuffer : public StackObj { return offset; } + int inline_Metadata(Metadata* metadata) const { + masm()->align(BytesPerWord); + int offset = masm()->offset(); + masm()->store_Metadata(metadata); + return offset; + } + // Inline a block of non-oop data into the buffer and return its offset. public: int inline_data(void *src, size_t size) const { diff --git a/hotspot/src/share/vm/shark/sharkCompiler.cpp b/hotspot/src/share/vm/shark/sharkCompiler.cpp index 53851770fc7..3438240ad00 100644 --- a/hotspot/src/share/vm/shark/sharkCompiler.cpp +++ b/hotspot/src/share/vm/shark/sharkCompiler.cpp @@ -48,7 +48,6 @@ using namespace llvm; -#if SHARK_LLVM_VERSION >= 27 namespace { cl::opt MCPU("mcpu"); @@ -57,7 +56,6 @@ namespace { MAttrs("mattr", cl::CommaSeparated); } -#endif SharkCompiler::SharkCompiler() : AbstractCompiler() { @@ -72,6 +70,9 @@ SharkCompiler::SharkCompiler() // Initialize the native target InitializeNativeTarget(); + // MCJIT require a native AsmPrinter + InitializeNativeTargetAsmPrinter(); + // Create the two contexts which we'll use _normal_context = new SharkContext("normal"); _native_context = new SharkContext("native"); @@ -79,7 +80,6 @@ SharkCompiler::SharkCompiler() // Create the memory manager _memory_manager = new SharkMemoryManager(); -#if SHARK_LLVM_VERSION >= 27 // Finetune LLVM for the current host CPU. StringMap Features; bool gotCpuFeatures = llvm::sys::getHostCPUFeatures(Features); @@ -113,6 +113,16 @@ SharkCompiler::SharkCompiler() builder.setJITMemoryManager(memory_manager()); builder.setEngineKind(EngineKind::JIT); builder.setErrorStr(&ErrorMsg); + if (! fnmatch(SharkOptimizationLevel, "None", 0)) { + tty->print_cr("Shark optimization level set to: None"); + builder.setOptLevel(llvm::CodeGenOpt::None); + } else if (! fnmatch(SharkOptimizationLevel, "Less", 0)) { + tty->print_cr("Shark optimization level set to: Less"); + builder.setOptLevel(llvm::CodeGenOpt::Less); + } else if (! fnmatch(SharkOptimizationLevel, "Aggressive", 0)) { + tty->print_cr("Shark optimization level set to: Aggressive"); + builder.setOptLevel(llvm::CodeGenOpt::Aggressive); + } // else Default is selected by, well, default :-) _execution_engine = builder.create(); if (!execution_engine()) { @@ -125,13 +135,6 @@ SharkCompiler::SharkCompiler() execution_engine()->addModule( _native_context->module()); -#else - _execution_engine = ExecutionEngine::createJIT( - _normal_context->module_provider(), - NULL, memory_manager(), CodeGenOpt::Default); - execution_engine()->addModuleProvider( - _native_context->module_provider()); -#endif // All done mark_initialized(); @@ -261,6 +264,12 @@ void SharkCompiler::generate_native_code(SharkEntry* entry, function->dump(); } + if (SharkVerifyFunction != NULL) { + if (!fnmatch(SharkVerifyFunction, name, 0)) { + verifyFunction(*function); + } + } + // Compile to native code address code = NULL; context()->add_function(function); @@ -268,33 +277,28 @@ void SharkCompiler::generate_native_code(SharkEntry* entry, MutexLocker locker(execution_engine_lock()); free_queued_methods(); - if (SharkPrintAsmOf != NULL) { -#if SHARK_LLVM_VERSION >= 27 #ifndef NDEBUG +#if SHARK_LLVM_VERSION <= 31 +#define setCurrentDebugType SetCurrentDebugType +#endif + if (SharkPrintAsmOf != NULL) { if (!fnmatch(SharkPrintAsmOf, name, 0)) { - llvm::SetCurrentDebugType(X86_ONLY("x86-emitter") NOT_X86("jit")); + llvm::setCurrentDebugType(X86_ONLY("x86-emitter") NOT_X86("jit")); llvm::DebugFlag = true; } else { - llvm::SetCurrentDebugType(""); + llvm::setCurrentDebugType(""); llvm::DebugFlag = false; } -#endif // !NDEBUG -#else - // NB you need to patch LLVM with http://tinyurl.com/yf3baln for this - std::vector args; - args.push_back(""); // program name - if (!fnmatch(SharkPrintAsmOf, name, 0)) - args.push_back("-debug-only=x86-emitter"); - else - args.push_back("-debug-only=none"); - args.push_back(0); // terminator - cl::ParseCommandLineOptions(args.size() - 1, (char **) &args[0]); -#endif // SHARK_LLVM_VERSION } +#ifdef setCurrentDebugType +#undef setCurrentDebugType +#endif +#endif // !NDEBUG memory_manager()->set_entry_for_function(function, entry); code = (address) execution_engine()->getPointerToFunction(function); } + assert(code != NULL, "code must be != NULL"); entry->set_entry_point(code); entry->set_function(function); entry->set_context(context()); @@ -319,8 +323,8 @@ void SharkCompiler::free_compiled_method(address code) { // finish with the exception of the VM thread, so we can consider // ourself the owner of the execution engine lock even though we // can't actually acquire it at this time. - assert(Thread::current()->is_VM_thread(), "must be called by VM thread"); - assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint"); + assert(Thread::current()->is_Compiler_thread(), "must be called by compiler thread"); + assert_locked_or_safepoint(CodeCache_lock); SharkEntry *entry = (SharkEntry *) code; entry->context()->push_to_free_queue(entry->function()); diff --git a/hotspot/src/share/vm/shark/sharkConstant.cpp b/hotspot/src/share/vm/shark/sharkConstant.cpp index 50923bd57aa..b45ec136e77 100644 --- a/hotspot/src/share/vm/shark/sharkConstant.cpp +++ b/hotspot/src/share/vm/shark/sharkConstant.cpp @@ -37,10 +37,8 @@ SharkConstant* SharkConstant::for_ldc(ciBytecodeStream *iter) { ciType *type = NULL; if (constant.basic_type() == T_OBJECT) { ciEnv *env = ciEnv::current(); - if (constant.as_object()->is_klass()) - type = env->Class_klass(); - else - type = env->String_klass(); + assert(constant.as_object()->klass() == env->String_klass() || constant.as_object()->klass() == env->Class_klass(), "should be"); + type = constant.as_object()->klass(); } return new SharkConstant(constant, type); } @@ -108,17 +106,16 @@ SharkConstant::SharkConstant(ciConstant constant, ciType *type) { // objects (which differ between ldc* and get*, thanks!) ciObject *object = constant.as_object(); assert(type != NULL, "shouldn't be"); - if (object->is_klass()) { - // The constant returned for a klass is the ciKlass - // for the entry, but we want the java_mirror. - ciKlass *klass = object->as_klass(); - if (!klass->is_loaded()) { + + if ((! object->is_null_object()) && object->klass() == ciEnv::current()->Class_klass()) { + ciKlass *klass = object->klass(); + if (! klass->is_loaded()) { _is_loaded = false; return; } - object = klass->java_mirror(); } - if (object->is_null_object() || !object->can_be_constant()) { + + if (object->is_null_object() || ! object->can_be_constant() || ! object->is_loaded()) { _is_loaded = false; return; } diff --git a/hotspot/src/share/vm/shark/sharkContext.cpp b/hotspot/src/share/vm/shark/sharkContext.cpp index 098abb8a5aa..0fc86f0b14c 100644 --- a/hotspot/src/share/vm/shark/sharkContext.cpp +++ b/hotspot/src/share/vm/shark/sharkContext.cpp @@ -29,6 +29,7 @@ #include "shark/llvmHeaders.hpp" #include "shark/sharkContext.hpp" #include "utilities/globalDefinitions.hpp" +#include "memory/allocation.hpp" using namespace llvm; @@ -52,6 +53,9 @@ SharkContext::SharkContext(const char* name) _itableOffsetEntry_type = PointerType::getUnqual( ArrayType::get(jbyte_type(), itableOffsetEntry::size() * wordSize)); + _Metadata_type = PointerType::getUnqual( + ArrayType::get(jbyte_type(), sizeof(Metadata))); + _klass_type = PointerType::getUnqual( ArrayType::get(jbyte_type(), sizeof(Klass))); @@ -61,7 +65,7 @@ SharkContext::SharkContext(const char* name) _jniHandleBlock_type = PointerType::getUnqual( ArrayType::get(jbyte_type(), sizeof(JNIHandleBlock))); - _Method*_type = PointerType::getUnqual( + _Method_type = PointerType::getUnqual( ArrayType::get(jbyte_type(), sizeof(Method))); _monitor_type = ArrayType::get( @@ -76,14 +80,14 @@ SharkContext::SharkContext(const char* name) _zeroStack_type = PointerType::getUnqual( ArrayType::get(jbyte_type(), sizeof(ZeroStack))); - std::vector params; - params.push_back(Method*_type()); + std::vector params; + params.push_back(Method_type()); params.push_back(intptr_type()); params.push_back(thread_type()); _entry_point_type = FunctionType::get(jint_type(), params, false); params.clear(); - params.push_back(Method*_type()); + params.push_back(Method_type()); params.push_back(PointerType::getUnqual(jbyte_type())); params.push_back(intptr_type()); params.push_back(thread_type()); @@ -150,7 +154,7 @@ SharkContext::SharkContext(const char* name) } } -class SharkFreeQueueItem : public CHeapObj { +class SharkFreeQueueItem : public CHeapObj { public: SharkFreeQueueItem(llvm::Function* function, SharkFreeQueueItem *next) : _function(function), _next(next) {} diff --git a/hotspot/src/share/vm/shark/sharkContext.hpp b/hotspot/src/share/vm/shark/sharkContext.hpp index 470b9114ac1..952cf13b7b7 100644 --- a/hotspot/src/share/vm/shark/sharkContext.hpp +++ b/hotspot/src/share/vm/shark/sharkContext.hpp @@ -42,11 +42,7 @@ class SharkContext : public llvm::LLVMContext { private: llvm::Module* _module; -#if SHARK_LLVM_VERSION >= 27 public: -#else - private: -#endif llvm::Module* module() const { return _module; } @@ -59,127 +55,126 @@ class SharkContext : public llvm::LLVMContext { // Module accessors public: -#if SHARK_LLVM_VERSION < 27 - llvm::ModuleProvider* module_provider() const { - return new llvm::ExistingModuleProvider(module()); - } -#endif void add_function(llvm::Function* function) const { module()->getFunctionList().push_back(function); } llvm::Constant* get_external(const char* name, - const llvm::FunctionType* sig) { + llvm::FunctionType* sig) { return module()->getOrInsertFunction(name, sig); } // Basic types private: - const llvm::Type* _void_type; - const llvm::IntegerType* _bit_type; - const llvm::IntegerType* _jbyte_type; - const llvm::IntegerType* _jshort_type; - const llvm::IntegerType* _jint_type; - const llvm::IntegerType* _jlong_type; - const llvm::Type* _jfloat_type; - const llvm::Type* _jdouble_type; + llvm::Type* _void_type; + llvm::IntegerType* _bit_type; + llvm::IntegerType* _jbyte_type; + llvm::IntegerType* _jshort_type; + llvm::IntegerType* _jint_type; + llvm::IntegerType* _jlong_type; + llvm::Type* _jfloat_type; + llvm::Type* _jdouble_type; public: - const llvm::Type* void_type() const { + llvm::Type* void_type() const { return _void_type; } - const llvm::IntegerType* bit_type() const { + llvm::IntegerType* bit_type() const { return _bit_type; } - const llvm::IntegerType* jbyte_type() const { + llvm::IntegerType* jbyte_type() const { return _jbyte_type; } - const llvm::IntegerType* jshort_type() const { + llvm::IntegerType* jshort_type() const { return _jshort_type; } - const llvm::IntegerType* jint_type() const { + llvm::IntegerType* jint_type() const { return _jint_type; } - const llvm::IntegerType* jlong_type() const { + llvm::IntegerType* jlong_type() const { return _jlong_type; } - const llvm::Type* jfloat_type() const { + llvm::Type* jfloat_type() const { return _jfloat_type; } - const llvm::Type* jdouble_type() const { + llvm::Type* jdouble_type() const { return _jdouble_type; } - const llvm::IntegerType* intptr_type() const { + llvm::IntegerType* intptr_type() const { return LP64_ONLY(jlong_type()) NOT_LP64(jint_type()); } // Compound types private: - const llvm::PointerType* _itableOffsetEntry_type; - const llvm::PointerType* _jniEnv_type; - const llvm::PointerType* _jniHandleBlock_type; - const llvm::PointerType* _klass_type; - const llvm::PointerType* _Method*_type; - const llvm::ArrayType* _monitor_type; - const llvm::PointerType* _oop_type; - const llvm::PointerType* _thread_type; - const llvm::PointerType* _zeroStack_type; - const llvm::FunctionType* _entry_point_type; - const llvm::FunctionType* _osr_entry_point_type; + llvm::PointerType* _itableOffsetEntry_type; + llvm::PointerType* _jniEnv_type; + llvm::PointerType* _jniHandleBlock_type; + llvm::PointerType* _Metadata_type; + llvm::PointerType* _klass_type; + llvm::PointerType* _Method_type; + llvm::ArrayType* _monitor_type; + llvm::PointerType* _oop_type; + llvm::PointerType* _thread_type; + llvm::PointerType* _zeroStack_type; + llvm::FunctionType* _entry_point_type; + llvm::FunctionType* _osr_entry_point_type; public: - const llvm::PointerType* itableOffsetEntry_type() const { + llvm::PointerType* itableOffsetEntry_type() const { return _itableOffsetEntry_type; } - const llvm::PointerType* jniEnv_type() const { + llvm::PointerType* jniEnv_type() const { return _jniEnv_type; } - const llvm::PointerType* jniHandleBlock_type() const { + llvm::PointerType* jniHandleBlock_type() const { return _jniHandleBlock_type; } - const llvm::PointerType* klass_type() const { + llvm::PointerType* Metadata_type() const { + return _Metadata_type; + } + llvm::PointerType* klass_type() const { return _klass_type; } - const llvm::PointerType* Method*_type() const { - return _Method*_type; + llvm::PointerType* Method_type() const { + return _Method_type; } - const llvm::ArrayType* monitor_type() const { + llvm::ArrayType* monitor_type() const { return _monitor_type; } - const llvm::PointerType* oop_type() const { + llvm::PointerType* oop_type() const { return _oop_type; } - const llvm::PointerType* thread_type() const { + llvm::PointerType* thread_type() const { return _thread_type; } - const llvm::PointerType* zeroStack_type() const { + llvm::PointerType* zeroStack_type() const { return _zeroStack_type; } - const llvm::FunctionType* entry_point_type() const { + llvm::FunctionType* entry_point_type() const { return _entry_point_type; } - const llvm::FunctionType* osr_entry_point_type() const { + llvm::FunctionType* osr_entry_point_type() const { return _osr_entry_point_type; } // Mappings private: - const llvm::Type* _to_stackType[T_CONFLICT]; - const llvm::Type* _to_arrayType[T_CONFLICT]; + llvm::Type* _to_stackType[T_CONFLICT]; + llvm::Type* _to_arrayType[T_CONFLICT]; private: - const llvm::Type* map_type(const llvm::Type* const* table, + llvm::Type* map_type(llvm::Type* const* table, BasicType type) const { assert(type >= 0 && type < T_CONFLICT, "unhandled type"); - const llvm::Type* result = table[type]; + llvm::Type* result = table[type]; assert(result != NULL, "unhandled type"); return result; } public: - const llvm::Type* to_stackType(BasicType type) const { + llvm::Type* to_stackType(BasicType type) const { return map_type(_to_stackType, type); } - const llvm::Type* to_arrayType(BasicType type) const { + llvm::Type* to_arrayType(BasicType type) const { return map_type(_to_arrayType, type); } diff --git a/hotspot/src/share/vm/shark/sharkFunction.hpp b/hotspot/src/share/vm/shark/sharkFunction.hpp index cc34a1fdc9b..47a480fa14f 100644 --- a/hotspot/src/share/vm/shark/sharkFunction.hpp +++ b/hotspot/src/share/vm/shark/sharkFunction.hpp @@ -91,7 +91,7 @@ class SharkFunction : public SharkTargetInvariants { bool is_osr() const { return flow()->is_osr_flow(); } - const llvm::FunctionType* entry_point_type() const { + llvm::FunctionType* entry_point_type() const { if (is_osr()) return SharkType::osr_entry_point_type(); else diff --git a/hotspot/src/share/vm/shark/sharkIntrinsics.cpp b/hotspot/src/share/vm/shark/sharkIntrinsics.cpp index 8efaa432e2e..9ad91c96d79 100644 --- a/hotspot/src/share/vm/shark/sharkIntrinsics.cpp +++ b/hotspot/src/share/vm/shark/sharkIntrinsics.cpp @@ -171,7 +171,7 @@ void SharkIntrinsics::do_Math_minmax(ICmpInst::Predicate p) { builder()->CreateBr(done); builder()->SetInsertPoint(done); - PHINode *phi = builder()->CreatePHI(a->getType(), "result"); + PHINode *phi = builder()->CreatePHI(a->getType(), 0, "result"); phi->addIncoming(a, return_a); phi->addIncoming(b, return_b); @@ -210,7 +210,7 @@ void SharkIntrinsics::do_Object_getClass() { Value *klass = builder()->CreateValueOfStructEntry( state()->pop()->jobject_value(), in_ByteSize(oopDesc::klass_offset_in_bytes()), - SharkType::oop_type(), + SharkType::klass_type(), "klass"); state()->push( @@ -265,8 +265,7 @@ void SharkIntrinsics::do_Unsafe_compareAndSwapInt() { "addr"); // Perform the operation - Value *result = builder()->CreateCmpxchgInt(x, addr, e); - + Value *result = builder()->CreateAtomicCmpXchg(addr, e, x, llvm::SequentiallyConsistent); // Push the result state()->push( SharkValue::create_jint( diff --git a/hotspot/src/share/vm/shark/sharkMemoryManager.cpp b/hotspot/src/share/vm/shark/sharkMemoryManager.cpp index cbcef22c2a3..20cb0e5ebfe 100644 --- a/hotspot/src/share/vm/shark/sharkMemoryManager.cpp +++ b/hotspot/src/share/vm/shark/sharkMemoryManager.cpp @@ -79,7 +79,6 @@ void SharkMemoryManager::setMemoryExecutable() { mm()->setMemoryExecutable(); } -#if SHARK_LLVM_VERSION >= 27 void SharkMemoryManager::deallocateExceptionTable(void *ptr) { mm()->deallocateExceptionTable(ptr); } @@ -87,26 +86,23 @@ void SharkMemoryManager::deallocateExceptionTable(void *ptr) { void SharkMemoryManager::deallocateFunctionBody(void *ptr) { mm()->deallocateFunctionBody(ptr); } -#else -void SharkMemoryManager::deallocateMemForFunction(const Function* F) { - return mm()->deallocateMemForFunction(F); -} -#endif uint8_t* SharkMemoryManager::allocateGlobal(uintptr_t Size, unsigned int Alignment) { return mm()->allocateGlobal(Size, Alignment); } -#if SHARK_LLVM_VERSION < 27 -void* SharkMemoryManager::getDlsymTable() const { - return mm()->getDlsymTable(); +void* SharkMemoryManager::getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure) { + return mm()->getPointerToNamedFunction(Name, AbortOnFailure); } -void SharkMemoryManager::SetDlsymTable(void *ptr) { - mm()->SetDlsymTable(ptr); +uint8_t* SharkMemoryManager::allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) { + return mm()->allocateCodeSection(Size, Alignment, SectionID); +} + +uint8_t* SharkMemoryManager::allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID) { + return mm()->allocateDataSection(Size, Alignment, SectionID); } -#endif void SharkMemoryManager::setPoisonMemory(bool poison) { mm()->setPoisonMemory(poison); diff --git a/hotspot/src/share/vm/shark/sharkMemoryManager.hpp b/hotspot/src/share/vm/shark/sharkMemoryManager.hpp index 2da762afeda..fa725a14502 100644 --- a/hotspot/src/share/vm/shark/sharkMemoryManager.hpp +++ b/hotspot/src/share/vm/shark/sharkMemoryManager.hpp @@ -75,20 +75,15 @@ class SharkMemoryManager : public llvm::JITMemoryManager { unsigned char* TableStart, unsigned char* TableEnd, unsigned char* FrameRegister); -#if SHARK_LLVM_VERSION < 27 - void* getDlsymTable() const; - void SetDlsymTable(void *ptr); -#endif + void *getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure = true); + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID); + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, unsigned SectionID); void setPoisonMemory(bool); uint8_t* allocateGlobal(uintptr_t, unsigned int); void setMemoryWritable(); void setMemoryExecutable(); -#if SHARK_LLVM_VERSION >= 27 void deallocateExceptionTable(void *ptr); void deallocateFunctionBody(void *ptr); -#else - void deallocateMemForFunction(const llvm::Function* F); -#endif unsigned char *allocateSpace(intptr_t Size, unsigned int Alignment); }; diff --git a/hotspot/src/share/vm/shark/sharkNativeWrapper.cpp b/hotspot/src/share/vm/shark/sharkNativeWrapper.cpp index ef4337233f5..53fea3154b8 100644 --- a/hotspot/src/share/vm/shark/sharkNativeWrapper.cpp +++ b/hotspot/src/share/vm/shark/sharkNativeWrapper.cpp @@ -59,7 +59,6 @@ void SharkNativeWrapper::initialize(const char *name) { OopMap *oopmap = new OopMap( SharkStack::oopmap_slot_munge(stack()->oopmap_frame_size()), SharkStack::oopmap_slot_munge(arg_size())); - oopmap->set_oop(SharkStack::slot2reg(stack()->method_slot_offset())); // Set up the oop_tmp slot if required: // - For static methods we use it to handlize the class argument @@ -83,9 +82,9 @@ void SharkNativeWrapper::initialize(const char *name) { } // Start building the argument list - std::vector param_types; + std::vector param_types; std::vector param_values; - const PointerType *box_type = PointerType::getUnqual(SharkType::oop_type()); + PointerType *box_type = PointerType::getUnqual(SharkType::oop_type()); // First argument is the JNIEnv param_types.push_back(SharkType::jniEnv_type()); @@ -149,7 +148,7 @@ void SharkNativeWrapper::initialize(const char *name) { builder()->CreateBr(merge); builder()->SetInsertPoint(merge); - phi = builder()->CreatePHI(box_type, "boxed_object"); + phi = builder()->CreatePHI(box_type, 0, "boxed_object"); phi->addIncoming(ConstantPointerNull::get(box_type), null); phi->addIncoming(box, not_null); box = phi; @@ -170,7 +169,7 @@ void SharkNativeWrapper::initialize(const char *name) { // fall through default: - const Type *param_type = SharkType::to_stackType(arg_type(i)); + Type *param_type = SharkType::to_stackType(arg_type(i)); param_types.push_back(param_type); param_values.push_back( @@ -201,7 +200,7 @@ void SharkNativeWrapper::initialize(const char *name) { // Make the call BasicType result_type = target()->result_type(); - const Type* return_type; + Type* return_type; if (result_type == T_VOID) return_type = SharkType::void_type(); else if (is_returning_oop()) @@ -213,7 +212,7 @@ void SharkNativeWrapper::initialize(const char *name) { PointerType::getUnqual( FunctionType::get(return_type, param_types, false))); Value *result = builder()->CreateCall( - native_function, param_values.begin(), param_values.end()); + native_function, llvm::makeArrayRef(param_values)); // Start the transition back to _thread_in_Java CreateSetThreadState(_thread_in_native_trans); @@ -221,7 +220,7 @@ void SharkNativeWrapper::initialize(const char *name) { // Make sure new state is visible in the GC thread if (os::is_MP()) { if (UseMembar) - builder()->CreateMemoryBarrier(SharkBuilder::BARRIER_STORELOAD); + builder()->CreateFence(llvm::SequentiallyConsistent, llvm::CrossThread); else CreateWriteMemorySerializePage(); } @@ -305,7 +304,7 @@ void SharkNativeWrapper::initialize(const char *name) { builder()->CreateBr(merge); builder()->SetInsertPoint(merge); - PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), "result"); + PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), 0, "result"); phi->addIncoming(LLVMValue::null(), null); phi->addIncoming(unboxed_result, not_null); result = phi; diff --git a/hotspot/src/share/vm/shark/sharkStack.cpp b/hotspot/src/share/vm/shark/sharkStack.cpp index 6800d22e012..ce868d61d56 100644 --- a/hotspot/src/share/vm/shark/sharkStack.cpp +++ b/hotspot/src/share/vm/shark/sharkStack.cpp @@ -75,7 +75,7 @@ void SharkStack::initialize(Value* method) { _method_slot_offset = offset++; if (setup_sp_and_method) { builder()->CreateStore( - method, slot_addr(method_slot_offset(), SharkType::Method*_type())); + method, slot_addr(method_slot_offset(), SharkType::Method_type())); } // Unextended SP @@ -163,7 +163,7 @@ Value* SharkStack::CreatePopFrame(int result_slots) { } Value* SharkStack::slot_addr(int offset, - const Type* type, + Type* type, const char* name) const { bool needs_cast = type && type != SharkType::intptr_type(); diff --git a/hotspot/src/share/vm/shark/sharkStack.hpp b/hotspot/src/share/vm/shark/sharkStack.hpp index 78f694cee99..15e367f59bc 100644 --- a/hotspot/src/share/vm/shark/sharkStack.hpp +++ b/hotspot/src/share/vm/shark/sharkStack.hpp @@ -204,7 +204,7 @@ class SharkStack : public SharkCompileInvariants { // Addresses of things in the frame public: llvm::Value* slot_addr(int offset, - const llvm::Type* type = NULL, + llvm::Type* type = NULL, const char* name = "") const; llvm::Value* monitor_addr(int index) const { diff --git a/hotspot/src/share/vm/shark/sharkState.cpp b/hotspot/src/share/vm/shark/sharkState.cpp index d0bc19d9891..87a5d5c6a42 100644 --- a/hotspot/src/share/vm/shark/sharkState.cpp +++ b/hotspot/src/share/vm/shark/sharkState.cpp @@ -131,7 +131,7 @@ void SharkState::merge(SharkState* other, Value *this_method = this->method(); Value *other_method = other->method(); if (this_method != other_method) { - PHINode *phi = builder()->CreatePHI(SharkType::Method*_type(), "method"); + PHINode *phi = builder()->CreatePHI(SharkType::Method_type(), 0, "method"); phi->addIncoming(this_method, this_block); phi->addIncoming(other_method, other_block); set_method(phi); @@ -142,7 +142,7 @@ void SharkState::merge(SharkState* other, Value *other_oop_tmp = other->oop_tmp(); if (this_oop_tmp != other_oop_tmp) { assert(this_oop_tmp && other_oop_tmp, "can't merge NULL with non-NULL"); - PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), "oop_tmp"); + PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), 0, "oop_tmp"); phi->addIncoming(this_oop_tmp, this_block); phi->addIncoming(other_oop_tmp, other_block); set_oop_tmp(phi); @@ -243,7 +243,7 @@ SharkOSREntryState::SharkOSREntryState(SharkTopLevelBlock* block, Value* method, Value* osr_buf) : SharkState(block) { - assert(!block->stack_depth_at_entry(), "entry block shouldn't have stack"); + assert(block->stack_depth_at_entry() == 0, "entry block shouldn't have stack"); set_num_monitors(block->ciblock()->monitor_count()); // Local variables @@ -287,7 +287,7 @@ SharkPHIState::SharkPHIState(SharkTopLevelBlock* block) char name[18]; // Method - set_method(builder()->CreatePHI(SharkType::Method*_type(), "method")); + set_method(builder()->CreatePHI(SharkType::Method_type(), 0, "method")); // Local variables for (int i = 0; i < max_locals(); i++) { @@ -307,7 +307,7 @@ SharkPHIState::SharkPHIState(SharkTopLevelBlock* block) case T_ARRAY: snprintf(name, sizeof(name), "local_%d_", i); value = SharkValue::create_phi( - type, builder()->CreatePHI(SharkType::to_stackType(type), name)); + type, builder()->CreatePHI(SharkType::to_stackType(type), 0, name)); break; case T_ADDRESS: @@ -345,7 +345,7 @@ SharkPHIState::SharkPHIState(SharkTopLevelBlock* block) case T_ARRAY: snprintf(name, sizeof(name), "stack_%d_", i); value = SharkValue::create_phi( - type, builder()->CreatePHI(SharkType::to_stackType(type), name)); + type, builder()->CreatePHI(SharkType::to_stackType(type), 0, name)); break; case T_ADDRESS: diff --git a/hotspot/src/share/vm/shark/sharkTopLevelBlock.cpp b/hotspot/src/share/vm/shark/sharkTopLevelBlock.cpp index b66d70bdda8..fcd6906caad 100644 --- a/hotspot/src/share/vm/shark/sharkTopLevelBlock.cpp +++ b/hotspot/src/share/vm/shark/sharkTopLevelBlock.cpp @@ -65,6 +65,7 @@ void SharkTopLevelBlock::scan_for_traps() { switch (bc()) { case Bytecodes::_ldc: case Bytecodes::_ldc_w: + case Bytecodes::_ldc2_w: if (!SharkConstant::for_ldc(iter())->is_loaded()) { set_trap( Deoptimization::make_trap_request( @@ -109,7 +110,8 @@ void SharkTopLevelBlock::scan_for_traps() { case Bytecodes::_invokespecial: case Bytecodes::_invokevirtual: case Bytecodes::_invokeinterface: - method = iter()->get_method(will_link); + ciSignature* sig; + method = iter()->get_method(will_link, &sig); assert(will_link, "typeflow responsibility"); if (!method->holder()->is_linked()) { @@ -562,12 +564,12 @@ void SharkTopLevelBlock::marshal_exception_fast(int num_options) { Value *exception_klass = builder()->CreateValueOfStructEntry( xstack(0)->jobject_value(), in_ByteSize(oopDesc::klass_offset_in_bytes()), - SharkType::oop_type(), + SharkType::klass_type(), "exception_klass"); for (int i = 0; i < num_options; i++) { Value *check_klass = - builder()->CreateInlineOop(exc_handler(i)->catch_klass()); + builder()->CreateInlineMetadata(exc_handler(i)->catch_klass(), SharkType::klass_type()); BasicBlock *not_exact = function()->CreateBlock("not_exact"); BasicBlock *not_subtype = function()->CreateBlock("not_subtype"); @@ -823,7 +825,7 @@ void SharkTopLevelBlock::do_aload(BasicType basic_type) { builder()->CreateArrayAddress( array->jarray_value(), basic_type, index->jint_value())); - const Type *stack_type = SharkType::to_stackType(basic_type); + Type *stack_type = SharkType::to_stackType(basic_type); if (value->getType() != stack_type) value = builder()->CreateIntCast(value, stack_type, basic_type != T_CHAR); @@ -910,7 +912,7 @@ void SharkTopLevelBlock::do_astore(BasicType basic_type) { ShouldNotReachHere(); } - const Type *array_type = SharkType::to_arrayType(basic_type); + Type *array_type = SharkType::to_arrayType(basic_type); if (value->getType() != array_type) value = builder()->CreateIntCast(value, array_type, basic_type != T_CHAR); @@ -1102,9 +1104,9 @@ ciMethod* SharkTopLevelBlock::improve_virtual_call(ciMethod* caller, Value *SharkTopLevelBlock::get_direct_callee(ciMethod* method) { return builder()->CreateBitCast( - builder()->CreateInlineOop(method), - SharkType::Method*_type(), - "callee"); + builder()->CreateInlineMetadata(method, SharkType::Method_type()), + SharkType::Method_type(), + "callee"); } Value *SharkTopLevelBlock::get_virtual_callee(SharkValue* receiver, @@ -1118,7 +1120,7 @@ Value *SharkTopLevelBlock::get_virtual_callee(SharkValue* receiver, return builder()->CreateLoad( builder()->CreateArrayAddress( klass, - SharkType::Method*_type(), + SharkType::Method_type(), vtableEntry::size() * wordSize, in_ByteSize(InstanceKlass::vtable_start_offset() * wordSize), LLVMValue::intptr_constant(vtable_index)), @@ -1136,7 +1138,7 @@ Value* SharkTopLevelBlock::get_interface_callee(SharkValue *receiver, // Locate the receiver's itable Value *object_klass = builder()->CreateValueOfStructEntry( receiver->jobject_value(), in_ByteSize(oopDesc::klass_offset_in_bytes()), - SharkType::oop_type(), + SharkType::klass_type(), "object_klass"); Value *vtable_start = builder()->CreateAdd( @@ -1169,12 +1171,12 @@ Value* SharkTopLevelBlock::get_interface_callee(SharkValue *receiver, } // Locate this interface's entry in the table - Value *iklass = builder()->CreateInlineOop(method->holder()); + Value *iklass = builder()->CreateInlineMetadata(method->holder(), SharkType::klass_type()); BasicBlock *loop_entry = builder()->GetInsertBlock(); builder()->CreateBr(loop); builder()->SetInsertPoint(loop); PHINode *itable_entry_addr = builder()->CreatePHI( - SharkType::intptr_type(), "itable_entry_addr"); + SharkType::intptr_type(), 0, "itable_entry_addr"); itable_entry_addr->addIncoming(itable_start, loop_entry); Value *itable_entry = builder()->CreateIntToPtr( @@ -1183,11 +1185,11 @@ Value* SharkTopLevelBlock::get_interface_callee(SharkValue *receiver, Value *itable_iklass = builder()->CreateValueOfStructEntry( itable_entry, in_ByteSize(itableOffsetEntry::interface_offset_in_bytes()), - SharkType::oop_type(), + SharkType::klass_type(), "itable_iklass"); builder()->CreateCondBr( - builder()->CreateICmpEQ(itable_iklass, LLVMValue::null()), + builder()->CreateICmpEQ(itable_iklass, LLVMValue::nullKlass()), got_null, not_null); // A null entry means that the class doesn't implement the @@ -1231,7 +1233,7 @@ Value* SharkTopLevelBlock::get_interface_callee(SharkValue *receiver, method->itable_index() * itableMethodEntry::size() * wordSize)), LLVMValue::intptr_constant( itableMethodEntry::method_offset_in_bytes())), - PointerType::getUnqual(SharkType::Method*_type())), + PointerType::getUnqual(SharkType::Method_type())), "callee"); } @@ -1243,7 +1245,9 @@ void SharkTopLevelBlock::do_call() { // Find the method being called bool will_link; - ciMethod *dest_method = iter()->get_method(will_link); + ciSignature* sig; + ciMethod *dest_method = iter()->get_method(will_link, &sig); + assert(will_link, "typeflow responsibility"); assert(dest_method->is_static() == is_static, "must match bc"); @@ -1259,10 +1263,17 @@ void SharkTopLevelBlock::do_call() { assert(holder_klass->is_interface() || holder_klass->super() == NULL || !is_interface, "must match bc"); + + bool is_forced_virtual = is_interface && holder_klass == java_lang_Object_klass(); + ciKlass *holder = iter()->get_declared_method_holder(); ciInstanceKlass *klass = ciEnv::get_instance_klass_for_declared_method_holder(holder); + if (is_forced_virtual) { + klass = java_lang_Object_klass(); + } + // Find the receiver in the stack. We do this before // trying to inline because the inliner can only use // zero-checked values, not being able to perform the @@ -1294,7 +1305,7 @@ void SharkTopLevelBlock::do_call() { // Find the method we are calling Value *callee; if (call_is_virtual) { - if (is_virtual) { + if (is_virtual || is_forced_virtual) { assert(klass->is_linked(), "scan_for_traps responsibility"); int vtable_index = call_method->resolve_vtable_index( target()->holder(), klass); @@ -1490,12 +1501,12 @@ void SharkTopLevelBlock::do_full_instance_check(ciKlass* klass) { // Get the class we're checking against builder()->SetInsertPoint(not_null); - Value *check_klass = builder()->CreateInlineOop(klass); + Value *check_klass = builder()->CreateInlineMetadata(klass, SharkType::klass_type()); // Get the class of the object being tested Value *object_klass = builder()->CreateValueOfStructEntry( object, in_ByteSize(oopDesc::klass_offset_in_bytes()), - SharkType::oop_type(), + SharkType::klass_type(), "object_klass"); // Perform the check @@ -1520,7 +1531,7 @@ void SharkTopLevelBlock::do_full_instance_check(ciKlass* klass) { // First merge builder()->SetInsertPoint(merge1); PHINode *nonnull_result = builder()->CreatePHI( - SharkType::jint_type(), "nonnull_result"); + SharkType::jint_type(), 0, "nonnull_result"); nonnull_result->addIncoming( LLVMValue::jint_constant(IC_IS_INSTANCE), is_instance); nonnull_result->addIncoming( @@ -1531,7 +1542,7 @@ void SharkTopLevelBlock::do_full_instance_check(ciKlass* klass) { // Second merge builder()->SetInsertPoint(merge2); PHINode *result = builder()->CreatePHI( - SharkType::jint_type(), "result"); + SharkType::jint_type(), 0, "result"); result->addIncoming(LLVMValue::jint_constant(IC_IS_NULL), null_block); result->addIncoming(nonnull_result, nonnull_block); @@ -1698,7 +1709,7 @@ void SharkTopLevelBlock::do_new() { heap_object = builder()->CreateIntToPtr( old_top, SharkType::oop_type(), "heap_object"); - Value *check = builder()->CreateCmpxchgPtr(new_top, top_addr, old_top); + Value *check = builder()->CreateAtomicCmpXchg(top_addr, old_top, new_top, llvm::SequentiallyConsistent); builder()->CreateCondBr( builder()->CreateICmpEQ(old_top, check), initialize, retry); @@ -1707,7 +1718,7 @@ void SharkTopLevelBlock::do_new() { builder()->SetInsertPoint(initialize); if (tlab_object) { PHINode *phi = builder()->CreatePHI( - SharkType::oop_type(), "fast_object"); + SharkType::oop_type(), 0, "fast_object"); phi->addIncoming(tlab_object, got_tlab); phi->addIncoming(heap_object, got_heap); fast_object = phi; @@ -1730,7 +1741,7 @@ void SharkTopLevelBlock::do_new() { Value *klass_addr = builder()->CreateAddressOfStructEntry( fast_object, in_ByteSize(oopDesc::klass_offset_in_bytes()), - PointerType::getUnqual(SharkType::oop_type()), + PointerType::getUnqual(SharkType::klass_type()), "klass_addr"); // Set the mark @@ -1744,7 +1755,7 @@ void SharkTopLevelBlock::do_new() { builder()->CreateStore(LLVMValue::intptr_constant(mark), mark_addr); // Set the class - Value *rtklass = builder()->CreateInlineOop(klass); + Value *rtklass = builder()->CreateInlineMetadata(klass, SharkType::klass_type()); builder()->CreateStore(rtklass, klass_addr); got_fast = builder()->GetInsertBlock(); @@ -1767,7 +1778,7 @@ void SharkTopLevelBlock::do_new() { builder()->SetInsertPoint(push_object); } if (fast_object) { - PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), "object"); + PHINode *phi = builder()->CreatePHI(SharkType::oop_type(), 0, "object"); phi->addIncoming(fast_object, got_fast); phi->addIncoming(slow_object, got_slow); object = phi; @@ -1849,8 +1860,9 @@ void SharkTopLevelBlock::do_multianewarray() { void SharkTopLevelBlock::acquire_method_lock() { Value *lockee; - if (target()->is_static()) + if (target()->is_static()) { lockee = builder()->CreateInlineOop(target()->holder()->java_mirror()); + } else lockee = local(0)->jobject_value(); @@ -1898,7 +1910,7 @@ void SharkTopLevelBlock::acquire_lock(Value *lockee, int exception_action) { Value *lock = builder()->CreatePtrToInt( monitor_header_addr, SharkType::intptr_type()); - Value *check = builder()->CreateCmpxchgPtr(lock, mark_addr, disp); + Value *check = builder()->CreateAtomicCmpXchg(mark_addr, disp, lock, llvm::Acquire); builder()->CreateCondBr( builder()->CreateICmpEQ(disp, check), acquired_fast, try_recursive); @@ -1983,7 +1995,7 @@ void SharkTopLevelBlock::release_lock(int exception_action) { PointerType::getUnqual(SharkType::intptr_type()), "mark_addr"); - Value *check = builder()->CreateCmpxchgPtr(disp, mark_addr, lock); + Value *check = builder()->CreateAtomicCmpXchg(mark_addr, lock, disp, llvm::Release); builder()->CreateCondBr( builder()->CreateICmpEQ(lock, check), released_fast, slow_path); diff --git a/hotspot/src/share/vm/shark/sharkTopLevelBlock.hpp b/hotspot/src/share/vm/shark/sharkTopLevelBlock.hpp index 067ea6285d1..a1306f30cb5 100644 --- a/hotspot/src/share/vm/shark/sharkTopLevelBlock.hpp +++ b/hotspot/src/share/vm/shark/sharkTopLevelBlock.hpp @@ -290,7 +290,7 @@ class SharkTopLevelBlock : public SharkBlock { int exception_action) { decache_for_VM_call(); stack()->CreateSetLastJavaFrame(); - llvm::CallInst *res = builder()->CreateCall(callee, args_start, args_end); + llvm::CallInst *res = builder()->CreateCall(callee, llvm::makeArrayRef(args_start, args_end)); stack()->CreateResetLastJavaFrame(); cache_after_VM_call(); if (exception_action & EAM_CHECK) { diff --git a/hotspot/src/share/vm/shark/sharkType.hpp b/hotspot/src/share/vm/shark/sharkType.hpp index c4e0d957518..719722bc79f 100644 --- a/hotspot/src/share/vm/shark/sharkType.hpp +++ b/hotspot/src/share/vm/shark/sharkType.hpp @@ -40,82 +40,85 @@ class SharkType : public AllStatic { // Basic types public: - static const llvm::Type* void_type() { + static llvm::Type* void_type() { return context().void_type(); } - static const llvm::IntegerType* bit_type() { + static llvm::IntegerType* bit_type() { return context().bit_type(); } - static const llvm::IntegerType* jbyte_type() { + static llvm::IntegerType* jbyte_type() { return context().jbyte_type(); } - static const llvm::IntegerType* jshort_type() { + static llvm::IntegerType* jshort_type() { return context().jshort_type(); } - static const llvm::IntegerType* jint_type() { + static llvm::IntegerType* jint_type() { return context().jint_type(); } - static const llvm::IntegerType* jlong_type() { + static llvm::IntegerType* jlong_type() { return context().jlong_type(); } - static const llvm::Type* jfloat_type() { + static llvm::Type* jfloat_type() { return context().jfloat_type(); } - static const llvm::Type* jdouble_type() { + static llvm::Type* jdouble_type() { return context().jdouble_type(); } - static const llvm::IntegerType* intptr_type() { + static llvm::IntegerType* intptr_type() { return context().intptr_type(); } // Compound types public: - static const llvm::PointerType* itableOffsetEntry_type() { + static llvm::PointerType* itableOffsetEntry_type() { return context().itableOffsetEntry_type(); } - static const llvm::PointerType* jniEnv_type() { + static llvm::PointerType* jniEnv_type() { return context().jniEnv_type(); } - static const llvm::PointerType* jniHandleBlock_type() { + static llvm::PointerType* jniHandleBlock_type() { return context().jniHandleBlock_type(); } - static const llvm::PointerType* klass_type() { + static llvm::PointerType* Metadata_type() { + return context().Metadata_type(); + } + static llvm::PointerType* klass_type() { return context().klass_type(); } - static const llvm::PointerType* Method*_type() { - return context().Method*_type(); + static llvm::PointerType* Method_type() { + return context().Method_type(); } - static const llvm::ArrayType* monitor_type() { + static llvm::ArrayType* monitor_type() { return context().monitor_type(); } - static const llvm::PointerType* oop_type() { + static llvm::PointerType* oop_type() { return context().oop_type(); } - static const llvm::PointerType* thread_type() { + static llvm::PointerType* thread_type() { return context().thread_type(); } - static const llvm::PointerType* zeroStack_type() { + static llvm::PointerType* zeroStack_type() { return context().zeroStack_type(); } - static const llvm::FunctionType* entry_point_type() { + static llvm::FunctionType* entry_point_type() { return context().entry_point_type(); } - static const llvm::FunctionType* osr_entry_point_type() { + static llvm::FunctionType* osr_entry_point_type() { return context().osr_entry_point_type(); } // Mappings public: - static const llvm::Type* to_stackType(BasicType type) { + static llvm::Type* to_stackType(BasicType type) { return context().to_stackType(type); } - static const llvm::Type* to_stackType(ciType* type) { + static llvm::Type* to_stackType(ciType* type) { return to_stackType(type->basic_type()); } - static const llvm::Type* to_arrayType(BasicType type) { + static llvm::Type* to_arrayType(BasicType type) { return context().to_arrayType(type); } - static const llvm::Type* to_arrayType(ciType* type) { + static llvm::Type* to_arrayType(ciType* type) { return to_arrayType(type->basic_type()); } }; diff --git a/hotspot/src/share/vm/shark/sharkValue.cpp b/hotspot/src/share/vm/shark/sharkValue.cpp index e9b17a5f35b..0c4ef006a61 100644 --- a/hotspot/src/share/vm/shark/sharkValue.cpp +++ b/hotspot/src/share/vm/shark/sharkValue.cpp @@ -233,7 +233,7 @@ SharkValue* SharkNormalValue::merge(SharkBuilder* builder, assert(type() == other->type(), "should be"); assert(zero_checked() == other->zero_checked(), "should be"); - PHINode *phi = builder->CreatePHI(SharkType::to_stackType(type()), name); + PHINode *phi = builder->CreatePHI(SharkType::to_stackType(type()), 0, name); phi->addIncoming(this->generic_value(), this_block); phi->addIncoming(other->generic_value(), other_block); return SharkValue::create_generic(type(), phi, zero_checked()); diff --git a/hotspot/src/share/vm/shark/shark_globals.hpp b/hotspot/src/share/vm/shark/shark_globals.hpp index deae2f3c0b3..9ab174d654b 100644 --- a/hotspot/src/share/vm/shark/shark_globals.hpp +++ b/hotspot/src/share/vm/shark/shark_globals.hpp @@ -40,6 +40,12 @@ product(intx, SharkMaxInlineSize, 32, \ "Maximum bytecode size of methods to inline when using Shark") \ \ + product(bool, EliminateNestedLocks, true, \ + "Eliminate nested locks of the same object when possible") \ + \ + product(ccstr, SharkOptimizationLevel, "Default", \ + "The optimization level passed to LLVM, possible values: None, Less, Default and Agressive") \ + \ /* compiler debugging */ \ develop(ccstr, SharkPrintTypeflowOf, NULL, \ "Print the typeflow of the specified method") \ @@ -58,6 +64,10 @@ \ diagnostic(bool, SharkPerformanceWarnings, false, \ "Warn about things that could be made faster") \ + \ + develop(ccstr, SharkVerifyFunction, NULL, \ + "Runs LLVM verify over LLVM IR") \ + SHARK_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG) diff --git a/hotspot/src/share/vm/utilities/events.hpp b/hotspot/src/share/vm/utilities/events.hpp index 74a804219fa..c2e543da976 100644 --- a/hotspot/src/share/vm/utilities/events.hpp +++ b/hotspot/src/share/vm/utilities/events.hpp @@ -135,11 +135,11 @@ template class EventLogBase : public EventLog { }; // A simple wrapper class for fixed size text messages. -class StringLogMessage : public FormatBuffer<132> { +class StringLogMessage : public FormatBuffer<256> { public: // Wrap this buffer in a stringStream. stringStream stream() { - return stringStream(_buf, sizeof(_buf)); + return stringStream(_buf, size()); } }; diff --git a/hotspot/test/compiler/6865265/StackOverflowBug.java b/hotspot/test/compiler/6865265/StackOverflowBug.java index d474814054f..295a6b41777 100644 --- a/hotspot/test/compiler/6865265/StackOverflowBug.java +++ b/hotspot/test/compiler/6865265/StackOverflowBug.java @@ -28,7 +28,7 @@ * @summary JVM crashes with "missing exception handler" error * @author volker.simonis@sap.com * - * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss224k StackOverflowBug + * @run main/othervm -XX:CompileThreshold=100 -Xbatch -Xss248k StackOverflowBug */