diff --git a/hotspot/src/cpu/aarch64/vm/interpreter_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/interpreter_aarch64.cpp index 96a967fb961..a9cd044a1d8 100644 --- a/hotspot/src/cpu/aarch64/vm/interpreter_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/interpreter_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -39,7 +39,6 @@ #include "prims/jvmtiThreadState.hpp" #include "prims/methodHandles.hpp" #include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -259,20 +258,3 @@ address InterpreterGenerator::generate_abstract_entry(void) { return entry_point; } - - -void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { - - // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in - // the days we had adapter frames. When we deoptimize a situation where a - // compiled caller calls a compiled caller will have registers it expects - // to survive the call to the callee. If we deoptimize the callee the only - // way we can restore these registers is to have the oldest interpreter - // frame that we create restore these values. That is what this routine - // will accomplish. - - // At the moment we have modified c2 to not have any callee save registers - // so this problem does not exist and this routine is just a place holder. 
- - assert(f->is_interpreted_frame(), "must be interpreted"); -} diff --git a/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp new file mode 100644 index 00000000000..b9501b948f1 --- /dev/null +++ b/hotspot/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp @@ -0,0 +1,1925 @@ +/* + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "interpreter/bytecodeTracer.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#include <sys/types.h> + +#ifndef PRODUCT +#include "oops/method.hpp" +#endif // !PRODUCT + +#ifdef BUILTIN_SIM +#include "../../../../../../simulator/simulator.hpp" +#endif + +#define __ _masm-> + +#ifndef CC_INTERP + +//----------------------------------------------------------------------------- + +extern "C" void entry(CodeBuffer*); + +//----------------------------------------------------------------------------- + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { // emits the StackOverflowError throw stub (restore bcp, empty the expression stack, call InterpreterRuntime::throw_StackOverflowError); returns the stub's start address + address entry = __ pc(); + +#ifdef ASSERT + { + Label L; + __ ldr(rscratch1, Address(rfp, + frame::interpreter_frame_monitor_block_top_offset * + wordSize)); + __ mov(rscratch2, sp); + __ cmp(rscratch1, rscratch2); // maximal sp for current rfp (stack + // grows negative) + __ br(Assembler::HS, L); // check if frame is complete + __ stop ("interpreter frame not set up"); + __ bind(L); + } +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened +
__ empty_expression_stack(); + // throw exception + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_StackOverflowError)); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( + const char* name) { // emits the ArrayIndexOutOfBoundsException throw stub; 'name' is handed to the runtime in c_rarg1; returns the stub's start address + address entry = __ pc(); + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // setup parameters + // convention: the offending index is expected in r1 (copied as a 32-bit value) + __ movw(c_rarg2, r1); + __ mov(c_rarg1, (address)name); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ArrayIndexOutOfBoundsException), + c_rarg1, c_rarg2); + return entry; +} + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { // emits the ClassCastException throw stub; returns the stub's start address + address entry = __ pc(); + + // object is at TOS; pop it as the argument of the runtime call + __ pop(c_rarg1); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + throw_ClassCastException), + c_rarg1); + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common( + const char* name, const char* message, bool pass_oop) { // emits a stub that creates exception 'name' (from 'message', or from the oop popped off TOS when pass_oop) and branches to the throw entry + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + if (pass_oop) { + // object is at TOS + __ pop(c_rarg2); + } + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // setup parameters: c_rarg1 = name, c_rarg2 = oop or message + __ lea(c_rarg1, Address((address)name)); + if (pass_oop) { + __ call_VM(r0, CAST_FROM_FN_PTR(address, + InterpreterRuntime:: + create_klass_exception), + c_rarg1, c_rarg2); + } else { + // kind of lame ExternalAddress can't take NULL because + // external_word_Relocation will assert.
+ if (message != NULL) { + __ lea(c_rarg2, Address((address)message)); + } else { + __ mov(c_rarg2, NULL_WORD); + } + __ call_VM(r0, + CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), + c_rarg1, c_rarg2); + } + // throw exception: branch to the interpreter's throw_exception entry + __ b(address(Interpreter::throw_exception_entry())); + return entry; +} + +address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { // emits a continuation entry: clears last_sp and dispatches the next bytecode for 'state' + address entry = __ pc(); + // NULL last_sp until next java call + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ dispatch_next(state); + return entry; +} + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { // emits code that restores esp from last_sp, reloads bcp/locals/cp cache/method, pops the call's parameters and dispatches at bcp advanced by 'step' + address entry = __ pc(); + + // Restore stack bottom in case i2c adjusted stack + __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // and NULL it as marker that esp is now tos until next java call + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + + // Pop N words from the stack (N = parameter size field of the cp cache entry flags) + __ get_cache_and_index_at_bcp(r1, r2, 1, index_size); + __ ldr(r1, Address(r1, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); + __ andr(r1, r1, ConstantPoolCacheEntry::parameter_size_mask); + + __ add(esp, esp, r1, Assembler::LSL, 3); + + // Restore machine SP: initial_sp minus (max_stack plus monitor size plus 2) words, aligned down to 16 bytes + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); + __ ldr(rscratch2, + Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3); + __ andr(sp, rscratch1, -16); + +#ifndef PRODUCT + // tell the simulator that the method has been reentered + if (NotifySimulator) { + __ notify(Assembler::method_reentry); + }
+#endif + __ get_dispatch(); + __ dispatch_next(state, step); + + return entry; +} + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, + int step) { + address entry = __ pc(); + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + + // handle exceptions: if the thread has a pending exception, rethrow it through the runtime (that call is not expected to return) + { + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + __ get_dispatch(); + + // Calculate stack limit (machine SP): initial_sp minus (max_stack plus monitor size plus 2) words, aligned down to 16 bytes + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); + __ ldr(rscratch2, + Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, ext::uxtx, 3); // NOTE(review): generate_return_entry_for uses ext::uxtw for the same computation - confirm the difference is intentional + __ andr(sp, rscratch1, -16); + + // Restore expression stack pointer + __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // NULL last_sp until next java call + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + + __ dispatch_next(state, step); + return entry; +} + +address TemplateInterpreterGenerator::generate_result_handler_for( + BasicType type) { // emits the result handler for 'type': normalizes the value in r0 and returns via lr; returns the handler's start address + address entry = __ pc(); + switch (type) { + case T_BOOLEAN: __ uxtb(r0, r0); break; + case T_CHAR : __ uxth(r0, r0); break; + case T_BYTE : __ sxtb(r0, r0); break; + case T_SHORT : __ sxth(r0, r0); break; + case T_INT : __ uxtw(r0, r0); break; // FIXME: We almost certainly don't need this + case T_LONG : /* nothing to do */ break; + case T_VOID : /* nothing to do */ break; + case T_FLOAT : /* nothing to do */ break; + case T_DOUBLE : /* nothing to do */ break; + case T_OBJECT : + // retrieve result from the frame's oop_temp slot + __ ldr(r0, Address(rfp, +
frame::interpreter_frame_oop_temp_offset*wordSize)); + // and verify it + __ verify_oop(r0); + break; + default : ShouldNotReachHere(); + } + __ ret(lr); // return from result handler + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for( + TosState state, + address runtime_entry) { // emits a safepoint entry: saves the TOS value, calls runtime_entry, then dispatches through the normal vtos table + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ membar(Assembler::AnyAny); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + return entry; +} + +// Helpers that factor out code common to the various types of method entry. +// + + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// NOTE(review): the code below tests a mask (tiered) or a limit (non-tiered), not the sign - confirm the note above still applies +// rmethod: method +// overflow, profile_method, profile_method_continue: branch targets (the latter two may be NULL) +void InterpreterGenerator::generate_counter_incr( + Label* overflow, + Label* profile_method, + Label* profile_method_continue) { + Label done; + // Note: In tiered we increment either the counter in MethodCounters or the one in the MDO, depending on whether we're profiling. + if (TieredCompilation) { + int increment = InvocationCounter::count_increment; + Label no_mdo; + if (ProfileInterpreter) { + // Are we profiling? (i.e. does the method have an MDO?)
+ __ ldr(r0, Address(rmethod, Method::method_data_offset())); + __ cbz(r0, no_mdo); + // Increment counter in the MDO; branch to overflow per the MDO's invoke mask + const Address mdo_invocation_counter(r0, in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + const Address mask(r0, in_bytes(MethodData::invoke_mask_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rscratch1, rscratch2, false, Assembler::EQ, overflow); + __ b(done); + } + __ bind(no_mdo); + // Increment counter in MethodCounters; branch to overflow per its invoke mask + const Address invocation_counter(rscratch2, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + __ get_method_counters(rmethod, rscratch2, done); // NOTE(review): presumably branches to done when no MethodCounters is available - confirm + const Address mask(rscratch2, in_bytes(MethodCounters::invoke_mask_offset())); + __ increment_mask_and_jump(invocation_counter, increment, mask, rscratch1, r1, false, Assembler::EQ, overflow); + __ bind(done); + } else { // not TieredCompilation + const Address backedge_counter(rscratch2, + MethodCounters::backedge_counter_offset() + + InvocationCounter::counter_offset()); + const Address invocation_counter(rscratch2, + MethodCounters::invocation_counter_offset() + + InvocationCounter::counter_offset()); + + __ get_method_counters(rmethod, rscratch2, done); + + if (ProfileInterpreter) { // %%% Merge this into MethodData* + __ ldrw(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset())); + __ addw(r1, r1, 1); + __ strw(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset())); + } + // Update standard invocation counters + __ ldrw(r1, invocation_counter); + __ ldrw(r0, backedge_counter); + + __ addw(r1, r1, InvocationCounter::count_increment); + __ andw(r0, r0, InvocationCounter::count_mask_value); + + __ strw(r1, invocation_counter); + __ addw(r0, r0, r1); // r0 = masked backedge count plus the updated invocation count + + // profile_method is non-null only for interpreted method so + // profile_method != NULL == !native_call + + if
(ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset())); + __ ldrw(rscratch2, Address(rscratch2, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); + __ cmpw(r0, rscratch2); + __ br(Assembler::LT, *profile_method_continue); // below the profile limit (signed compare): skip the method data check + + // if no method data exists, go to profile_method + __ test_method_data_pointer(r0, *profile_method); + } + + { // overflow when the summed count reaches interpreter_invocation_limit (unsigned compare) + __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset())); + __ ldrw(rscratch2, Address(rscratch2, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); + __ cmpw(r0, rscratch2); + __ br(Assembler::HS, *overflow); + } + __ bind(done); + } +} + +void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { // emits the call to InterpreterRuntime::frequency_counter_overflow followed by a branch to do_continue + + // Asm interpreter on entry + // On return (i.e. jump to entry_point) [ back to invocation of interpreter ] + // Everything as it was on entry + + // InterpreterRuntime::frequency_counter_overflow takes two + // arguments, the first (thread) is passed by call_VM, the second + // indicates if the counter overflow occurs at a backwards branch + // (NULL bcp). We pass zero for it. The call returns the address + // of the verified entry point for the method or NULL if the + // compilation did not complete (either went background or bailed + // out). + __ mov(c_rarg1, 0); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::frequency_counter_overflow), + c_rarg1); + + __ b(*do_continue); +} + +// See if we've got enough room on the stack for locals plus overhead. +// The expression stack grows down incrementally, so the normal guard +// page mechanism will work for that. +// +// NOTE: The additional locals are also always pushed (wasn't +// obvious in generate_method_entry), so the guard should work for them +// too.
+// +// Args: +// r3: number of additional locals this frame needs (what we must check) +// rmethod: Method* +// +// Kills: +// r0, rscratch1, rscratch2 +void InterpreterGenerator::generate_stack_overflow_check(void) { + + // monitor entry size: see picture of stack set + // (generate_method_entry) and the frame header (x86 heritage: this comment named frame_amd64.hpp) + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + + // total overhead size: entry_size + (saved rfp through expr stack + // bottom). be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; + + const int page_size = os::vm_page_size(); + + Label after_frame_check; + + // see if the frame is greater than one page in size. If so, + // then we need to verify there is enough stack space remaining + // for the additional locals. + // + // Note that we use SUBS rather than CMP here because the immediate + // field of this instruction may overflow. SUBS can cope with this + // because it is a macro that will expand to some number of MOV + // instructions and a register operation. + __ subs(rscratch1, r3, (page_size - overhead_size) / Interpreter::stackElementSize); + __ br(Assembler::LS, after_frame_check); + + // compute sp as if this were going to be the last frame on + // the stack before the red zone + + const Address stack_base(rthread, Thread::stack_base_offset()); + const Address stack_size(rthread, Thread::stack_size_offset()); + + // locals + overhead, in bytes + __ mov(r0, overhead_size); + __ add(r0, r0, r3, Assembler::LSL, Interpreter::logStackElementSize); // r3 slots scaled to bytes (shift by logStackElementSize)
+ + __ ldr(rscratch1, stack_base); + __ ldr(rscratch2, stack_size); + +#ifdef ASSERT + Label stack_base_okay, stack_size_okay; + // verify that thread stack base is non-zero + __ cbnz(rscratch1, stack_base_okay); + __ stop("stack base is zero"); + __ bind(stack_base_okay); + // verify that thread stack size is non-zero + __ cbnz(rscratch2, stack_size_okay); + __ stop("stack size is zero"); + __ bind(stack_size_okay); +#endif + + // Add stack base to locals and subtract stack size + __ sub(rscratch1, rscratch1, rscratch2); // Stack limit + __ add(r0, r0, rscratch1); + + // Use the maximum number of pages we might bang. + const int max_pages = StackShadowPages > (StackRedPages+StackYellowPages) ? StackShadowPages : + (StackRedPages+StackYellowPages); + + // add in the guard zone allowance: twice max_pages pages (the larger of the shadow zone and the red plus yellow zones) + __ add(r0, r0, max_pages * page_size * 2); + + // check against the current sp: there is enough room if sp is above the computed limit + __ cmp(sp, r0); + __ br(Assembler::HI, after_frame_check); + + // Remove the incoming args, peeling the machine SP back to where it + // was in the caller. This is not strictly necessary, but unless we + // do so the stack frame may have a garbage FP; this ensures a + // correct call stack that we can always unwind. The ANDR should be + // unnecessary because the sender SP in r13 is always aligned, but + // it doesn't hurt. + __ andr(sp, r13, -16); + + // Note: the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError.
+ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); + + // all done with frame size check + __ bind(after_frame_check); +} + +// Allocate monitor and lock method (asm interpreter) +// +// Args: +// rmethod: Method* +// rlocals: locals +// +// Kills: +// r0 +// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) +// rscratch1, rscratch2 (scratch regs) +void TemplateInterpreterGenerator::lock_method() { + // synchronize method: pick the lock object, push one monitor entry, lock it + const Address access_flags(rmethod, Method::access_flags_offset()); + const Address monitor_block_top( + rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; + +#ifdef ASSERT + { + Label L; + __ ldrw(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ br(Assembler::NE, L); + __ stop("method doesn't need synchronization"); + __ bind(L); + } +#endif // ASSERT + + // get synchronization object into r0: the receiver (local 0) for non-static methods, the holder class's mirror for static ones + { + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + Label done; + __ ldrw(r0, access_flags); + __ tst(r0, JVM_ACC_STATIC); + // get receiver (assume this is frequent case); the ldr leaves the flags from tst intact for the branch below + __ ldr(r0, Address(rlocals, Interpreter::local_offset_in_bytes(0))); + __ br(Assembler::EQ, done); + __ ldr(r0, Address(rmethod, Method::const_offset())); + __ ldr(r0, Address(r0, ConstMethod::constants_offset())); + __ ldr(r0, Address(r0, + ConstantPool::pool_holder_offset_in_bytes())); + __ ldr(r0, Address(r0, mirror_offset)); + +#ifdef ASSERT + { + Label L; + __ cbnz(r0, L); + __ stop("synchronization object is NULL"); + __ bind(L); + } +#endif // ASSERT + + __ bind(done); + } + + // add space for monitor & lock: both sp and esp move down by one monitor entry + __ sub(sp, sp, entry_size); // add space for a monitor entry + __ sub(esp, esp, entry_size); + __ mov(rscratch1, esp); + __ str(rscratch1, monitor_block_top); // set new monitor block top + // store object into the new monitor entry + __ str(r0, Address(esp, +
BasicObjectLock::obj_offset_in_bytes())); + __ mov(c_rarg1, esp); // address of the new monitor entry (BasicObjectLock) + __ lock_object(c_rarg1); +} + +// Generate a fixed interpreter frame. This is identical setup for +// interpreted methods and for native methods hence the shared code. +// native_call selects the 12-word native layout (two extra zeroed slots) over the 10-word one. +// Args: +// lr: return address +// rmethod: Method* +// rlocals: pointer to locals +// rcpool: cp cache (loaded here from rmethod, not an input) +// r13: sender sp (stored into the frame) +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + // initialize fixed part of activation frame + if (native_call) { + __ sub(esp, sp, 12 * wordSize); + __ mov(rbcp, zr); + __ stp(esp, zr, Address(__ pre(sp, -12 * wordSize))); + // add 2 zero-initialized slots for native calls + __ stp(zr, zr, Address(sp, 10 * wordSize)); + } else { + __ sub(esp, sp, 10 * wordSize); + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); // get ConstMethod + __ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase + __ stp(esp, rbcp, Address(__ pre(sp, -10 * wordSize))); + } + + if (ProfileInterpreter) { + Label method_data_continue; + __ ldr(rscratch1, Address(rmethod, Method::method_data_offset())); + __ cbz(rscratch1, method_data_continue); + __ lea(rscratch1, Address(rscratch1, in_bytes(MethodData::data_offset()))); + __ bind(method_data_continue); + __ stp(rscratch1, rmethod, Address(sp, 4 * wordSize)); // save Method* and mdp (method data pointer; zero when the method has no MDO) + } else { + __ stp(zr, rmethod, Address(sp, 4 * wordSize)); // save Method* (no mdp) + } + + __ ldr(rcpool, Address(rmethod, Method::const_offset())); + __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset())); + __ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset_in_bytes())); + __ stp(rlocals, rcpool, Address(sp, 2 * wordSize)); + + __ stp(rfp, lr, Address(sp, 8 * wordSize)); + __ lea(rfp, Address(sp, 8 * wordSize)); + + // set sender sp (from r13) + // leave last_sp as null + __ stp(zr, r13, Address(sp, 6 * wordSize)); + + // Move SP out of the way: reserve max_stack plus monitor size plus 2 words, aligned down to 16 bytes (non-native only) + if (!
native_call) { + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); + __ sub(rscratch1, sp, rscratch1, ext::uxtw, 3); + __ andr(sp, rscratch1, -16); + } +} + +// End of helpers + +// Various method entries +//------------------------------------------------------------------------------------------------------------------------ +// +// + +// Method entry for java.lang.ref.Reference.get. +address InterpreterGenerator::generate_Reference_get_entry(void) { +#if INCLUDE_ALL_GCS + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes: + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_entry.
+ // + // rmethod: Method* + // r13: senderSP, must be preserved for the slow path; SP is set to it on the fast path + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + if (UseG1GC) { + Label slow_path; + const Register local_0 = c_rarg0; + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. + __ ldr(local_0, Address(esp, 0)); + __ cbz(local_0, slow_path); + + + // Load the value of the referent field. + const Address field_address(local_0, referent_offset); + __ load_heap_oop(local_0, field_address); + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. + __ enter(); // g1_write_barrier_pre may call runtime + __ g1_write_barrier_pre(noreg /* obj */, + local_0 /* pre_val */, + rthread /* thread */, + rscratch2 /* tmp */, + true /* tosca_live */, + true /* expand_call */); + __ leave(); + // areturn + __ andr(sp, r13, -16); // done with stack: restore the sender's SP + __ ret(lr); + + // generate a vanilla interpreter entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + return entry; + } +#endif // INCLUDE_ALL_GCS + + // If G1 is not enabled then attempt to go through the accessor entry point + // Reference.get is an accessor + return generate_accessor_entry(); +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // r13: senderSP, must be preserved for the slow path + // esp: args + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. (NOTE(review): the 'state' local declared next is never referenced; the adrp builds its own ExternalAddress.)
+ ExternalAddress state(SafepointSynchronize::address_of_state()); + unsigned long offset; + __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset); + __ ldrw(rscratch1, Address(rscratch1, offset)); + assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); + __ cbnz(rscratch1, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters + const Register crc = c_rarg0; // crc + const Register val = c_rarg1; // source java byte value + const Register tbl = c_rarg2; // CRC table address + + // Arguments are reversed on java expression stack + __ ldrw(val, Address(esp, 0)); // byte value + __ ldrw(crc, Address(esp, wordSize)); // Initial CRC + + __ adrp(tbl, ExternalAddress(StubRoutines::crc_table_addr()), offset); + __ add(tbl, tbl, offset); + + __ ornw(crc, zr, crc); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ ornw(crc, zr, crc); // ~crc + + // result in c_rarg0 + + __ andr(sp, r13, -16); // restore the sender's SP before returning + __ ret(lr); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rmethod: Method* + // r13: senderSP, must be preserved for the slow path + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. (NOTE(review): the 'state' local declared next is never referenced; the adrp builds its own ExternalAddress.)
+ ExternalAddress state(SafepointSynchronize::address_of_state()); + unsigned long offset; + __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset); + __ ldrw(rscratch1, Address(rscratch1, offset)); + assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); + __ cbnz(rscratch1, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register off = len; // offset (shares len's register; finished with before 'len' is loaded) + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ ldr(buf, Address(esp, 2*wordSize)); // long buf + __ ldrw(off, Address(esp, wordSize)); // offset + __ add(buf, buf, off); // + offset + __ ldrw(crc, Address(esp, 4*wordSize)); // Initial CRC + } else { + __ ldr(buf, Address(esp, 2*wordSize)); // byte[] array + __ add(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ ldrw(off, Address(esp, wordSize)); // offset + __ add(buf, buf, off); // + offset + __ ldrw(crc, Address(esp, 3*wordSize)); // Initial CRC + } + // Can now load 'len' since we're finished with 'off' + __ ldrw(len, Address(esp, 0x0)); // Length + + __ andr(sp, r13, -16); // Restore the caller's SP + + // We are frameless so we can just jump to the stub. + __ b(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32())); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) { + // Bang each page in the shadow zone.
We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page + // needs to be checked. Only true for non-native. + if (UseStackBanging) { + const int start_page = native_call ? StackShadowPages : 1; // native: only the last shadow page is touched + const int page_size = os::vm_page_size(); + for (int pages = start_page; pages <= StackShadowPages ; pages++) { + __ sub(rscratch2, sp, pages*page_size); + __ str(zr, Address(rscratch2)); // store zero 'pages' pages below sp + } + } +} + + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. +address InterpreterGenerator::generate_native_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // r1: Method* + // rscratch1: sender sp + + address entry_point = __ pc(); + + const Address constMethod (rmethod, Method::const_offset()); + const Address access_flags (rmethod, Method::access_flags_offset()); + const Address size_of_parameters(r2, ConstMethod:: + size_of_parameters_offset()); + + // get parameter size (always needed) + __ ldr(r2, constMethod); + __ load_unsigned_short(r2, size_of_parameters); + + // native calls don't need the stack size check since they have no + // expression stack and the arguments are already on the stack and + // we only add a handful of words to the stack + + // rmethod: Method* + // r2: size of parameters + // rscratch1: sender sp + + // for natives the size of locals is zero + + // compute beginning of parameters (rlocals) + __ add(rlocals, esp, r2, ext::uxtx, 3); + __ add(rlocals, rlocals, -wordSize); + + // Pull SP back to minimum size: this avoids holes in the stack + __ andr(sp, esp, -16); + + // initialize fixed part of activation frame + generate_fixed_frame(true); +#ifndef PRODUCT + // tell the simulator that a method has been entered + if (NotifySimulator) { + __
notify(Assembler::method_entry); + } +#endif + + // make sure method is native & not abstract +#ifdef ASSERT + __ ldrw(r0, access_flags); + { + Label L; + __ tst(r0, JVM_ACC_NATIVE); + __ br(Assembler::NE, L); + __ stop("tried to execute non-native method as native"); + __ bind(L); + } + { + Label L; + __ tst(r0, JVM_ACC_ABSTRACT); + __ br(Assembler::EQ, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. + + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ mov(rscratch2, true); + __ strb(rscratch2, do_not_unlock_if_synchronized); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag + __ strb(zr, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. 
+ if (synchronized) { + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ldrw(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ br(Assembler::EQ, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // start execution +#ifdef ASSERT + { + Label L; + const Address monitor_block_top(rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ ldr(rscratch1, monitor_block_top); + __ cmp(esp, rscratch1); + __ br(Assembler::EQ, L); + __ stop("broken stack frame setup in interpreter"); + __ bind(L); + } +#endif + + // jvmti support + __ notify_method_entry(); + + // work registers + const Register t = r17; + const Register result_handler = r19; + + // allocate space for parameters + __ ldr(t, Address(rmethod, Method::const_offset())); + __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + + __ sub(rscratch1, esp, t, ext::uxtx, Interpreter::logStackElementSize); + __ andr(sp, rscratch1, -16); + __ mov(esp, rscratch1); + + // get signature handler + { + Label L; + __ ldr(t, Address(rmethod, Method::signature_handler_offset())); + __ cbnz(t, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), + rmethod); + __ ldr(t, Address(rmethod, Method::signature_handler_offset())); + __ bind(L); + } + + // call signature handler + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, + "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1, + "adjust this code"); + + // The generated handlers do not touch rmethod (the method). + // However, large signatures cannot be cached and are generated + // each time here. The slow-path generator can do a GC on return, + // so we must reload it after the call. 
+ __ blr(t); + __ get_method(rmethod); // slow path can do a GC, reload rmethod + + + // result handler is in r0 + // set result handler + __ mov(result_handler, r0); + // pass mirror handle if static call + { + Label L; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ ldrw(t, Address(rmethod, Method::access_flags_offset())); + __ tst(t, JVM_ACC_STATIC); + __ br(Assembler::EQ, L); + // get mirror + __ ldr(t, Address(rmethod, Method::const_offset())); + __ ldr(t, Address(t, ConstMethod::constants_offset())); + __ ldr(t, Address(t, ConstantPool::pool_holder_offset_in_bytes())); + __ ldr(t, Address(t, mirror_offset)); + // copy mirror into activation frame + __ str(t, Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize)); + // pass handle to mirror + __ add(c_rarg1, rfp, frame::interpreter_frame_oop_temp_offset * wordSize); + __ bind(L); + } + + // get native function entry point in r10 + { + Label L; + __ ldr(r10, Address(rmethod, Method::native_function_offset())); + address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); + __ mov(rscratch2, unsatisfied); + __ ldr(rscratch2, rscratch2); + __ cmp(r10, rscratch2); + __ br(Assembler::NE, L); + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::prepare_native_call), + rmethod); + __ get_method(rmethod); + __ ldr(r10, Address(rmethod, Method::native_function_offset())); + __ bind(L); + } + + // pass JNIEnv + __ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset())); + + // It is enough that the pc() points into the right code + // segment. It does not have to be the correct return pc. 
+ __ set_last_Java_frame(esp, rfp, (address)NULL, rscratch1); + + // change thread state +#ifdef ASSERT + { + Label L; + __ ldrw(t, Address(rthread, JavaThread::thread_state_offset())); + __ cmp(t, _thread_in_Java); + __ br(Assembler::EQ, L); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif + + // Change state to native + __ mov(rscratch1, _thread_in_native); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + + // Call the native method. + __ blrt(r10, rscratch1); + __ maybe_isb(); + __ get_method(rmethod); + // result potentially in r0 or v0 + + // make room for the pushes we're about to do + __ sub(rscratch1, esp, 4 * wordSize); + __ andr(sp, rscratch1, -16); + + // NOTE: The order of these pushes is known to frame::interpreter_frame_result + // in order to extract the result of a method call. If the order of these + // pushes change or anything else is added to the stack then the code in + // interpreter_frame_result must also change. + __ push(dtos); + __ push(ltos); + + // change thread state + __ mov(rscratch1, _thread_in_native_trans); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + + if (os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ dsb(Assembler::SY); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. 
+ __ serialize_memory(rthread, rscratch2); + } + } + + // check for safepoint operation in progress and/or pending suspend requests + { + Label Continue; + { + unsigned long offset; + __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset); + __ ldrw(rscratch2, Address(rscratch2, offset)); + } + assert(SafepointSynchronize::_not_synchronized == 0, + "SafepointSynchronize::_not_synchronized"); + Label L; + __ cbnz(rscratch2, L); + __ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset())); + __ cbz(rscratch2, Continue); + __ bind(L); + + // Don't use call_VM as it will see a possible pending exception + // and forward it and never return here preventing us from + // clearing _last_native_pc down below. So we do a runtime call by + // hand. + // + __ mov(c_rarg0, rthread); + __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); + __ blrt(rscratch2, 1, 0, 0); + __ maybe_isb(); + __ get_method(rmethod); + __ reinit_heapbase(); + __ bind(Continue); + } + + // change thread state + __ mov(rscratch1, _thread_in_Java); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + + // reset_last_Java_frame + __ reset_last_Java_frame(true, true); + + // reset handle block + __ ldr(t, Address(rthread, JavaThread::active_handles_offset())); + __ str(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); + + // If result is an oop unbox and store it in frame where gc will see it + // and result handler will pick it up + + { + Label no_oop, store_result; + __ adr(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); + __ cmp(t, result_handler); + __ br(Assembler::NE, no_oop); + // retrieve result + __ pop(ltos); + __ cbz(r0, store_result); + __ ldr(r0, Address(r0, 0)); + __ bind(store_result); + __ str(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); + // keep stack depth as expected by pushing oop which will eventually be 
discarded + __ push(ltos); + __ bind(no_oop); + } + + { + Label no_reguard; + __ lea(rscratch1, Address(rthread, in_bytes(JavaThread::stack_guard_state_offset()))); + __ ldrb(rscratch1, Address(rscratch1)); + __ cmp(rscratch1, JavaThread::stack_guard_yellow_disabled); + __ br(Assembler::NE, no_reguard); + + __ pusha(); // XXX only save smashed registers + __ mov(c_rarg0, rthread); + __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); + __ blrt(rscratch2, 0, 0, 0); + __ popa(); // XXX only restore smashed registers + __ bind(no_reguard); + } + + // The method register is junk from after the thread_in_native transition + // until here. Also can't call_VM until the bcp has been + // restored. Need bcp for throwing exception below so get it now. + __ get_method(rmethod); + + // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> + // rbcp == code_base() + __ ldr(rbcp, Address(rmethod, Method::const_offset())); // get ConstMethod* + __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); // get codebase + // handle exceptions (exception handling will handle unlocking!) + { + Label L; + __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); + __ cbz(rscratch1, L); + // Note: At some point we may want to unify this with the code + // used in call_VM_base(); i.e., we should use the + // StubRoutines::forward_exception code. For now this doesn't work + // here because the rsp is not correctly set at this point. 
+ __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // do unlocking if necessary + { + Label L; + __ ldrw(t, Address(rmethod, Method::access_flags_offset())); + __ tst(t, JVM_ACC_SYNCHRONIZED); + __ br(Assembler::EQ, L); + // the code below should be shared with interpreter macro + // assembler implementation + { + Label unlock; + // BasicObjectLock will be first in list, since this is a + // synchronized method. However, need to check that the object + // has not been unlocked by an explicit monitorexit bytecode. + + // monitor expect in c_rarg1 for slow unlock path + __ lea (c_rarg1, Address(rfp, // address of first monitor + (intptr_t)(frame::interpreter_frame_initial_sp_offset * + wordSize - sizeof(BasicObjectLock)))); + + __ ldr(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ cbnz(t, unlock); + + // Entry already unlocked, need to throw exception + __ MacroAssembler::call_VM(noreg, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_illegal_monitor_state_exception)); + __ should_not_reach_here(); + + __ bind(unlock); + __ unlock_object(c_rarg1); + } + __ bind(L); + } + + // jvmti support + // Note: This must happen _after_ handling/throwing any exceptions since + // the exception handler code notifies the runtime of method exits + // too. If this happens before, method entry/exit notifications are + // not properly paired (was bug - gri 11/22/99). 
+ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); + + // restore potential result in r0:d0, call result handler to + // restore potential result in ST0 & handle result + + __ pop(ltos); + __ pop(dtos); + + __ blr(result_handler); + + // remove activation + __ ldr(esp, Address(rfp, + frame::interpreter_frame_sender_sp_offset * + wordSize)); // get sender sp + // remove frame anchor + __ leave(); + + // restore sender sp + __ mov(sp, esp); + + __ ret(lr); + + if (inc_counter) { + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + } + + return entry_point; +} + +// +// Generic interpreted method entry to (asm) interpreter +// +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + // determine code generation flags + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // rscratch1: sender sp + address entry_point = __ pc(); + + const Address constMethod(rmethod, Method::const_offset()); + const Address access_flags(rmethod, Method::access_flags_offset()); + const Address size_of_parameters(r3, + ConstMethod::size_of_parameters_offset()); + const Address size_of_locals(r3, ConstMethod::size_of_locals_offset()); + + // get parameter size (always needed) + // need to load the const method first + __ ldr(r3, constMethod); + __ load_unsigned_short(r2, size_of_parameters); + + // r2: size of parameters + + __ load_unsigned_short(r3, size_of_locals); // get size of locals in words + __ sub(r3, r3, r2); // r3 = no. of additional locals + + // see if we've got enough room on the stack for locals plus overhead.
+ generate_stack_overflow_check(); + + // compute beginning of parameters (rlocals) + __ add(rlocals, esp, r2, ext::uxtx, 3); + __ sub(rlocals, rlocals, wordSize); + + // Make room for locals + __ sub(rscratch1, esp, r3, ext::uxtx, 3); + __ andr(sp, rscratch1, -16); + + // r3 - # of additional locals + // allocate space for locals + // explicitly initialize locals + { + Label exit, loop; + __ ands(zr, r3, r3); + __ br(Assembler::LE, exit); // do nothing if r3 <= 0 + __ bind(loop); + __ str(zr, Address(__ post(rscratch1, wordSize))); + __ sub(r3, r3, 1); // until everything initialized + __ cbnz(r3, loop); + __ bind(exit); + } + + // And the base dispatch table + __ get_dispatch(); + + // initialize fixed part of activation frame + generate_fixed_frame(false); +#ifndef PRODUCT + // tell the simulator that a method has been entered + if (NotifySimulator) { + __ notify(Assembler::method_entry); + } +#endif + // make sure method is not native & not abstract +#ifdef ASSERT + __ ldrw(r0, access_flags); + { + Label L; + __ tst(r0, JVM_ACC_NATIVE); + __ br(Assembler::EQ, L); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { + Label L; + __ tst(r0, JVM_ACC_ABSTRACT); + __ br(Assembler::EQ, L); + __ stop("tried to execute abstract method in interpreter"); + __ bind(L); + } +#endif + + // Since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which hasn't been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. The remove_activation + // will check this flag. 
+ + const Address do_not_unlock_if_synchronized(rthread, + in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); + __ mov(rscratch2, true); + __ strb(rscratch2, do_not_unlock_if_synchronized); + + // increment invocation count & check for overflow + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, + &profile_method, + &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + + Label continue_after_compile; + __ bind(continue_after_compile); + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag + __ strb(zr, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + if (synchronized) { + // Allocate monitor and lock method + lock_method(); + } else { + // no synchronization necessary +#ifdef ASSERT + { + Label L; + __ ldrw(r0, access_flags); + __ tst(r0, JVM_ACC_SYNCHRONIZED); + __ br(Assembler::EQ, L); + __ stop("method needs synchronization"); + __ bind(L); + } +#endif + } + + // start execution +#ifdef ASSERT + { + Label L; + const Address monitor_block_top (rfp, + frame::interpreter_frame_monitor_block_top_offset * wordSize); + __ ldr(rscratch1, monitor_block_top); + __ cmp(esp, rscratch1); + __ br(Assembler::EQ, L); + __ stop("broken stack frame setup in interpreter"); + __ bind(L); + } +#endif + + // jvmti support + __ notify_method_entry(); + + __ dispatch_next(vtos); + + // invocation counter overflow + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + // don't think we need this + __ get_method(r1); + __ 
b(profile_method_continue); + } + // Handle overflow of counter and compile method + __ bind(invocation_counter_overflow); + generate_counter_overflow(&continue_after_compile); + } + + return entry_point; +} + +//----------------------------------------------------------------------------- +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + // Entry point in previous activation (i.e., if the caller was + // interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // Restore sp to interpreter_frame_last_sp even though we are going + // to empty the expression stack for the exception processing. + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + // r0: exception + // r3: return address/pc that threw exception + __ restore_bcp(); // rbcp points to call/send + __ restore_locals(); + __ restore_constant_pool_cache(); + __ reinit_heapbase(); // restore rheapbase as heapbase. + __ get_dispatch(); + +#ifndef PRODUCT + // tell the simulator that the caller method has been reentered + if (NotifySimulator) { + __ get_method(rmethod); + __ notify(Assembler::method_reentry); + } +#endif + // Entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + // If we came here via a NullPointerException on the receiver of a + // method, rmethod may be corrupt. 
+ __ get_method(rmethod); + // expression stack is undefined here + // r0: exception + // rbcp: exception bcp + __ verify_oop(r0); + __ mov(c_rarg1, r0); + + // expression stack must be empty before entering the VM in case of + // an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + __ call_VM(r3, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::exception_handler_for_exception), + c_rarg1); + + // Calculate stack limit + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 4); + __ ldr(rscratch2, + Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, ext::uxtx, 3); + __ andr(sp, rscratch1, -16); + + // r0: exception handler entry point + // r3: preserved exception oop + // rbcp: bcp for exception handler + __ push_ptr(r3); // push exception which is now the only value on the stack + __ br(r0); // jump to exception handler (may be _remove_activation_entry!) + + // If the exception is not handled in the current frame the frame is + // removed and the exception is rethrown (i.e. exception + // continuation is _rethrow_exception). + // + // Note: At this point the bci is still the bxi for the instruction + // which caused the exception and the expression stack is + // empty. Thus, for any VM calls at this point, GC will find a legal + // oop map (with empty expression stack). + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + __ empty_expression_stack(); + // Set the popframe_processing bit in pending_popframe_condition + // indicating that we are currently handling popframe, so that + // call_VMs that may happen later do not trigger new popframe + // handling cycles. 
+ __ ldrw(r3, Address(rthread, JavaThread::popframe_condition_offset())); + __ orr(r3, r3, JavaThread::popframe_processing_bit); + __ strw(r3, Address(rthread, JavaThread::popframe_condition_offset())); + + { + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label caller_not_deoptimized; + __ ldr(c_rarg1, Address(rfp, frame::return_addr_offset * wordSize)); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + InterpreterRuntime::interpreter_contains), c_rarg1); + __ cbnz(r0, caller_not_deoptimized); + + // Compute size of arguments for saving when returning to + // deoptimized caller + __ get_method(r0); + __ ldr(r0, Address(r0, Method::const_offset())); + __ load_unsigned_short(r0, Address(r0, in_bytes(ConstMethod:: + size_of_parameters_offset()))); + __ lsl(r0, r0, Interpreter::logStackElementSize); + __ restore_locals(); // XXX do we need this? 
+ __ sub(rlocals, rlocals, r0); + __ add(rlocals, rlocals, wordSize); + // Save these arguments + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + Deoptimization:: + popframe_preserve_args), + rthread, r0, rlocals); + + __ remove_activation(vtos, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Inform deoptimization that it is responsible for restoring + // these arguments + __ mov(rscratch1, JavaThread::popframe_force_deopt_reexecution_bit); + __ strw(rscratch1, Address(rthread, JavaThread::popframe_condition_offset())); + + // Continue in deoptimization handler + __ ret(lr); + + __ bind(caller_not_deoptimized); + } + + __ remove_activation(vtos, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false, + /* notify_jvmdi */ false); + + // Restore the last_sp and null it out + __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); + + __ restore_bcp(); + __ restore_locals(); + __ restore_constant_pool_cache(); + __ get_method(rmethod); + + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } + + // Clear the popframe condition flag + __ strw(zr, Address(rthread, JavaThread::popframe_condition_offset())); + assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); + +#if INCLUDE_JVMTI + { + Label L_done; + + __ ldrb(rscratch1, Address(rbcp, 0)); + __ cmpw(r1, Bytecodes::_invokestatic); + __ br(Assembler::EQ, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. 
+ + __ ldr(c_rarg0, Address(rlocals, 0)); + __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), c_rarg0, rmethod, rbcp); + + __ cbz(r0, L_done); + + __ str(r0, Address(esp, 0)); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + // Restore machine SP + __ ldr(rscratch1, Address(rmethod, Method::const_offset())); + __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); + __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 4); + __ ldr(rscratch2, + Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); + __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3); + __ andr(sp, rscratch1, -16); + + __ dispatch_next(vtos); + // end of PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence + __ pop_ptr(r0); + __ str(r0, Address(rthread, JavaThread::vm_result_offset())); + // remove the activation (without doing throws on illegalMonitorExceptions) + __ remove_activation(vtos, false, true, false); + // restore exception + // restore exception + __ get_vm_result(r0, rthread); + + // In between activations - previous activation type unknown yet + // compute continuation point - the continuation point expects the + // following registers set up: + // + // r0: exception + // lr: return address/pc that threw exception + // rsp: expression stack of caller + // rfp: fp of caller + // FIXME: There's no point saving LR here because VM calls don't trash it + __ stp(r0, lr, Address(__ pre(sp, -2 * wordSize))); // save exception & return address + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, + SharedRuntime::exception_handler_for_return_address), + rthread, lr); + __ mov(r1, r0); // save exception handler + __ ldp(r0, lr, Address(__ post(sp, 2 * wordSize))); // restore exception & return address + // We might be returning to a deopt handler that expects r3 to + // contain the exception pc + __ mov(r3, lr); + // Note that an "issuing 
PC" is actually the next PC after the call + __ br(r1); // jump to exception + // handler of caller +} + + +// +// JVMTI ForceEarlyReturn support +// +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + + __ restore_bcp(); + __ restore_locals(); + __ empty_expression_stack(); + __ load_earlyret_value(state); + + __ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); + Address cond_addr(rscratch1, JvmtiThreadState::earlyret_state_offset()); + + // Clear the earlyret state + assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); + __ str(zr, cond_addr); + + __ remove_activation(state, + false, /* throw_monitor_exception */ + false, /* install_monitor_exception */ + true); /* notify_jvmdi */ + __ ret(lr); + + return entry; +} // end of ForceEarlyReturn support + + + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + aep = __ pc(); __ push_ptr(); __ b(L); + fep = __ pc(); __ push_f(); __ b(L); + dep = __ pc(); __ push_d(); __ b(L); + lep = __ pc(); __ push_l(); __ b(L); + bep = cep = sep = + iep = __ pc(); __ push_i(); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + +//----------------------------------------------------------------------------- +// Generation of individual instructions + +// helpers for generate_and_dispatch + + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : TemplateInterpreterGenerator(code) { + generate_all(); // down here so it can be "virtual" +} + +//----------------------------------------------------------------------------- + +// Non-product code 
+#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + __ push(lr); + __ push(state); + __ push(RegSet::range(r0, r15), sp); + __ mov(c_rarg2, r0); // Pass itos + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), + c_rarg1, c_rarg2, c_rarg3); + __ pop(RegSet::range(r0, r15), sp); + __ pop(state); + __ pop(lr); + __ ret(lr); // return from result handler + + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + Register rscratch3 = r0; + __ push(rscratch1); + __ push(rscratch2); + __ push(rscratch3); + Label L; + __ mov(rscratch2, (address) &BytecodeCounter::_counter_value); + __ bind(L); + __ ldxr(rscratch1, rscratch2); + __ add(rscratch1, rscratch1, 1); + __ stxr(rscratch3, rscratch1, rscratch2); + __ cbnzw(rscratch3, L); + __ pop(rscratch3); + __ pop(rscratch2); + __ pop(rscratch1); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. 
+ + assert(Interpreter::trace_code(t->tos_in()) != NULL, + "entry must have been generated"); + __ bl(Interpreter::trace_code(t->tos_in())); + __ reinit_heapbase(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + __ push(rscratch1); + __ mov(rscratch1, (address) &BytecodeCounter::_counter_value); + __ ldr(rscratch1, Address(rscratch1)); + __ mov(rscratch2, StopInterpreterAt); + __ cmpw(rscratch1, rscratch2); + __ br(Assembler::NE, L); + __ brk(0); + __ bind(L); + __ pop(rscratch1); +} + +#ifdef BUILTIN_SIM + +#include +#include + +extern "C" { + static int PAGESIZE = getpagesize(); + int is_mapped_address(u_int64_t address) + { + address = (address & ~((u_int64_t)PAGESIZE - 1)); + if (msync((void *)address, PAGESIZE, MS_ASYNC) == 0) { + return true; + } + if (errno != ENOMEM) { + return true; + } + return false; + } + + void bccheck1(u_int64_t pc, u_int64_t fp, char *method, int *bcidx, int *framesize, char *decode) + { + if (method != 0) { + method[0] = '\0'; + } + if (bcidx != 0) { + *bcidx = -2; + } + if (decode != 0) { + decode[0] = 0; + } + + if (framesize != 0) { + *framesize = -1; + } + + if (Interpreter::contains((address)pc)) { + AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck); + Method* meth; + address bcp; + if (fp) { +#define FRAME_SLOT_METHOD 3 +#define FRAME_SLOT_BCP 7 + meth = (Method*)sim->getMemory()->loadU64(fp - (FRAME_SLOT_METHOD << 3)); + bcp = (address)sim->getMemory()->loadU64(fp - (FRAME_SLOT_BCP << 3)); +#undef FRAME_SLOT_METHOD +#undef FRAME_SLOT_BCP + } else { + meth = (Method*)sim->getCPUState().xreg(RMETHOD, 0); + bcp = (address)sim->getCPUState().xreg(RBCP, 0); + } + if (meth->is_native()) { + return; + } + if(method && meth->is_method()) { + ResourceMark rm; + method[0] = 'I'; + method[1] = ' '; + meth->name_and_sig_as_C_string(method + 2, 398); + } + if (bcidx) { + if (meth->contains(bcp)) { + *bcidx = meth->bci_from(bcp); + } else { + *bcidx = -2; + } + } + if 
(decode) { + if (!BytecodeTracer::closure()) { + BytecodeTracer::set_closure(BytecodeTracer::std_closure()); + } + stringStream str(decode, 400); + BytecodeTracer::trace(meth, bcp, &str); + } + } else { + if (method) { + CodeBlob *cb = CodeCache::find_blob((address)pc); + if (cb != NULL) { + if (cb->is_nmethod()) { + ResourceMark rm; + nmethod* nm = (nmethod*)cb; + method[0] = 'C'; + method[1] = ' '; + nm->method()->name_and_sig_as_C_string(method + 2, 398); + } else if (cb->is_adapter_blob()) { + strcpy(method, "B adapter blob"); + } else if (cb->is_runtime_stub()) { + strcpy(method, "B runtime stub"); + } else if (cb->is_exception_stub()) { + strcpy(method, "B exception stub"); + } else if (cb->is_deoptimization_stub()) { + strcpy(method, "B deoptimization stub"); + } else if (cb->is_safepoint_stub()) { + strcpy(method, "B safepoint stub"); + } else if (cb->is_uncommon_trap_stub()) { + strcpy(method, "B uncommon trap stub"); + } else if (cb->contains((address)StubRoutines::call_stub())) { + strcpy(method, "B call stub"); + } else { + strcpy(method, "B unknown blob : "); + strcat(method, cb->name()); + } + if (framesize != NULL) { + *framesize = cb->frame_size(); + } + } + } + } + } + + + JNIEXPORT void bccheck(u_int64_t pc, u_int64_t fp, char *method, int *bcidx, int *framesize, char *decode) + { + bccheck1(pc, fp, method, bcidx, framesize, decode); + } +} + +#endif // BUILTIN_SIM +#endif // !PRODUCT +#endif // ! 
CC_INTERP diff --git a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp index fb339033869..314af0b87ab 100644 --- a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp @@ -24,238 +24,12 @@ */ #include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" -#include "interpreter/interpreterGenerator.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "interpreter/interp_masm.hpp" -#include "interpreter/templateTable.hpp" -#include "interpreter/bytecodeTracer.hpp" -#include "oops/arrayOop.hpp" -#include "oops/methodData.hpp" +#include "oops/constMethod.hpp" #include "oops/method.hpp" -#include "oops/oop.inline.hpp" -#include "prims/jvmtiExport.hpp" -#include "prims/jvmtiThreadState.hpp" -#include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" -#include "runtime/synchronizer.hpp" -#include "runtime/timer.hpp" -#include "runtime/vframeArray.hpp" #include "utilities/debug.hpp" -#include - -#ifndef PRODUCT -#include "oops/method.hpp" -#endif // !PRODUCT - -#ifdef BUILTIN_SIM -#include "../../../../../../simulator/simulator.hpp" -#endif - -#define __ _masm-> - -#ifndef CC_INTERP - -//----------------------------------------------------------------------------- - -extern "C" void entry(CodeBuffer*); - -//----------------------------------------------------------------------------- - -address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { - address entry = __ pc(); - -#ifdef ASSERT - { - Label L; - __ ldr(rscratch1, Address(rfp, - frame::interpreter_frame_monitor_block_top_offset * - wordSize)); - __ mov(rscratch2, sp); - __ cmp(rscratch1, rscratch2); // maximal rsp for current rfp (stack - // 
grows negative) - __ br(Assembler::HS, L); // check if frame is complete - __ stop ("interpreter frame not set up"); - __ bind(L); - } -#endif // ASSERT - // Restore bcp under the assumption that the current frame is still - // interpreted - __ restore_bcp(); - - // expression stack must be empty before entering the VM if an - // exception happened - __ empty_expression_stack(); - // throw exception - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::throw_StackOverflowError)); - return entry; -} - -address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( - const char* name) { - address entry = __ pc(); - // expression stack must be empty before entering the VM if an - // exception happened - __ empty_expression_stack(); - // setup parameters - // ??? convention: expect aberrant index in register r1 - __ movw(c_rarg2, r1); - __ mov(c_rarg1, (address)name); - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime:: - throw_ArrayIndexOutOfBoundsException), - c_rarg1, c_rarg2); - return entry; -} - -address TemplateInterpreterGenerator::generate_ClassCastException_handler() { - address entry = __ pc(); - - // object is at TOS - __ pop(c_rarg1); - - // expression stack must be empty before entering the VM if an - // exception happened - __ empty_expression_stack(); - - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime:: - throw_ClassCastException), - c_rarg1); - return entry; -} - -address TemplateInterpreterGenerator::generate_exception_handler_common( - const char* name, const char* message, bool pass_oop) { - assert(!pass_oop || message == NULL, "either oop or message but not both"); - address entry = __ pc(); - if (pass_oop) { - // object is at TOS - __ pop(c_rarg2); - } - // expression stack must be empty before entering the VM if an - // exception happened - __ empty_expression_stack(); - // setup parameters - __ lea(c_rarg1, Address((address)name)); - if (pass_oop) { - __ call_VM(r0, 
CAST_FROM_FN_PTR(address, - InterpreterRuntime:: - create_klass_exception), - c_rarg1, c_rarg2); - } else { - // kind of lame ExternalAddress can't take NULL because - // external_word_Relocation will assert. - if (message != NULL) { - __ lea(c_rarg2, Address((address)message)); - } else { - __ mov(c_rarg2, NULL_WORD); - } - __ call_VM(r0, - CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), - c_rarg1, c_rarg2); - } - // throw exception - __ b(address(Interpreter::throw_exception_entry())); - return entry; -} - -address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { - address entry = __ pc(); - // NULL last_sp until next java call - __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); - __ dispatch_next(state); - return entry; -} - -address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { - address entry = __ pc(); - - // Restore stack bottom in case i2c adjusted stack - __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); - // and NULL it as marker that esp is now tos until next java call - __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); - __ restore_bcp(); - __ restore_locals(); - __ restore_constant_pool_cache(); - __ get_method(rmethod); - - // Pop N words from the stack - __ get_cache_and_index_at_bcp(r1, r2, 1, index_size); - __ ldr(r1, Address(r1, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); - __ andr(r1, r1, ConstantPoolCacheEntry::parameter_size_mask); - - __ add(esp, esp, r1, Assembler::LSL, 3); - - // Restore machine SP - __ ldr(rscratch1, Address(rmethod, Method::const_offset())); - __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); - __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); - __ ldr(rscratch2, - Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); - __ sub(rscratch1, 
rscratch2, rscratch1, ext::uxtw, 3); - __ andr(sp, rscratch1, -16); - -#ifndef PRODUCT - // tell the simulator that the method has been reentered - if (NotifySimulator) { - __ notify(Assembler::method_reentry); - } -#endif - __ get_dispatch(); - __ dispatch_next(state, step); - - return entry; -} - -address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, - int step) { - address entry = __ pc(); - __ restore_bcp(); - __ restore_locals(); - __ restore_constant_pool_cache(); - __ get_method(rmethod); - - // handle exceptions - { - Label L; - __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); - __ cbz(rscratch1, L); - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::throw_pending_exception)); - __ should_not_reach_here(); - __ bind(L); - } - - __ get_dispatch(); - - // Calculate stack limit - __ ldr(rscratch1, Address(rmethod, Method::const_offset())); - __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); - __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); - __ ldr(rscratch2, - Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); - __ sub(rscratch1, rscratch2, rscratch1, ext::uxtx, 3); - __ andr(sp, rscratch1, -16); - - // Restore expression stack pointer - __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); - // NULL last_sp until next java call - __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); - - __ dispatch_next(state, step); - return entry; -} +#include "utilities/macros.hpp" int AbstractInterpreter::BasicType_as_index(BasicType type) { @@ -279,1195 +53,6 @@ int AbstractInterpreter::BasicType_as_index(BasicType type) { return i; } - -address TemplateInterpreterGenerator::generate_result_handler_for( - BasicType type) { - address entry = __ pc(); - switch (type) { - case T_BOOLEAN: __ uxtb(r0, r0); break; - case T_CHAR : __ uxth(r0, r0); break; - case T_BYTE : __ sxtb(r0, r0); break; 
- case T_SHORT : __ sxth(r0, r0); break; - case T_INT : __ uxtw(r0, r0); break; // FIXME: We almost certainly don't need this - case T_LONG : /* nothing to do */ break; - case T_VOID : /* nothing to do */ break; - case T_FLOAT : /* nothing to do */ break; - case T_DOUBLE : /* nothing to do */ break; - case T_OBJECT : - // retrieve result from frame - __ ldr(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); - // and verify it - __ verify_oop(r0); - break; - default : ShouldNotReachHere(); - } - __ ret(lr); // return from result handler - return entry; -} - -address TemplateInterpreterGenerator::generate_safept_entry_for( - TosState state, - address runtime_entry) { - address entry = __ pc(); - __ push(state); - __ call_VM(noreg, runtime_entry); - __ membar(Assembler::AnyAny); - __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); - return entry; -} - -// Helpers for commoning out cases in the various type of method entries. -// - - -// increment invocation count & check for overflow -// -// Note: checking for negative value instead of overflow -// so we have a 'sticky' overflow test -// -// rmethod: method -// -void InterpreterGenerator::generate_counter_incr( - Label* overflow, - Label* profile_method, - Label* profile_method_continue) { - Label done; - // Note: In tiered we increment either counters in Method* or in MDO depending if we're profiling or not. - if (TieredCompilation) { - int increment = InvocationCounter::count_increment; - Label no_mdo; - if (ProfileInterpreter) { - // Are we profiling? 
- __ ldr(r0, Address(rmethod, Method::method_data_offset())); - __ cbz(r0, no_mdo); - // Increment counter in the MDO - const Address mdo_invocation_counter(r0, in_bytes(MethodData::invocation_counter_offset()) + - in_bytes(InvocationCounter::counter_offset())); - const Address mask(r0, in_bytes(MethodData::invoke_mask_offset())); - __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rscratch1, rscratch2, false, Assembler::EQ, overflow); - __ b(done); - } - __ bind(no_mdo); - // Increment counter in MethodCounters - const Address invocation_counter(rscratch2, - MethodCounters::invocation_counter_offset() + - InvocationCounter::counter_offset()); - __ get_method_counters(rmethod, rscratch2, done); - const Address mask(rscratch2, in_bytes(MethodCounters::invoke_mask_offset())); - __ increment_mask_and_jump(invocation_counter, increment, mask, rscratch1, r1, false, Assembler::EQ, overflow); - __ bind(done); - } else { // not TieredCompilation - const Address backedge_counter(rscratch2, - MethodCounters::backedge_counter_offset() + - InvocationCounter::counter_offset()); - const Address invocation_counter(rscratch2, - MethodCounters::invocation_counter_offset() + - InvocationCounter::counter_offset()); - - __ get_method_counters(rmethod, rscratch2, done); - - if (ProfileInterpreter) { // %%% Merge this into MethodData* - __ ldrw(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset())); - __ addw(r1, r1, 1); - __ strw(r1, Address(rscratch2, MethodCounters::interpreter_invocation_counter_offset())); - } - // Update standard invocation counters - __ ldrw(r1, invocation_counter); - __ ldrw(r0, backedge_counter); - - __ addw(r1, r1, InvocationCounter::count_increment); - __ andw(r0, r0, InvocationCounter::count_mask_value); - - __ strw(r1, invocation_counter); - __ addw(r0, r0, r1); // add both counters - - // profile_method is non-null only for interpreted method so - // profile_method != NULL == !native_call - - if 
(ProfileInterpreter && profile_method != NULL) { - // Test to see if we should create a method data oop - __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset())); - __ ldrw(rscratch2, Address(rscratch2, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); - __ cmpw(r0, rscratch2); - __ br(Assembler::LT, *profile_method_continue); - - // if no method data exists, go to profile_method - __ test_method_data_pointer(r0, *profile_method); - } - - { - __ ldr(rscratch2, Address(rmethod, Method::method_counters_offset())); - __ ldrw(rscratch2, Address(rscratch2, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); - __ cmpw(r0, rscratch2); - __ br(Assembler::HS, *overflow); - } - __ bind(done); - } -} - -void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { - - // Asm interpreter on entry - // On return (i.e. jump to entry_point) [ back to invocation of interpreter ] - // Everything as it was on entry - - // InterpreterRuntime::frequency_counter_overflow takes two - // arguments, the first (thread) is passed by call_VM, the second - // indicates if the counter overflow occurs at a backwards branch - // (NULL bcp). We pass zero for it. The call returns the address - // of the verified entry point for the method or NULL if the - // compilation did not complete (either went background or bailed - // out). - __ mov(c_rarg1, 0); - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::frequency_counter_overflow), - c_rarg1); - - __ b(*do_continue); -} - -// See if we've got enough room on the stack for locals plus overhead. -// The expression stack grows down incrementally, so the normal guard -// page mechanism will work for that. -// -// NOTE: Since the additional locals are also always pushed (wasn't -// obvious in generate_method_entry) so the guard should work for them -// too. 
-// -// Args: -// r3: number of additional locals this frame needs (what we must check) -// rmethod: Method* -// -// Kills: -// r0 -void InterpreterGenerator::generate_stack_overflow_check(void) { - - // monitor entry size: see picture of stack set - // (generate_method_entry) and frame_amd64.hpp - const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; - - // total overhead size: entry_size + (saved rbp through expr stack - // bottom). be sure to change this if you add/subtract anything - // to/from the overhead area - const int overhead_size = - -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; - - const int page_size = os::vm_page_size(); - - Label after_frame_check; - - // see if the frame is greater than one page in size. If so, - // then we need to verify there is enough stack space remaining - // for the additional locals. - // - // Note that we use SUBS rather than CMP here because the immediate - // field of this instruction may overflow. SUBS can cope with this - // because it is a macro that will expand to some number of MOV - // instructions and a register operation. - __ subs(rscratch1, r3, (page_size - overhead_size) / Interpreter::stackElementSize); - __ br(Assembler::LS, after_frame_check); - - // compute rsp as if this were going to be the last frame on - // the stack before the red zone - - const Address stack_base(rthread, Thread::stack_base_offset()); - const Address stack_size(rthread, Thread::stack_size_offset()); - - // locals + overhead, in bytes - __ mov(r0, overhead_size); - __ add(r0, r0, r3, Assembler::LSL, Interpreter::logStackElementSize); // 2 slots per parameter. 
- - __ ldr(rscratch1, stack_base); - __ ldr(rscratch2, stack_size); - -#ifdef ASSERT - Label stack_base_okay, stack_size_okay; - // verify that thread stack base is non-zero - __ cbnz(rscratch1, stack_base_okay); - __ stop("stack base is zero"); - __ bind(stack_base_okay); - // verify that thread stack size is non-zero - __ cbnz(rscratch2, stack_size_okay); - __ stop("stack size is zero"); - __ bind(stack_size_okay); -#endif - - // Add stack base to locals and subtract stack size - __ sub(rscratch1, rscratch1, rscratch2); // Stack limit - __ add(r0, r0, rscratch1); - - // Use the maximum number of pages we might bang. - const int max_pages = StackShadowPages > (StackRedPages+StackYellowPages) ? StackShadowPages : - (StackRedPages+StackYellowPages); - - // add in the red and yellow zone sizes - __ add(r0, r0, max_pages * page_size * 2); - - // check against the current stack bottom - __ cmp(sp, r0); - __ br(Assembler::HI, after_frame_check); - - // Remove the incoming args, peeling the machine SP back to where it - // was in the caller. This is not strictly necessary, but unless we - // do so the stack frame may have a garbage FP; this ensures a - // correct call stack that we can always unwind. The ANDR should be - // unnecessary because the sender SP in r13 is always aligned, but - // it doesn't hurt. - __ andr(sp, r13, -16); - - // Note: the restored frame is not necessarily interpreted. - // Use the shared runtime version of the StackOverflowError. 
- assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); - __ far_jump(RuntimeAddress(StubRoutines::throw_StackOverflowError_entry())); - - // all done with frame size check - __ bind(after_frame_check); -} - -// Allocate monitor and lock method (asm interpreter) -// -// Args: -// rmethod: Method* -// rlocals: locals -// -// Kills: -// r0 -// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) -// rscratch1, rscratch2 (scratch regs) -void TemplateInterpreterGenerator::lock_method() { - // synchronize method - const Address access_flags(rmethod, Method::access_flags_offset()); - const Address monitor_block_top( - rfp, - frame::interpreter_frame_monitor_block_top_offset * wordSize); - const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; - -#ifdef ASSERT - { - Label L; - __ ldrw(r0, access_flags); - __ tst(r0, JVM_ACC_SYNCHRONIZED); - __ br(Assembler::NE, L); - __ stop("method doesn't need synchronization"); - __ bind(L); - } -#endif // ASSERT - - // get synchronization object - { - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - Label done; - __ ldrw(r0, access_flags); - __ tst(r0, JVM_ACC_STATIC); - // get receiver (assume this is frequent case) - __ ldr(r0, Address(rlocals, Interpreter::local_offset_in_bytes(0))); - __ br(Assembler::EQ, done); - __ ldr(r0, Address(rmethod, Method::const_offset())); - __ ldr(r0, Address(r0, ConstMethod::constants_offset())); - __ ldr(r0, Address(r0, - ConstantPool::pool_holder_offset_in_bytes())); - __ ldr(r0, Address(r0, mirror_offset)); - -#ifdef ASSERT - { - Label L; - __ cbnz(r0, L); - __ stop("synchronization object is NULL"); - __ bind(L); - } -#endif // ASSERT - - __ bind(done); - } - - // add space for monitor & lock - __ sub(sp, sp, entry_size); // add space for a monitor entry - __ sub(esp, esp, entry_size); - __ mov(rscratch1, esp); - __ str(rscratch1, monitor_block_top); // set new monitor block top - // store object - __ str(r0, Address(esp, 
BasicObjectLock::obj_offset_in_bytes())); - __ mov(c_rarg1, esp); // object address - __ lock_object(c_rarg1); -} - -// Generate a fixed interpreter frame. This is identical setup for -// interpreted methods and for native methods hence the shared code. -// -// Args: -// lr: return address -// rmethod: Method* -// rlocals: pointer to locals -// rcpool: cp cache -// stack_pointer: previous sp -void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { - // initialize fixed part of activation frame - if (native_call) { - __ sub(esp, sp, 12 * wordSize); - __ mov(rbcp, zr); - __ stp(esp, zr, Address(__ pre(sp, -12 * wordSize))); - // add 2 zero-initialized slots for native calls - __ stp(zr, zr, Address(sp, 10 * wordSize)); - } else { - __ sub(esp, sp, 10 * wordSize); - __ ldr(rscratch1, Address(rmethod, Method::const_offset())); // get ConstMethod - __ add(rbcp, rscratch1, in_bytes(ConstMethod::codes_offset())); // get codebase - __ stp(esp, rbcp, Address(__ pre(sp, -10 * wordSize))); - } - - if (ProfileInterpreter) { - Label method_data_continue; - __ ldr(rscratch1, Address(rmethod, Method::method_data_offset())); - __ cbz(rscratch1, method_data_continue); - __ lea(rscratch1, Address(rscratch1, in_bytes(MethodData::data_offset()))); - __ bind(method_data_continue); - __ stp(rscratch1, rmethod, Address(sp, 4 * wordSize)); // save Method* and mdp (method data pointer) - } else { - __ stp(zr, rmethod, Address(sp, 4 * wordSize)); // save Method* (no mdp) - } - - __ ldr(rcpool, Address(rmethod, Method::const_offset())); - __ ldr(rcpool, Address(rcpool, ConstMethod::constants_offset())); - __ ldr(rcpool, Address(rcpool, ConstantPool::cache_offset_in_bytes())); - __ stp(rlocals, rcpool, Address(sp, 2 * wordSize)); - - __ stp(rfp, lr, Address(sp, 8 * wordSize)); - __ lea(rfp, Address(sp, 8 * wordSize)); - - // set sender sp - // leave last_sp as null - __ stp(zr, r13, Address(sp, 6 * wordSize)); - - // Move SP out of the way - if (! 
native_call) { - __ ldr(rscratch1, Address(rmethod, Method::const_offset())); - __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); - __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 2); - __ sub(rscratch1, sp, rscratch1, ext::uxtw, 3); - __ andr(sp, rscratch1, -16); - } -} - -// End of helpers - -// Various method entries -//------------------------------------------------------------------------------------------------------------------------ -// -// - -// Method entry for java.lang.ref.Reference.get. -address InterpreterGenerator::generate_Reference_get_entry(void) { -#if INCLUDE_ALL_GCS - // Code: _aload_0, _getfield, _areturn - // parameter size = 1 - // - // The code that gets generated by this routine is split into 2 parts: - // 1. The "intrinsified" code for G1 (or any SATB based GC), - // 2. The slow path - which is an expansion of the regular method entry. - // - // Notes:- - // * In the G1 code we do not check whether we need to block for - // a safepoint. If G1 is enabled then we must execute the specialized - // code for Reference.get (except when the Reference object is null) - // so that we can log the value in the referent field with an SATB - // update buffer. - // If the code for the getfield template is modified so that the - // G1 pre-barrier code is executed when the current method is - // Reference.get() then going through the normal method entry - // will be fine. - // * The G1 code can, however, check the receiver object (the instance - // of java.lang.Reference) and jump to the slow path if null. If the - // Reference object is null then we obviously cannot fetch the referent - // and so we don't need to call the G1 pre-barrier. Thus we can use the - // regular method entry code to generate the NPE. - // - // This code is based on generate_accessor_enty. 
- // - // rmethod: Method* - // r13: senderSP must preserve for slow path, set SP to it on fast path - - address entry = __ pc(); - - const int referent_offset = java_lang_ref_Reference::referent_offset; - guarantee(referent_offset > 0, "referent offset not initialized"); - - if (UseG1GC) { - Label slow_path; - const Register local_0 = c_rarg0; - // Check if local 0 != NULL - // If the receiver is null then it is OK to jump to the slow path. - __ ldr(local_0, Address(esp, 0)); - __ cbz(local_0, slow_path); - - - // Load the value of the referent field. - const Address field_address(local_0, referent_offset); - __ load_heap_oop(local_0, field_address); - - // Generate the G1 pre-barrier code to log the value of - // the referent field in an SATB buffer. - __ enter(); // g1_write may call runtime - __ g1_write_barrier_pre(noreg /* obj */, - local_0 /* pre_val */, - rthread /* thread */, - rscratch2 /* tmp */, - true /* tosca_live */, - true /* expand_call */); - __ leave(); - // areturn - __ andr(sp, r13, -16); // done with stack - __ ret(lr); - - // generate a vanilla interpreter entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); - return entry; - } -#endif // INCLUDE_ALL_GCS - - // If G1 is not enabled then attempt to go through the accessor entry point - // Reference.get is an accessor - return generate_accessor_entry(); -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.update(int crc, int b) - */ -address InterpreterGenerator::generate_CRC32_update_entry() { - if (UseCRC32Intrinsics) { - address entry = __ pc(); - - // rmethod: Method* - // r13: senderSP must preserved for slow path - // esp: args - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. 
- ExternalAddress state(SafepointSynchronize::address_of_state()); - unsigned long offset; - __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset); - __ ldrw(rscratch1, Address(rscratch1, offset)); - assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); - __ cbnz(rscratch1, slow_path); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. - - // Load parameters - const Register crc = c_rarg0; // crc - const Register val = c_rarg1; // source java byte value - const Register tbl = c_rarg2; // scratch - - // Arguments are reversed on java expression stack - __ ldrw(val, Address(esp, 0)); // byte value - __ ldrw(crc, Address(esp, wordSize)); // Initial CRC - - __ adrp(tbl, ExternalAddress(StubRoutines::crc_table_addr()), offset); - __ add(tbl, tbl, offset); - - __ ornw(crc, zr, crc); // ~crc - __ update_byte_crc32(crc, val, tbl); - __ ornw(crc, zr, crc); // ~crc - - // result in c_rarg0 - - __ andr(sp, r13, -16); - __ ret(lr); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; - } - return NULL; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - */ -address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - if (UseCRC32Intrinsics) { - address entry = __ pc(); - - // rmethod,: Method* - // r13: senderSP must preserved for slow path - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. 
- ExternalAddress state(SafepointSynchronize::address_of_state()); - unsigned long offset; - __ adrp(rscratch1, ExternalAddress(SafepointSynchronize::address_of_state()), offset); - __ ldrw(rscratch1, Address(rscratch1, offset)); - assert(SafepointSynchronize::_not_synchronized == 0, "rewrite this code"); - __ cbnz(rscratch1, slow_path); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. - - // Load parameters - const Register crc = c_rarg0; // crc - const Register buf = c_rarg1; // source java byte array address - const Register len = c_rarg2; // length - const Register off = len; // offset (never overlaps with 'len') - - // Arguments are reversed on java expression stack - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { - __ ldr(buf, Address(esp, 2*wordSize)); // long buf - __ ldrw(off, Address(esp, wordSize)); // offset - __ add(buf, buf, off); // + offset - __ ldrw(crc, Address(esp, 4*wordSize)); // Initial CRC - } else { - __ ldr(buf, Address(esp, 2*wordSize)); // byte[] array - __ add(buf, buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ ldrw(off, Address(esp, wordSize)); // offset - __ add(buf, buf, off); // + offset - __ ldrw(crc, Address(esp, 3*wordSize)); // Initial CRC - } - // Can now load 'len' since we're finished with 'off' - __ ldrw(len, Address(esp, 0x0)); // Length - - __ andr(sp, r13, -16); // Restore the caller's SP - - // We are frameless so we can just jump to the stub. - __ b(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32())); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; - } - return NULL; -} - -void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) { - // Bang each page in the shadow zone. 
We can't assume it's been done for - // an interpreter frame with greater than a page of locals, so each page - // needs to be checked. Only true for non-native. - if (UseStackBanging) { - const int start_page = native_call ? StackShadowPages : 1; - const int page_size = os::vm_page_size(); - for (int pages = start_page; pages <= StackShadowPages ; pages++) { - __ sub(rscratch2, sp, pages*page_size); - __ str(zr, Address(rscratch2)); - } - } -} - - -// Interpreter stub for calling a native method. (asm interpreter) -// This sets up a somewhat different looking stack for calling the -// native method than the typical interpreter frame setup. -address InterpreterGenerator::generate_native_entry(bool synchronized) { - // determine code generation flags - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; - - // r1: Method* - // rscratch1: sender sp - - address entry_point = __ pc(); - - const Address constMethod (rmethod, Method::const_offset()); - const Address access_flags (rmethod, Method::access_flags_offset()); - const Address size_of_parameters(r2, ConstMethod:: - size_of_parameters_offset()); - - // get parameter size (always needed) - __ ldr(r2, constMethod); - __ load_unsigned_short(r2, size_of_parameters); - - // native calls don't need the stack size check since they have no - // expression stack and the arguments are already on the stack and - // we only add a handful of words to the stack - - // rmethod: Method* - // r2: size of parameters - // rscratch1: sender sp - - // for natives the size of locals is zero - - // compute beginning of parameters (rlocals) - __ add(rlocals, esp, r2, ext::uxtx, 3); - __ add(rlocals, rlocals, -wordSize); - - // Pull SP back to minimum size: this avoids holes in the stack - __ andr(sp, esp, -16); - - // initialize fixed part of activation frame - generate_fixed_frame(true); -#ifndef PRODUCT - // tell the simulator that a method has been entered - if (NotifySimulator) { - __ 
notify(Assembler::method_entry); - } -#endif - - // make sure method is native & not abstract -#ifdef ASSERT - __ ldrw(r0, access_flags); - { - Label L; - __ tst(r0, JVM_ACC_NATIVE); - __ br(Assembler::NE, L); - __ stop("tried to execute non-native method as native"); - __ bind(L); - } - { - Label L; - __ tst(r0, JVM_ACC_ABSTRACT); - __ br(Assembler::EQ, L); - __ stop("tried to execute abstract method in interpreter"); - __ bind(L); - } -#endif - - // Since at this point in the method invocation the exception - // handler would try to exit the monitor of synchronized methods - // which hasn't been entered yet, we set the thread local variable - // _do_not_unlock_if_synchronized to true. The remove_activation - // will check this flag. - - const Address do_not_unlock_if_synchronized(rthread, - in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); - __ mov(rscratch2, true); - __ strb(rscratch2, do_not_unlock_if_synchronized); - - // increment invocation count & check for overflow - Label invocation_counter_overflow; - if (inc_counter) { - generate_counter_incr(&invocation_counter_overflow, NULL, NULL); - } - - Label continue_after_compile; - __ bind(continue_after_compile); - - bang_stack_shadow_pages(true); - - // reset the _do_not_unlock_if_synchronized flag - __ strb(zr, do_not_unlock_if_synchronized); - - // check for synchronized methods - // Must happen AFTER invocation_counter check and stack overflow check, - // so method is not locked if overflows. 
- if (synchronized) { - lock_method(); - } else { - // no synchronization necessary -#ifdef ASSERT - { - Label L; - __ ldrw(r0, access_flags); - __ tst(r0, JVM_ACC_SYNCHRONIZED); - __ br(Assembler::EQ, L); - __ stop("method needs synchronization"); - __ bind(L); - } -#endif - } - - // start execution -#ifdef ASSERT - { - Label L; - const Address monitor_block_top(rfp, - frame::interpreter_frame_monitor_block_top_offset * wordSize); - __ ldr(rscratch1, monitor_block_top); - __ cmp(esp, rscratch1); - __ br(Assembler::EQ, L); - __ stop("broken stack frame setup in interpreter"); - __ bind(L); - } -#endif - - // jvmti support - __ notify_method_entry(); - - // work registers - const Register t = r17; - const Register result_handler = r19; - - // allocate space for parameters - __ ldr(t, Address(rmethod, Method::const_offset())); - __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); - - __ sub(rscratch1, esp, t, ext::uxtx, Interpreter::logStackElementSize); - __ andr(sp, rscratch1, -16); - __ mov(esp, rscratch1); - - // get signature handler - { - Label L; - __ ldr(t, Address(rmethod, Method::signature_handler_offset())); - __ cbnz(t, L); - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::prepare_native_call), - rmethod); - __ ldr(t, Address(rmethod, Method::signature_handler_offset())); - __ bind(L); - } - - // call signature handler - assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals, - "adjust this code"); - assert(InterpreterRuntime::SignatureHandlerGenerator::to() == sp, - "adjust this code"); - assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1, - "adjust this code"); - - // The generated handlers do not touch rmethod (the method). - // However, large signatures cannot be cached and are generated - // each time here. The slow-path generator can do a GC on return, - // so we must reload it after the call. 
- __ blr(t); - __ get_method(rmethod); // slow path can do a GC, reload rmethod - - - // result handler is in r0 - // set result handler - __ mov(result_handler, r0); - // pass mirror handle if static call - { - Label L; - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - __ ldrw(t, Address(rmethod, Method::access_flags_offset())); - __ tst(t, JVM_ACC_STATIC); - __ br(Assembler::EQ, L); - // get mirror - __ ldr(t, Address(rmethod, Method::const_offset())); - __ ldr(t, Address(t, ConstMethod::constants_offset())); - __ ldr(t, Address(t, ConstantPool::pool_holder_offset_in_bytes())); - __ ldr(t, Address(t, mirror_offset)); - // copy mirror into activation frame - __ str(t, Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize)); - // pass handle to mirror - __ add(c_rarg1, rfp, frame::interpreter_frame_oop_temp_offset * wordSize); - __ bind(L); - } - - // get native function entry point in r10 - { - Label L; - __ ldr(r10, Address(rmethod, Method::native_function_offset())); - address unsatisfied = (SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); - __ mov(rscratch2, unsatisfied); - __ ldr(rscratch2, rscratch2); - __ cmp(r10, rscratch2); - __ br(Assembler::NE, L); - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::prepare_native_call), - rmethod); - __ get_method(rmethod); - __ ldr(r10, Address(rmethod, Method::native_function_offset())); - __ bind(L); - } - - // pass JNIEnv - __ add(c_rarg0, rthread, in_bytes(JavaThread::jni_environment_offset())); - - // It is enough that the pc() points into the right code - // segment. It does not have to be the correct return pc. 
- __ set_last_Java_frame(esp, rfp, (address)NULL, rscratch1); - - // change thread state -#ifdef ASSERT - { - Label L; - __ ldrw(t, Address(rthread, JavaThread::thread_state_offset())); - __ cmp(t, _thread_in_Java); - __ br(Assembler::EQ, L); - __ stop("Wrong thread state in native stub"); - __ bind(L); - } -#endif - - // Change state to native - __ mov(rscratch1, _thread_in_native); - __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); - __ stlrw(rscratch1, rscratch2); - - // Call the native method. - __ blrt(r10, rscratch1); - __ maybe_isb(); - __ get_method(rmethod); - // result potentially in r0 or v0 - - // make room for the pushes we're about to do - __ sub(rscratch1, esp, 4 * wordSize); - __ andr(sp, rscratch1, -16); - - // NOTE: The order of these pushes is known to frame::interpreter_frame_result - // in order to extract the result of a method call. If the order of these - // pushes change or anything else is added to the stack then the code in - // interpreter_frame_result must also change. - __ push(dtos); - __ push(ltos); - - // change thread state - __ mov(rscratch1, _thread_in_native_trans); - __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); - __ stlrw(rscratch1, rscratch2); - - if (os::is_MP()) { - if (UseMembar) { - // Force this write out before the read below - __ dsb(Assembler::SY); - } else { - // Write serialization page so VM thread can do a pseudo remote membar. - // We use the current thread pointer to calculate a thread specific - // offset to write to within the page. This minimizes bus traffic - // due to cache line collision. 
- __ serialize_memory(rthread, rscratch2); - } - } - - // check for safepoint operation in progress and/or pending suspend requests - { - Label Continue; - { - unsigned long offset; - __ adrp(rscratch2, SafepointSynchronize::address_of_state(), offset); - __ ldrw(rscratch2, Address(rscratch2, offset)); - } - assert(SafepointSynchronize::_not_synchronized == 0, - "SafepointSynchronize::_not_synchronized"); - Label L; - __ cbnz(rscratch2, L); - __ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset())); - __ cbz(rscratch2, Continue); - __ bind(L); - - // Don't use call_VM as it will see a possible pending exception - // and forward it and never return here preventing us from - // clearing _last_native_pc down below. So we do a runtime call by - // hand. - // - __ mov(c_rarg0, rthread); - __ mov(rscratch2, CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)); - __ blrt(rscratch2, 1, 0, 0); - __ maybe_isb(); - __ get_method(rmethod); - __ reinit_heapbase(); - __ bind(Continue); - } - - // change thread state - __ mov(rscratch1, _thread_in_Java); - __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); - __ stlrw(rscratch1, rscratch2); - - // reset_last_Java_frame - __ reset_last_Java_frame(true, true); - - // reset handle block - __ ldr(t, Address(rthread, JavaThread::active_handles_offset())); - __ str(zr, Address(t, JNIHandleBlock::top_offset_in_bytes())); - - // If result is an oop unbox and store it in frame where gc will see it - // and result handler will pick it up - - { - Label no_oop, store_result; - __ adr(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); - __ cmp(t, result_handler); - __ br(Assembler::NE, no_oop); - // retrieve result - __ pop(ltos); - __ cbz(r0, store_result); - __ ldr(r0, Address(r0, 0)); - __ bind(store_result); - __ str(r0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); - // keep stack depth as expected by pushing oop which will eventually be 
discarded - __ push(ltos); - __ bind(no_oop); - } - - { - Label no_reguard; - __ lea(rscratch1, Address(rthread, in_bytes(JavaThread::stack_guard_state_offset()))); - __ ldrb(rscratch1, Address(rscratch1)); - __ cmp(rscratch1, JavaThread::stack_guard_yellow_disabled); - __ br(Assembler::NE, no_reguard); - - __ pusha(); // XXX only save smashed registers - __ mov(c_rarg0, rthread); - __ mov(rscratch2, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages)); - __ blrt(rscratch2, 0, 0, 0); - __ popa(); // XXX only restore smashed registers - __ bind(no_reguard); - } - - // The method register is junk from after the thread_in_native transition - // until here. Also can't call_VM until the bcp has been - // restored. Need bcp for throwing exception below so get it now. - __ get_method(rmethod); - - // restore bcp to have legal interpreter frame, i.e., bci == 0 <=> - // rbcp == code_base() - __ ldr(rbcp, Address(rmethod, Method::const_offset())); // get ConstMethod* - __ add(rbcp, rbcp, in_bytes(ConstMethod::codes_offset())); // get codebase - // handle exceptions (exception handling will handle unlocking!) - { - Label L; - __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); - __ cbz(rscratch1, L); - // Note: At some point we may want to unify this with the code - // used in call_VM_base(); i.e., we should use the - // StubRoutines::forward_exception code. For now this doesn't work - // here because the rsp is not correctly set at this point. 
- __ MacroAssembler::call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::throw_pending_exception)); - __ should_not_reach_here(); - __ bind(L); - } - - // do unlocking if necessary - { - Label L; - __ ldrw(t, Address(rmethod, Method::access_flags_offset())); - __ tst(t, JVM_ACC_SYNCHRONIZED); - __ br(Assembler::EQ, L); - // the code below should be shared with interpreter macro - // assembler implementation - { - Label unlock; - // BasicObjectLock will be first in list, since this is a - // synchronized method. However, need to check that the object - // has not been unlocked by an explicit monitorexit bytecode. - - // monitor expect in c_rarg1 for slow unlock path - __ lea (c_rarg1, Address(rfp, // address of first monitor - (intptr_t)(frame::interpreter_frame_initial_sp_offset * - wordSize - sizeof(BasicObjectLock)))); - - __ ldr(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); - __ cbnz(t, unlock); - - // Entry already unlocked, need to throw exception - __ MacroAssembler::call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::throw_illegal_monitor_state_exception)); - __ should_not_reach_here(); - - __ bind(unlock); - __ unlock_object(c_rarg1); - } - __ bind(L); - } - - // jvmti support - // Note: This must happen _after_ handling/throwing any exceptions since - // the exception handler code notifies the runtime of method exits - // too. If this happens before, method entry/exit notifications are - // not properly paired (was bug - gri 11/22/99). 
- __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); - - // restore potential result in r0:d0, call result handler to - // restore potential result in ST0 & handle result - - __ pop(ltos); - __ pop(dtos); - - __ blr(result_handler); - - // remove activation - __ ldr(esp, Address(rfp, - frame::interpreter_frame_sender_sp_offset * - wordSize)); // get sender sp - // remove frame anchor - __ leave(); - - // resture sender sp - __ mov(sp, esp); - - __ ret(lr); - - if (inc_counter) { - // Handle overflow of counter and compile method - __ bind(invocation_counter_overflow); - generate_counter_overflow(&continue_after_compile); - } - - return entry_point; -} - -// -// Generic interpreted method entry to (asm) interpreter -// -address InterpreterGenerator::generate_normal_entry(bool synchronized) { - // determine code generation flags - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; - - // rscratch1: sender sp - address entry_point = __ pc(); - - const Address constMethod(rmethod, Method::const_offset()); - const Address access_flags(rmethod, Method::access_flags_offset()); - const Address size_of_parameters(r3, - ConstMethod::size_of_parameters_offset()); - const Address size_of_locals(r3, ConstMethod::size_of_locals_offset()); - - // get parameter size (always needed) - // need to load the const method first - __ ldr(r3, constMethod); - __ load_unsigned_short(r2, size_of_parameters); - - // r2: size of parameters - - __ load_unsigned_short(r3, size_of_locals); // get size of locals in words - __ sub(r3, r3, r2); // r3 = no. of additional locals - - // see if we've got enough room on the stack for locals plus overhead. 
- generate_stack_overflow_check(); - - // compute beginning of parameters (rlocals) - __ add(rlocals, esp, r2, ext::uxtx, 3); - __ sub(rlocals, rlocals, wordSize); - - // Make room for locals - __ sub(rscratch1, esp, r3, ext::uxtx, 3); - __ andr(sp, rscratch1, -16); - - // r3 - # of additional locals - // allocate space for locals - // explicitly initialize locals - { - Label exit, loop; - __ ands(zr, r3, r3); - __ br(Assembler::LE, exit); // do nothing if r3 <= 0 - __ bind(loop); - __ str(zr, Address(__ post(rscratch1, wordSize))); - __ sub(r3, r3, 1); // until everything initialized - __ cbnz(r3, loop); - __ bind(exit); - } - - // And the base dispatch table - __ get_dispatch(); - - // initialize fixed part of activation frame - generate_fixed_frame(false); -#ifndef PRODUCT - // tell the simulator that a method has been entered - if (NotifySimulator) { - __ notify(Assembler::method_entry); - } -#endif - // make sure method is not native & not abstract -#ifdef ASSERT - __ ldrw(r0, access_flags); - { - Label L; - __ tst(r0, JVM_ACC_NATIVE); - __ br(Assembler::EQ, L); - __ stop("tried to execute native method as non-native"); - __ bind(L); - } - { - Label L; - __ tst(r0, JVM_ACC_ABSTRACT); - __ br(Assembler::EQ, L); - __ stop("tried to execute abstract method in interpreter"); - __ bind(L); - } -#endif - - // Since at this point in the method invocation the exception - // handler would try to exit the monitor of synchronized methods - // which hasn't been entered yet, we set the thread local variable - // _do_not_unlock_if_synchronized to true. The remove_activation - // will check this flag. 
- - const Address do_not_unlock_if_synchronized(rthread, - in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); - __ mov(rscratch2, true); - __ strb(rscratch2, do_not_unlock_if_synchronized); - - // increment invocation count & check for overflow - Label invocation_counter_overflow; - Label profile_method; - Label profile_method_continue; - if (inc_counter) { - generate_counter_incr(&invocation_counter_overflow, - &profile_method, - &profile_method_continue); - if (ProfileInterpreter) { - __ bind(profile_method_continue); - } - } - - Label continue_after_compile; - __ bind(continue_after_compile); - - bang_stack_shadow_pages(false); - - // reset the _do_not_unlock_if_synchronized flag - __ strb(zr, do_not_unlock_if_synchronized); - - // check for synchronized methods - // Must happen AFTER invocation_counter check and stack overflow check, - // so method is not locked if overflows. - if (synchronized) { - // Allocate monitor and lock method - lock_method(); - } else { - // no synchronization necessary -#ifdef ASSERT - { - Label L; - __ ldrw(r0, access_flags); - __ tst(r0, JVM_ACC_SYNCHRONIZED); - __ br(Assembler::EQ, L); - __ stop("method needs synchronization"); - __ bind(L); - } -#endif - } - - // start execution -#ifdef ASSERT - { - Label L; - const Address monitor_block_top (rfp, - frame::interpreter_frame_monitor_block_top_offset * wordSize); - __ ldr(rscratch1, monitor_block_top); - __ cmp(esp, rscratch1); - __ br(Assembler::EQ, L); - __ stop("broken stack frame setup in interpreter"); - __ bind(L); - } -#endif - - // jvmti support - __ notify_method_entry(); - - __ dispatch_next(vtos); - - // invocation counter overflow - if (inc_counter) { - if (ProfileInterpreter) { - // We have decided to profile this method in the interpreter - __ bind(profile_method); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); - __ set_method_data_pointer_for_bcp(); - // don't think we need this - __ get_method(r1); - __ 
b(profile_method_continue); - } - // Handle overflow of counter and compile method - __ bind(invocation_counter_overflow); - generate_counter_overflow(&continue_after_compile); - } - - return entry_point; -} - // These should never be compiled since the interpreter will prefer // the compiled version to the intrinsic version. bool AbstractInterpreter::can_be_compiled(methodHandle m) { @@ -1593,483 +178,3 @@ void AbstractInterpreter::layout_activation(Method* method, *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); } - - -//----------------------------------------------------------------------------- -// Exceptions - -void TemplateInterpreterGenerator::generate_throw_exception() { - // Entry point in previous activation (i.e., if the caller was - // interpreted) - Interpreter::_rethrow_exception_entry = __ pc(); - // Restore sp to interpreter_frame_last_sp even though we are going - // to empty the expression stack for the exception processing. - __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); - // r0: exception - // r3: return address/pc that threw exception - __ restore_bcp(); // rbcp points to call/send - __ restore_locals(); - __ restore_constant_pool_cache(); - __ reinit_heapbase(); // restore rheapbase as heapbase. - __ get_dispatch(); - -#ifndef PRODUCT - // tell the simulator that the caller method has been reentered - if (NotifySimulator) { - __ get_method(rmethod); - __ notify(Assembler::method_reentry); - } -#endif - // Entry point for exceptions thrown within interpreter code - Interpreter::_throw_exception_entry = __ pc(); - // If we came here via a NullPointerException on the receiver of a - // method, rmethod may be corrupt. 
- __ get_method(rmethod); - // expression stack is undefined here - // r0: exception - // rbcp: exception bcp - __ verify_oop(r0); - __ mov(c_rarg1, r0); - - // expression stack must be empty before entering the VM in case of - // an exception - __ empty_expression_stack(); - // find exception handler address and preserve exception oop - __ call_VM(r3, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::exception_handler_for_exception), - c_rarg1); - - // Calculate stack limit - __ ldr(rscratch1, Address(rmethod, Method::const_offset())); - __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); - __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 4); - __ ldr(rscratch2, - Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); - __ sub(rscratch1, rscratch2, rscratch1, ext::uxtx, 3); - __ andr(sp, rscratch1, -16); - - // r0: exception handler entry point - // r3: preserved exception oop - // rbcp: bcp for exception handler - __ push_ptr(r3); // push exception which is now the only value on the stack - __ br(r0); // jump to exception handler (may be _remove_activation_entry!) - - // If the exception is not handled in the current frame the frame is - // removed and the exception is rethrown (i.e. exception - // continuation is _rethrow_exception). - // - // Note: At this point the bci is still the bxi for the instruction - // which caused the exception and the expression stack is - // empty. Thus, for any VM calls at this point, GC will find a legal - // oop map (with empty expression stack). - - // - // JVMTI PopFrame support - // - - Interpreter::_remove_activation_preserving_args_entry = __ pc(); - __ empty_expression_stack(); - // Set the popframe_processing bit in pending_popframe_condition - // indicating that we are currently handling popframe, so that - // call_VMs that may happen later do not trigger new popframe - // handling cycles. 
- __ ldrw(r3, Address(rthread, JavaThread::popframe_condition_offset())); - __ orr(r3, r3, JavaThread::popframe_processing_bit); - __ strw(r3, Address(rthread, JavaThread::popframe_condition_offset())); - - { - // Check to see whether we are returning to a deoptimized frame. - // (The PopFrame call ensures that the caller of the popped frame is - // either interpreted or compiled and deoptimizes it if compiled.) - // In this case, we can't call dispatch_next() after the frame is - // popped, but instead must save the incoming arguments and restore - // them after deoptimization has occurred. - // - // Note that we don't compare the return PC against the - // deoptimization blob's unpack entry because of the presence of - // adapter frames in C2. - Label caller_not_deoptimized; - __ ldr(c_rarg1, Address(rfp, frame::return_addr_offset * wordSize)); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, - InterpreterRuntime::interpreter_contains), c_rarg1); - __ cbnz(r0, caller_not_deoptimized); - - // Compute size of arguments for saving when returning to - // deoptimized caller - __ get_method(r0); - __ ldr(r0, Address(r0, Method::const_offset())); - __ load_unsigned_short(r0, Address(r0, in_bytes(ConstMethod:: - size_of_parameters_offset()))); - __ lsl(r0, r0, Interpreter::logStackElementSize); - __ restore_locals(); // XXX do we need this? 
- __ sub(rlocals, rlocals, r0); - __ add(rlocals, rlocals, wordSize); - // Save these arguments - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, - Deoptimization:: - popframe_preserve_args), - rthread, r0, rlocals); - - __ remove_activation(vtos, - /* throw_monitor_exception */ false, - /* install_monitor_exception */ false, - /* notify_jvmdi */ false); - - // Inform deoptimization that it is responsible for restoring - // these arguments - __ mov(rscratch1, JavaThread::popframe_force_deopt_reexecution_bit); - __ strw(rscratch1, Address(rthread, JavaThread::popframe_condition_offset())); - - // Continue in deoptimization handler - __ ret(lr); - - __ bind(caller_not_deoptimized); - } - - __ remove_activation(vtos, - /* throw_monitor_exception */ false, - /* install_monitor_exception */ false, - /* notify_jvmdi */ false); - - // Restore the last_sp and null it out - __ ldr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); - __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); - - __ restore_bcp(); - __ restore_locals(); - __ restore_constant_pool_cache(); - __ get_method(rmethod); - - // The method data pointer was incremented already during - // call profiling. We have to restore the mdp for the current bcp. - if (ProfileInterpreter) { - __ set_method_data_pointer_for_bcp(); - } - - // Clear the popframe condition flag - __ strw(zr, Address(rthread, JavaThread::popframe_condition_offset())); - assert(JavaThread::popframe_inactive == 0, "fix popframe_inactive"); - -#if INCLUDE_JVMTI - { - Label L_done; - - __ ldrb(rscratch1, Address(rbcp, 0)); - __ cmpw(r1, Bytecodes::_invokestatic); - __ br(Assembler::EQ, L_done); - - // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. - // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. 
- - __ ldr(c_rarg0, Address(rlocals, 0)); - __ call_VM(r0, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), c_rarg0, rmethod, rbcp); - - __ cbz(r0, L_done); - - __ str(r0, Address(esp, 0)); - __ bind(L_done); - } -#endif // INCLUDE_JVMTI - - // Restore machine SP - __ ldr(rscratch1, Address(rmethod, Method::const_offset())); - __ ldrh(rscratch1, Address(rscratch1, ConstMethod::max_stack_offset())); - __ add(rscratch1, rscratch1, frame::interpreter_frame_monitor_size() + 4); - __ ldr(rscratch2, - Address(rfp, frame::interpreter_frame_initial_sp_offset * wordSize)); - __ sub(rscratch1, rscratch2, rscratch1, ext::uxtw, 3); - __ andr(sp, rscratch1, -16); - - __ dispatch_next(vtos); - // end of PopFrame support - - Interpreter::_remove_activation_entry = __ pc(); - - // preserve exception over this code sequence - __ pop_ptr(r0); - __ str(r0, Address(rthread, JavaThread::vm_result_offset())); - // remove the activation (without doing throws on illegalMonitorExceptions) - __ remove_activation(vtos, false, true, false); - // restore exception - // restore exception - __ get_vm_result(r0, rthread); - - // In between activations - previous activation type unknown yet - // compute continuation point - the continuation point expects the - // following registers set up: - // - // r0: exception - // lr: return address/pc that threw exception - // rsp: expression stack of caller - // rfp: fp of caller - // FIXME: There's no point saving LR here because VM calls don't trash it - __ stp(r0, lr, Address(__ pre(sp, -2 * wordSize))); // save exception & return address - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, - SharedRuntime::exception_handler_for_return_address), - rthread, lr); - __ mov(r1, r0); // save exception handler - __ ldp(r0, lr, Address(__ post(sp, 2 * wordSize))); // restore exception & return address - // We might be returning to a deopt handler that expects r3 to - // contain the exception pc - __ mov(r3, lr); - // Note that an "issuing 
PC" is actually the next PC after the call - __ br(r1); // jump to exception - // handler of caller -} - - -// -// JVMTI ForceEarlyReturn support -// -address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { - address entry = __ pc(); - - __ restore_bcp(); - __ restore_locals(); - __ empty_expression_stack(); - __ load_earlyret_value(state); - - __ ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); - Address cond_addr(rscratch1, JvmtiThreadState::earlyret_state_offset()); - - // Clear the earlyret state - assert(JvmtiThreadState::earlyret_inactive == 0, "should be"); - __ str(zr, cond_addr); - - __ remove_activation(state, - false, /* throw_monitor_exception */ - false, /* install_monitor_exception */ - true); /* notify_jvmdi */ - __ ret(lr); - - return entry; -} // end of ForceEarlyReturn support - - - -//----------------------------------------------------------------------------- -// Helper for vtos entry point generation - -void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, - address& bep, - address& cep, - address& sep, - address& aep, - address& iep, - address& lep, - address& fep, - address& dep, - address& vep) { - assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); - Label L; - aep = __ pc(); __ push_ptr(); __ b(L); - fep = __ pc(); __ push_f(); __ b(L); - dep = __ pc(); __ push_d(); __ b(L); - lep = __ pc(); __ push_l(); __ b(L); - bep = cep = sep = - iep = __ pc(); __ push_i(); - vep = __ pc(); - __ bind(L); - generate_and_dispatch(t); -} - -//----------------------------------------------------------------------------- -// Generation of individual instructions - -// helpers for generate_and_dispatch - - -InterpreterGenerator::InterpreterGenerator(StubQueue* code) - : TemplateInterpreterGenerator(code) { - generate_all(); // down here so it can be "virtual" -} - -//----------------------------------------------------------------------------- - -// Non-product code 
-#ifndef PRODUCT -address TemplateInterpreterGenerator::generate_trace_code(TosState state) { - address entry = __ pc(); - - __ push(lr); - __ push(state); - __ push(RegSet::range(r0, r15), sp); - __ mov(c_rarg2, r0); // Pass itos - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), - c_rarg1, c_rarg2, c_rarg3); - __ pop(RegSet::range(r0, r15), sp); - __ pop(state); - __ pop(lr); - __ ret(lr); // return from result handler - - return entry; -} - -void TemplateInterpreterGenerator::count_bytecode() { - Register rscratch3 = r0; - __ push(rscratch1); - __ push(rscratch2); - __ push(rscratch3); - Label L; - __ mov(rscratch2, (address) &BytecodeCounter::_counter_value); - __ bind(L); - __ ldxr(rscratch1, rscratch2); - __ add(rscratch1, rscratch1, 1); - __ stxr(rscratch3, rscratch1, rscratch2); - __ cbnzw(rscratch3, L); - __ pop(rscratch3); - __ pop(rscratch2); - __ pop(rscratch1); -} - -void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ; } - -void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ; } - - -void TemplateInterpreterGenerator::trace_bytecode(Template* t) { - // Call a little run-time stub to avoid blow-up for each bytecode. - // The run-time runtime saves the right registers, depending on - // the tosca in-state for the given template. 
- - assert(Interpreter::trace_code(t->tos_in()) != NULL, - "entry must have been generated"); - __ bl(Interpreter::trace_code(t->tos_in())); - __ reinit_heapbase(); -} - - -void TemplateInterpreterGenerator::stop_interpreter_at() { - Label L; - __ push(rscratch1); - __ mov(rscratch1, (address) &BytecodeCounter::_counter_value); - __ ldr(rscratch1, Address(rscratch1)); - __ mov(rscratch2, StopInterpreterAt); - __ cmpw(rscratch1, rscratch2); - __ br(Assembler::NE, L); - __ brk(0); - __ bind(L); - __ pop(rscratch1); -} - -#ifdef BUILTIN_SIM - -#include -#include - -extern "C" { - static int PAGESIZE = getpagesize(); - int is_mapped_address(u_int64_t address) - { - address = (address & ~((u_int64_t)PAGESIZE - 1)); - if (msync((void *)address, PAGESIZE, MS_ASYNC) == 0) { - return true; - } - if (errno != ENOMEM) { - return true; - } - return false; - } - - void bccheck1(u_int64_t pc, u_int64_t fp, char *method, int *bcidx, int *framesize, char *decode) - { - if (method != 0) { - method[0] = '\0'; - } - if (bcidx != 0) { - *bcidx = -2; - } - if (decode != 0) { - decode[0] = 0; - } - - if (framesize != 0) { - *framesize = -1; - } - - if (Interpreter::contains((address)pc)) { - AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck); - Method* meth; - address bcp; - if (fp) { -#define FRAME_SLOT_METHOD 3 -#define FRAME_SLOT_BCP 7 - meth = (Method*)sim->getMemory()->loadU64(fp - (FRAME_SLOT_METHOD << 3)); - bcp = (address)sim->getMemory()->loadU64(fp - (FRAME_SLOT_BCP << 3)); -#undef FRAME_SLOT_METHOD -#undef FRAME_SLOT_BCP - } else { - meth = (Method*)sim->getCPUState().xreg(RMETHOD, 0); - bcp = (address)sim->getCPUState().xreg(RBCP, 0); - } - if (meth->is_native()) { - return; - } - if(method && meth->is_method()) { - ResourceMark rm; - method[0] = 'I'; - method[1] = ' '; - meth->name_and_sig_as_C_string(method + 2, 398); - } - if (bcidx) { - if (meth->contains(bcp)) { - *bcidx = meth->bci_from(bcp); - } else { - *bcidx = -2; - } - } - if 
(decode) { - if (!BytecodeTracer::closure()) { - BytecodeTracer::set_closure(BytecodeTracer::std_closure()); - } - stringStream str(decode, 400); - BytecodeTracer::trace(meth, bcp, &str); - } - } else { - if (method) { - CodeBlob *cb = CodeCache::find_blob((address)pc); - if (cb != NULL) { - if (cb->is_nmethod()) { - ResourceMark rm; - nmethod* nm = (nmethod*)cb; - method[0] = 'C'; - method[1] = ' '; - nm->method()->name_and_sig_as_C_string(method + 2, 398); - } else if (cb->is_adapter_blob()) { - strcpy(method, "B adapter blob"); - } else if (cb->is_runtime_stub()) { - strcpy(method, "B runtime stub"); - } else if (cb->is_exception_stub()) { - strcpy(method, "B exception stub"); - } else if (cb->is_deoptimization_stub()) { - strcpy(method, "B deoptimization stub"); - } else if (cb->is_safepoint_stub()) { - strcpy(method, "B safepoint stub"); - } else if (cb->is_uncommon_trap_stub()) { - strcpy(method, "B uncommon trap stub"); - } else if (cb->contains((address)StubRoutines::call_stub())) { - strcpy(method, "B call stub"); - } else { - strcpy(method, "B unknown blob : "); - strcat(method, cb->name()); - } - if (framesize != NULL) { - *framesize = cb->frame_size(); - } - } - } - } - } - - - JNIEXPORT void bccheck(u_int64_t pc, u_int64_t fp, char *method, int *bcidx, int *framesize, char *decode) - { - bccheck1(pc, fp, method, bcidx, framesize, decode); - } -} - -#endif // BUILTIN_SIM -#endif // !PRODUCT -#endif // ! 
CC_INTERP diff --git a/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp b/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp index 0fbf97085ca..280ebd5148b 100644 --- a/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp @@ -39,7 +39,6 @@ #include "prims/jvmtiThreadState.hpp" #include "prims/methodHandles.hpp" #include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -61,26 +60,6 @@ #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") -int AbstractInterpreter::BasicType_as_index(BasicType type) { - int i = 0; - switch (type) { - case T_BOOLEAN: i = 0; break; - case T_CHAR : i = 1; break; - case T_BYTE : i = 2; break; - case T_SHORT : i = 3; break; - case T_INT : i = 4; break; - case T_LONG : i = 5; break; - case T_VOID : i = 6; break; - case T_FLOAT : i = 7; break; - case T_DOUBLE : i = 8; break; - case T_OBJECT : i = 9; break; - case T_ARRAY : i = 9; break; - default : ShouldNotReachHere(); - } - assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); - return i; -} - address AbstractInterpreterGenerator::generate_slow_signature_handler() { // Slow_signature handler that respects the PPC C calling conventions. // @@ -579,18 +558,3 @@ address InterpreterGenerator::generate_Reference_get_entry(void) { return NULL; } - -void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { - // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in - // the days we had adapter frames. When we deoptimize a situation where a - // compiled caller calls a compiled caller will have registers it expects - // to survive the call to the callee. If we deoptimize the callee the only - // way we can restore these registers is to have the oldest interpreter - // frame that we create restore these values. That is what this routine - // will accomplish. 
- - // At the moment we have modified c2 to not have any callee save registers - // so this problem does not exist and this routine is just a place holder. - - assert(f->is_interpreted_frame(), "must be interpreted"); -} diff --git a/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp b/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp new file mode 100644 index 00000000000..1d99393562f --- /dev/null +++ b/hotspot/src/cpu/ppc/vm/templateInterpreterGenerator_ppc.cpp @@ -0,0 +1,1802 @@ +/* + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#ifndef CC_INTERP +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + +#undef __ +#define __ _masm-> + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":") + +//----------------------------------------------------------------------------- + +// Actually we should never reach here since we do stack overflow checks before pushing any frame. +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + __ unimplemented("generate_StackOverflowError_handler"); + return entry; +} + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char* name) { + address entry = __ pc(); + __ empty_expression_stack(); + __ load_const_optimized(R4_ARG2, (address) name); + // Index is in R17_tos. 
+ __ mr(R5_ARG3, R17_tos); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException)); + return entry; +} + +#if 0 +// Call special ClassCastException constructor taking object to cast +// and target class as arguments. +address TemplateInterpreterGenerator::generate_ClassCastException_verbose_handler() { + address entry = __ pc(); + + // Expression stack must be empty before entering the VM if an + // exception happened. + __ empty_expression_stack(); + + // Thread will be loaded to R3_ARG1. + // Target class oop is in register R5_ARG3 by convention! + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException_verbose), R17_tos, R5_ARG3); + // Above call must not return here since exception pending. + DEBUG_ONLY(__ should_not_reach_here();) + return entry; +} +#endif + +address TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + // Expression stack must be empty before entering the VM if an + // exception happened. + __ empty_expression_stack(); + + // Load exception object. + // Thread will be loaded to R3_ARG1. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), R17_tos); +#ifdef ASSERT + // Above call must not return here since exception pending. + __ should_not_reach_here(); +#endif + return entry; +} + +address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) { + address entry = __ pc(); + //__ untested("generate_exception_handler_common"); + Register Rexception = R17_tos; + + // Expression stack must be empty before entering the VM if an exception happened. 
+ __ empty_expression_stack(); + + __ load_const_optimized(R4_ARG2, (address) name, R11_scratch1); + if (pass_oop) { + __ mr(R5_ARG3, Rexception); + __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), false); + } else { + __ load_const_optimized(R5_ARG3, (address) message, R11_scratch1); + __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), false); + } + + // Throw exception. + __ mr(R3_ARG1, Rexception); + __ load_const_optimized(R11_scratch1, Interpreter::throw_exception_entry(), R12_scratch2); + __ mtctr(R11_scratch1); + __ bctr(); + + return entry; +} + +address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { + address entry = __ pc(); + __ unimplemented("generate_continuation_for"); + return entry; +} + +// This entry is returned to when a call returns to the interpreter. +// When we arrive here, we expect that the callee stack frame is already popped. +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + address entry = __ pc(); + + // Move the value out of the return register back to the TOS cache of current frame. + switch (state) { + case ltos: + case btos: + case ctos: + case stos: + case atos: + case itos: __ mr(R17_tos, R3_RET); break; // RET -> TOS cache + case ftos: + case dtos: __ fmr(F15_ftos, F1_RET); break; // F1_RET -> TOS cache + case vtos: break; // Nothing to do, this was a void return. + default : ShouldNotReachHere(); + } + + __ restore_interpreter_state(R11_scratch1); // Sets R11_scratch1 = fp. + __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1); + __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0); + + // Compiled code destroys templateTableBase, reload.
+ __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R12_scratch2); + + if (state == atos) { + __ profile_return_type(R3_RET, R11_scratch1, R12_scratch2); + } + + const Register cache = R11_scratch1; + const Register size = R12_scratch2; + __ get_cache_and_index_at_bcp(cache, 1, index_size); + + // Get least significant byte of 64 bit value: +#if defined(VM_LITTLE_ENDIAN) + __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()), cache); +#else + __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()) + 7, cache); +#endif + __ sldi(size, size, Interpreter::logStackElementSize); + __ add(R15_esp, R15_esp, size); + __ dispatch_next(state, step); + return entry; +} + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { + address entry = __ pc(); + // If state != vtos, we're returning from a native method, which put its result + // into the result register. So move the value out of the return register back + // to the TOS cache of current frame. + + switch (state) { + case ltos: + case btos: + case ctos: + case stos: + case atos: + case itos: __ mr(R17_tos, R3_RET); break; // GR_RET -> TOS cache + case ftos: + case dtos: __ fmr(F15_ftos, F1_RET); break; // F1_RET -> TOS cache + case vtos: break; // Nothing to do, this was a void return. + default : ShouldNotReachHere(); + } + + // Load LcpoolCache @@@ should be already set! + __ get_constant_pool_cache(R27_constPoolCache); + + // Handle a pending exception, fall through if none. + __ check_and_forward_exception(R11_scratch1, R12_scratch2); + + // Start executing bytecodes. + __ dispatch_next(state, step); + + return entry; +} + +// A result handler converts the native result into java format. +// Use the shared code between c++ and template interpreter.
+address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { + return AbstractInterpreterGenerator::generate_result_handler_for(type); +} + +address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) { + address entry = __ pc(); + + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); + + return entry; +} + +// Helpers for commoning out cases in the various type of method entries. + +// Increment invocation count & check for overflow. +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test. +// +void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { + // Note: In tiered we increment either counters in method or in MDO depending if we're profiling or not. + Register Rscratch1 = R11_scratch1; + Register Rscratch2 = R12_scratch2; + Register R3_counters = R3_ARG1; + Label done; + + if (TieredCompilation) { + const int increment = InvocationCounter::count_increment; + const int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; + Label no_mdo; + if (ProfileInterpreter) { + const Register Rmdo = Rscratch1; + // If no method data exists, go to profile_continue. + __ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method); + __ cmpdi(CCR0, Rmdo, 0); + __ beq(CCR0, no_mdo); + + // Increment backedge counter in the MDO. + const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); + __ lwz(Rscratch2, mdo_bc_offs, Rmdo); + __ addi(Rscratch2, Rscratch2, increment); + __ stw(Rscratch2, mdo_bc_offs, Rmdo); + __ load_const_optimized(Rscratch1, mask, R0); + __ and_(Rscratch1, Rscratch2, Rscratch1); + __ bne(CCR0, done); + __ b(*overflow); + } + + // Increment counter in MethodCounters*. 
+ const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); + __ bind(no_mdo); + __ get_method_counters(R19_method, R3_counters, done); + __ lwz(Rscratch2, mo_bc_offs, R3_counters); + __ addi(Rscratch2, Rscratch2, increment); + __ stw(Rscratch2, mo_bc_offs, R3_counters); + __ load_const_optimized(Rscratch1, mask, R0); + __ and_(Rscratch1, Rscratch2, Rscratch1); + __ beq(CCR0, *overflow); + + __ bind(done); + + } else { + + // Update standard invocation counters. + Register Rsum_ivc_bec = R4_ARG2; + __ get_method_counters(R19_method, R3_counters, done); + __ increment_invocation_counter(R3_counters, Rsum_ivc_bec, R12_scratch2); + // Increment interpreter invocation counter. + if (ProfileInterpreter) { // %%% Merge this into methodDataOop. + __ lwz(R12_scratch2, in_bytes(MethodCounters::interpreter_invocation_counter_offset()), R3_counters); + __ addi(R12_scratch2, R12_scratch2, 1); + __ stw(R12_scratch2, in_bytes(MethodCounters::interpreter_invocation_counter_offset()), R3_counters); + } + // Check if we must create a method data obj. + if (ProfileInterpreter && profile_method != NULL) { + const Register profile_limit = Rscratch1; + int pl_offs = __ load_const_optimized(profile_limit, &InvocationCounter::InterpreterProfileLimit, R0, true); + __ lwz(profile_limit, pl_offs, profile_limit); + // Test to see if we should create a method data oop. + __ cmpw(CCR0, Rsum_ivc_bec, profile_limit); + __ blt(CCR0, *profile_method_continue); + // If no method data exists, go to profile_method. + __ test_method_data_pointer(*profile_method); + } + // Finally check for counter overflow. 
+ if (overflow) { + const Register invocation_limit = Rscratch1; + int il_offs = __ load_const_optimized(invocation_limit, &InvocationCounter::InterpreterInvocationLimit, R0, true); + __ lwz(invocation_limit, il_offs, invocation_limit); + assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit), "unexpected field size"); + __ cmpw(CCR0, Rsum_ivc_bec, invocation_limit); + __ bge(CCR0, *overflow); + } + + __ bind(done); + } +} + +// Generate code to initiate compilation on invocation counter overflow. +void TemplateInterpreterGenerator::generate_counter_overflow(Label& continue_entry) { + // Generate code to initiate compilation on the counter overflow. + + // InterpreterRuntime::frequency_counter_overflow takes one argument, + // which indicates if the counter overflow occurs at a backwards branch (NULL bcp) + // We pass zero in. + // The call returns the address of the verified entry point for the method or NULL + // if the compilation did not complete (either went background or bailed out). + // + // Unlike the C++ interpreter above: Check exceptions! + // Assumption: Caller must set the flag "do_not_unlock_if_synchronized" if the monitor of a sync'ed + // method has not yet been created. Thus, no unlocking of a non-existing monitor can occur. + + __ li(R4_ARG2, 0); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true); + + // Returns verified_entry_point or NULL. + // We ignore it in any case. + __ b(continue_entry); +} + +void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rmem_frame_size, Register Rscratch1) { + assert_different_registers(Rmem_frame_size, Rscratch1); + __ generate_stack_overflow_check_with_compare_and_throw(Rmem_frame_size, Rscratch1); +} + +void TemplateInterpreterGenerator::unlock_method(bool check_exceptions) { + __ unlock_object(R26_monitor, check_exceptions); +} + +// Lock the current method, interpreter register window must be set up!
+void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded) { + const Register Robj_to_lock = Rscratch2; + + { + if (!flags_preloaded) { + __ lwz(Rflags, method_(access_flags)); + } + +#ifdef ASSERT + // Check if methods needs synchronization. + { + Label Lok; + __ testbitdi(CCR0, R0, Rflags, JVM_ACC_SYNCHRONIZED_BIT); + __ btrue(CCR0,Lok); + __ stop("method doesn't need synchronization"); + __ bind(Lok); + } +#endif // ASSERT + } + + // Get synchronization object to Rscratch2. + { + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + Label Lstatic; + Label Ldone; + + __ testbitdi(CCR0, R0, Rflags, JVM_ACC_STATIC_BIT); + __ btrue(CCR0, Lstatic); + + // Non-static case: load receiver obj from stack and we're done. + __ ld(Robj_to_lock, R18_locals); + __ b(Ldone); + + __ bind(Lstatic); // Static case: Lock the java mirror + __ ld(Robj_to_lock, in_bytes(Method::const_offset()), R19_method); + __ ld(Robj_to_lock, in_bytes(ConstMethod::constants_offset()), Robj_to_lock); + __ ld(Robj_to_lock, ConstantPool::pool_holder_offset_in_bytes(), Robj_to_lock); + __ ld(Robj_to_lock, mirror_offset, Robj_to_lock); + + __ bind(Ldone); + __ verify_oop(Robj_to_lock); + } + + // Got the oop to lock => execute! + __ add_monitor_to_stack(true, Rscratch1, R0); + + __ std(Robj_to_lock, BasicObjectLock::obj_offset_in_bytes(), R26_monitor); + __ lock_object(R26_monitor, Robj_to_lock); +} + +// Generate a fixed interpreter frame for pure interpreter +// and I2N native transition frames. +// +// Before (stack grows downwards): +// +// | ... | +// |------------- | +// | java arg0 | +// | ... | +// | java argn | +// | | <- R15_esp +// | | +// |--------------| +// | abi_112 | +// | | <- R1_SP +// |==============| +// +// +// After: +// +// | ... | +// | java arg0 |<- R18_locals +// | ... 
| +// | java argn | +// |--------------| +// | | +// | java locals | +// | | +// |--------------| +// | abi_48 | +// |==============| +// | | +// | istate | +// | | +// |--------------| +// | monitor |<- R26_monitor +// |--------------| +// | |<- R15_esp +// | expression | +// | stack | +// | | +// |--------------| +// | | +// | abi_112 |<- R1_SP +// |==============| +// +// The top most frame needs an abi space of 112 bytes. This space is needed, +// since we call to c. The c function may spill their arguments to the caller +// frame. When we call to java, we don't need these spill slots. In order to save +// space on the stack, we resize the caller. However, java local reside in +// the caller frame and the frame has to be increased. The frame_size for the +// current frame was calculated based on max_stack as size for the expression +// stack. At the call, just a part of the expression stack might be used. +// We don't want to waste this space and cut the frame back accordingly. +// The resulting amount for resizing is calculated as follows: +// resize = (number_of_locals - number_of_arguments) * slot_size +// + (R1_SP - R15_esp) + 48 +// +// The size for the callee frame is calculated: +// framesize = 112 + max_stack + monitor + state_size +// +// maxstack: Max number of slots on the expression stack, loaded from the method. +// monitor: We statically reserve room for one monitor object. +// state_size: We save the current state of the interpreter to this area. +// +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Register Rsize_of_parameters, Register Rsize_of_locals) { + Register parent_frame_resize = R6_ARG4, // Frame will grow by this number of bytes. 
+ top_frame_size = R7_ARG5, + Rconst_method = R8_ARG6; + + assert_different_registers(Rsize_of_parameters, Rsize_of_locals, parent_frame_resize, top_frame_size); + + __ ld(Rconst_method, method_(const)); + __ lhz(Rsize_of_parameters /* number of params */, + in_bytes(ConstMethod::size_of_parameters_offset()), Rconst_method); + if (native_call) { + // If we're calling a native method, we reserve space for the worst-case signature + // handler varargs vector, which is max(Argument::n_register_parameters, parameter_count+2). + // We add two slots to the parameter_count, one for the jni + // environment and one for a possible native mirror. + Label skip_native_calculate_max_stack; + __ addi(top_frame_size, Rsize_of_parameters, 2); + __ cmpwi(CCR0, top_frame_size, Argument::n_register_parameters); + __ bge(CCR0, skip_native_calculate_max_stack); + __ li(top_frame_size, Argument::n_register_parameters); + __ bind(skip_native_calculate_max_stack); + __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize); + __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize); + __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize! + assert(Rsize_of_locals == noreg, "Rsize_of_locals not initialized"); // Only relevant value is Rsize_of_parameters. + } else { + __ lhz(Rsize_of_locals /* number of locals */, in_bytes(ConstMethod::size_of_locals_offset()), Rconst_method); + __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize); + __ sldi(Rsize_of_locals, Rsize_of_locals, Interpreter::logStackElementSize); + __ lhz(top_frame_size, in_bytes(ConstMethod::max_stack_offset()), Rconst_method); + __ sub(R11_scratch1, Rsize_of_locals, Rsize_of_parameters); // >=0 + __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize!
+ __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize); + __ add(parent_frame_resize, parent_frame_resize, R11_scratch1); + } + + // Compute top frame size. + __ addi(top_frame_size, top_frame_size, frame::abi_reg_args_size + frame::ijava_state_size); + + // Cut back area between esp and max_stack. + __ addi(parent_frame_resize, parent_frame_resize, frame::abi_minframe_size - Interpreter::stackElementSize); + + __ round_to(top_frame_size, frame::alignment_in_bytes); + __ round_to(parent_frame_resize, frame::alignment_in_bytes); + // parent_frame_resize = (locals-parameters) - (ESP-SP-ABI48) Rounded to frame alignment size. + // Enlarge by locals-parameters (not in case of native_call), shrink by ESP-SP-ABI48. + + { + // -------------------------------------------------------------------------- + // Stack overflow check + + Label cont; + __ add(R11_scratch1, parent_frame_resize, top_frame_size); + generate_stack_overflow_check(R11_scratch1, R12_scratch2); + } + + // Set up interpreter state registers. + + __ add(R18_locals, R15_esp, Rsize_of_parameters); + __ ld(R27_constPoolCache, in_bytes(ConstMethod::constants_offset()), Rconst_method); + __ ld(R27_constPoolCache, ConstantPool::cache_offset_in_bytes(), R27_constPoolCache); + + // Set method data pointer. + if (ProfileInterpreter) { + Label zero_continue; + __ ld(R28_mdx, method_(method_data)); + __ cmpdi(CCR0, R28_mdx, 0); + __ beq(CCR0, zero_continue); + __ addi(R28_mdx, R28_mdx, in_bytes(MethodData::data_offset())); + __ bind(zero_continue); + } + + if (native_call) { + __ li(R14_bcp, 0); // Must initialize. + } else { + __ add(R14_bcp, in_bytes(ConstMethod::codes_offset()), Rconst_method); + } + + // Resize parent frame. 
+ __ mflr(R12_scratch2); + __ neg(parent_frame_resize, parent_frame_resize); + __ resize_frame(parent_frame_resize, R11_scratch1); + __ std(R12_scratch2, _abi(lr), R1_SP); + + __ addi(R26_monitor, R1_SP, - frame::ijava_state_size); + __ addi(R15_esp, R26_monitor, - Interpreter::stackElementSize); + + // Store values. + // R15_esp, R14_bcp, R26_monitor, R28_mdx are saved at java calls + // in InterpreterMacroAssembler::call_from_interpreter. + __ std(R19_method, _ijava_state_neg(method), R1_SP); + __ std(R21_sender_SP, _ijava_state_neg(sender_sp), R1_SP); + __ std(R27_constPoolCache, _ijava_state_neg(cpoolCache), R1_SP); + __ std(R18_locals, _ijava_state_neg(locals), R1_SP); + + // Note: esp, bcp, monitor, mdx live in registers. Hence, the correct version can only + // be found in the frame after save_interpreter_state is done. This is always true + // for non-top frames. But when a signal occurs, dumping the top frame can go wrong, + // because e.g. frame::interpreter_frame_bcp() will not access the correct value + // (Enhanced Stack Trace). + // The signal handler does not save the interpreter state into the frame. + __ li(R0, 0); +#ifdef ASSERT + // Fill remaining slots with constants. + __ load_const_optimized(R11_scratch1, 0x5afe); + __ load_const_optimized(R12_scratch2, 0xdead); +#endif + // We have to initialize some frame slots for native calls (accessed by GC). 
+ if (native_call) { + __ std(R26_monitor, _ijava_state_neg(monitors), R1_SP); + __ std(R14_bcp, _ijava_state_neg(bcp), R1_SP); + if (ProfileInterpreter) { __ std(R28_mdx, _ijava_state_neg(mdx), R1_SP); } + } +#ifdef ASSERT + else { + __ std(R12_scratch2, _ijava_state_neg(monitors), R1_SP); + __ std(R12_scratch2, _ijava_state_neg(bcp), R1_SP); + __ std(R12_scratch2, _ijava_state_neg(mdx), R1_SP); + } + __ std(R11_scratch1, _ijava_state_neg(ijava_reserved), R1_SP); + __ std(R12_scratch2, _ijava_state_neg(esp), R1_SP); + __ std(R12_scratch2, _ijava_state_neg(lresult), R1_SP); + __ std(R12_scratch2, _ijava_state_neg(fresult), R1_SP); +#endif + __ subf(R12_scratch2, top_frame_size, R1_SP); + __ std(R0, _ijava_state_neg(oop_tmp), R1_SP); + __ std(R12_scratch2, _ijava_state_neg(top_frame_sp), R1_SP); + + // Push top frame. + __ push_frame(top_frame_size, R11_scratch1); +} + +// End of helpers + +address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { + if (!TemplateInterpreter::math_entry_available(kind)) { + NOT_PRODUCT(__ should_not_reach_here();) + return NULL; + } + + address entry = __ pc(); + + __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp); + + // Pop c2i arguments (if any) off when we return. +#ifdef ASSERT + __ ld(R9_ARG7, 0, R1_SP); + __ ld(R10_ARG8, 0, R21_sender_SP); + __ cmpd(CCR0, R9_ARG7, R10_ARG8); + __ asm_assert_eq("backlink", 0x545); +#endif // ASSERT + __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. + + if (kind == Interpreter::java_lang_math_sqrt) { + __ fsqrt(F1_RET, F1_RET); + } else if (kind == Interpreter::java_lang_math_abs) { + __ fabs(F1_RET, F1_RET); + } else { + ShouldNotReachHere(); + } + + // And we're done. + __ blr(); + + __ flush(); + + return entry; +} + +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the +// native method than the typical interpreter frame setup. 
+// +// On entry: +// R19_method - method +// R16_thread - JavaThread* +// R15_esp - intptr_t* sender tos +// +// abstract stack (grows up) +// [ IJava (caller of JNI callee) ] <-- ASP +// ... +address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { + + address entry = __ pc(); + + const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // ----------------------------------------------------------------------------- + // Allocate a new frame that represents the native callee (i2n frame). + // This is not a full-blown interpreter frame, but in particular, the + // following registers are valid after this: + // - R19_method + // - R18_local (points to start of argumuments to native function) + // + // abstract stack (grows up) + // [ IJava (caller of JNI callee) ] <-- ASP + // ... + + const Register signature_handler_fd = R11_scratch1; + const Register pending_exception = R0; + const Register result_handler_addr = R31; + const Register native_method_fd = R11_scratch1; + const Register access_flags = R22_tmp2; + const Register active_handles = R11_scratch1; // R26_monitor saved to state. + const Register sync_state = R12_scratch2; + const Register sync_state_addr = sync_state; // Address is dead after use. + const Register suspend_flags = R11_scratch1; + + //============================================================================= + // Allocate new frame and initialize interpreter state. + + Label exception_return; + Label exception_return_sync_check; + Label stack_overflow_return; + + // Generate new interpreter state and jump to stack_overflow_return in case of + // a stack overflow. + //generate_compute_interpreter_state(stack_overflow_return); + + Register size_of_parameters = R22_tmp2; + + generate_fixed_frame(true, size_of_parameters, noreg /* unused */); + + //============================================================================= + // Increment invocation counter. 
On overflow, entry to JNI method + // will be compiled. + Label invocation_counter_overflow, continue_after_compile; + if (inc_counter) { + if (synchronized) { + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. If any exception was thrown by + // runtime, exception handling i.e. unlock_if_synchronized_method will + // check this thread local flag. + // This flag has two effects, one is to force an unwind in the topmost + // interpreter frame and not perform an unlock while doing so. + __ li(R0, 1); + __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); + } + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + + BIND(continue_after_compile); + // Reset the _do_not_unlock_if_synchronized flag. + if (synchronized) { + __ li(R0, 0); + __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); + } + } + + // access_flags = method->access_flags(); + // Load access flags. + assert(access_flags->is_nonvolatile(), + "access_flags must be in a non-volatile register"); + // Type check. + assert(4 == sizeof(AccessFlags), "unexpected field size"); + __ lwz(access_flags, method_(access_flags)); + + // We don't want to reload R19_method and access_flags after calls + // to some helper functions. + assert(R19_method->is_nonvolatile(), + "R19_method must be a non-volatile register"); + + // Check for synchronized methods. Must happen AFTER invocation counter + // check, so method is not locked if counter overflows. + + if (synchronized) { + lock_method(access_flags, R11_scratch1, R12_scratch2, true); + + // Update monitor in state. 
+ __ ld(R11_scratch1, 0, R1_SP); + __ std(R26_monitor, _ijava_state_neg(monitors), R11_scratch1); + } + + // jvmti/jvmpi support + __ notify_method_entry(); + + //============================================================================= + // Get and call the signature handler. + + __ ld(signature_handler_fd, method_(signature_handler)); + Label call_signature_handler; + + __ cmpdi(CCR0, signature_handler_fd, 0); + __ bne(CCR0, call_signature_handler); + + // Method has never been called. Either generate a specialized + // handler or point to the slow one. + // + // Pass parameter 'false' to avoid exception check in call_VM. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false); + + // Check for an exception while looking up the target method. If we + // incurred one, bail. + __ ld(pending_exception, thread_(pending_exception)); + __ cmpdi(CCR0, pending_exception, 0); + __ bne(CCR0, exception_return_sync_check); // Has pending exception. + + // Reload signature handler, it may have been created/assigned in the meanwhile. + __ ld(signature_handler_fd, method_(signature_handler)); + __ twi_0(signature_handler_fd); // Order wrt. load of klass mirror and entry point (isync is below). + + BIND(call_signature_handler); + + // Before we call the signature handler we push a new frame to + // protect the interpreter frame volatile registers when we return + // from jni but before we can get back to Java. + + // First set the frame anchor while the SP/FP registers are + // convenient and the slow signature handler can use this same frame + // anchor. + + // We have a TOP_IJAVA_FRAME here, which belongs to us. + __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/); + + // Now the interpreter frame (and its call chain) have been + // invalidated and flushed. We are now protected against eager + // being enabled in native code. 
Even if it goes eager the + // registers will be reloaded as clean and we will invalidate after + // the call so no spurious flush should be possible. + + // Call signature handler and pass locals address. + // + // Our signature handlers copy required arguments to the C stack + // (outgoing C args), R3_ARG1 to R10_ARG8, and FARG1 to FARG13. + __ mr(R3_ARG1, R18_locals); +#if !defined(ABI_ELFv2) + __ ld(signature_handler_fd, 0, signature_handler_fd); +#endif + + __ call_stub(signature_handler_fd); + + // Remove the register parameter varargs slots we allocated in + // compute_interpreter_state. SP+16 ends up pointing to the ABI + // outgoing argument area. + // + // Not needed on PPC64. + //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord); + + assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register"); + // Save across call to native method. + __ mr(result_handler_addr, R3_RET); + + __ isync(); // Acquire signature handler before trying to fetch the native entry point and klass mirror. + + // Set up fixed parameters and call the native method. + // If the method is static, get mirror into R4_ARG2. + { + Label method_is_not_static; + // Access_flags is non-volatile and still, no need to restore it. + + // Restore access flags. 
+ __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT); + __ bfalse(CCR0, method_is_not_static); + + // constants = method->constants(); + __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method); + __ ld(R11_scratch1, in_bytes(ConstMethod::constants_offset()), R11_scratch1); + // pool_holder = method->constants()->pool_holder(); + __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), + R11_scratch1/*constants*/); + + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + + // mirror = pool_holder->klass_part()->java_mirror(); + __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/); + // state->_native_mirror = mirror; + + __ ld(R11_scratch1, 0, R1_SP); + __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); + // R4_ARG2 = &state->_oop_temp; + __ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp)); + BIND(method_is_not_static); + } + + // At this point, arguments have been copied off the stack into + // their JNI positions. Oops are boxed in-place on the stack, with + // handles copied to arguments. The result handler address is in a + // register. + + // Pass JNIEnv address as first parameter. + __ addir(R3_ARG1, thread_(jni_environment)); + + // Load the native_method entry before we change the thread state. + __ ld(native_method_fd, method_(native_function)); + + //============================================================================= + // Transition from _thread_in_Java to _thread_in_native. As soon as + // we make this change the safepoint code needs to be certain that + // the last Java frame we established is good. The pc in that frame + // just needs to be near here not an actual return address. + + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads. 
+ __ li(R0, _thread_in_native); + __ release(); + + // TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); + + if (UseMembar) { + __ fence(); + } + + //============================================================================= + // Call the native method. Argument registers must not have been + // overwritten since "__ call_stub(signature_handler);" (except for + // ARG1 and ARG2 for static methods). + __ call_c(native_method_fd); + + __ li(R0, 0); + __ ld(R11_scratch1, 0, R1_SP); + __ std(R3_RET, _ijava_state_neg(lresult), R11_scratch1); + __ stfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1); + __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); // reset + + // Note: C++ interpreter needs the following here: + // The frame_manager_lr field, which we use for setting the last + // java frame, gets overwritten by the signature handler. Restore + // it now. + //__ get_PC_trash_LR(R11_scratch1); + //__ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + + // Because of GC R19_method may no longer be valid. + + // Block, if necessary, before resuming in _thread_in_Java state. + // In order for GC to work, don't clear the last_Java_sp until after + // blocking. + + //============================================================================= + // Switch thread to "native transition" state before reading the + // synchronization state. This additional state is necessary + // because reading and testing the synchronization state is not + // atomic w.r.t. GC, as this scenario demonstrates: Java thread A, + // in _thread_in_native state, loads _not_synchronized and is + // preempted. VM thread changes sync state to synchronizing and + // suspends threads for GC. Thread A is resumed to finish this + // native method, but doesn't block here since it didn't see any + // synchronization in progress, and escapes. 
+ + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads. + __ li(R0/*thread_state*/, _thread_in_native_trans); + __ release(); + __ stw(R0/*thread_state*/, thread_(thread_state)); + if (UseMembar) { + __ fence(); + } + // Write serialization page so that the VM thread can do a pseudo remote + // membar. We use the current thread pointer to calculate a thread + // specific offset to write to within the page. This minimizes bus + // traffic due to cache line collision. + else { + __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2); + } + + // Now before we return to java we must look for a current safepoint + // (a new safepoint can not start since we entered native_trans). + // We must check here because a current safepoint could be modifying + // the callers registers right this moment. + + // Acquire isn't strictly necessary here because of the fence, but + // sync_state is declared to be volatile, so we do it anyway + // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path). + int sync_state_offs = __ load_const_optimized(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/R0, true); + + // TODO PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size"); + __ lwz(sync_state, sync_state_offs, sync_state_addr); + + // TODO PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size"); + __ lwz(suspend_flags, thread_(suspend_flags)); + + Label sync_check_done; + Label do_safepoint; + // No synchronization in progress nor yet synchronized. + __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); + // Not suspended. + __ cmpwi(CCR1, suspend_flags, 0); + + __ bne(CCR0, do_safepoint); + __ beq(CCR1, sync_check_done); + __ bind(do_safepoint); + __ isync(); + // Block. 
We do the call directly and leave the current + // last_Java_frame setup undisturbed. We must save any possible + // native result across the call. No oop is present. + + __ mr(R3_ARG1, R16_thread); +#if defined(ABI_ELFv2) + __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + relocInfo::none); +#else + __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans), + relocInfo::none); +#endif + + __ bind(sync_check_done); + + //============================================================================= + // <<<<<< Back in Interpreter Frame >>>>> + + // We are in thread_in_native_trans here and back in the normal + // interpreter frame. We don't have to do anything special about + // safepoints and we can switch to Java mode anytime we are ready. + + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. For + // native methods it assumes that the non-FPU/non-void result is + // saved in _native_lresult and a FPU result in _native_fresult. If + // this changes then the interpreter_frame_result implementation + // will need to be updated too. + + // On PPC64, we have stored the result directly after the native call. + + //============================================================================= + // Back in Java + + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads. + __ li(R0/*thread_state*/, _thread_in_Java); + __ release(); + __ stw(R0/*thread_state*/, thread_(thread_state)); + if (UseMembar) { + __ fence(); + } + + __ reset_last_Java_frame(); + + // Jvmdi/jvmpi support. Whether we've got an exception pending or + // not, and whether unlocking throws an exception or not, we notify + // on native method exit. 
If we do have an exception, we'll end up + // in the caller's context to handle it, so if we don't do the + // notify here, we'll drop it on the floor. + __ notify_method_exit(true/*native method*/, + ilgl /*illegal state (not used for native methods)*/, + InterpreterMacroAssembler::NotifyJVMTI, + false /*check_exceptions*/); + + //============================================================================= + // Handle exceptions + + if (synchronized) { + // Don't check for exceptions since we're still in the i2n frame. Do that + // manually afterwards. + unlock_method(false); + } + + // Reset active handles after returning from native. + // thread->active_handles()->clear(); + __ ld(active_handles, thread_(active_handles)); + // TODO PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size"); + __ li(R0, 0); + __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles); + + Label exception_return_sync_check_already_unlocked; + __ ld(R0/*pending_exception*/, thread_(pending_exception)); + __ cmpdi(CCR0, R0/*pending_exception*/, 0); + __ bne(CCR0, exception_return_sync_check_already_unlocked); + + //----------------------------------------------------------------------------- + // No exception pending. + + // Move native method result back into proper registers and return. + // Invoke result handler (may unbox/promote). + __ ld(R11_scratch1, 0, R1_SP); + __ ld(R3_RET, _ijava_state_neg(lresult), R11_scratch1); + __ lfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1); + __ call_stub(result_handler_addr); + + __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2); + + // Must use the return pc which was loaded from the caller's frame + // as the VM uses return-pc-patching for deoptimization. + __ mtlr(R0); + __ blr(); + + //----------------------------------------------------------------------------- + // An exception is pending. We call into the runtime only if the + // caller was not interpreted. 
If it was interpreted the + // interpreter will do the correct thing. If it isn't interpreted + // (call stub/compiled code) we will change our return and continue. + + BIND(exception_return_sync_check); + + if (synchronized) { + // Don't check for exceptions since we're still in the i2n frame. Do that + // manually afterwards. + unlock_method(false); + } + BIND(exception_return_sync_check_already_unlocked); + + const Register return_pc = R31; + + __ ld(return_pc, 0, R1_SP); + __ ld(return_pc, _abi(lr), return_pc); + + // Get the address of the exception handler. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), + R16_thread, + return_pc /* return pc */); + __ merge_frames(/*top_frame_sp*/ R21_sender_SP, noreg, R11_scratch1, R12_scratch2); + + // Load the PC of the exception handler into LR. + __ mtlr(R3_RET); + + // Load exception into R3_ARG1 and clear pending exception in thread. + __ ld(R3_ARG1/*exception*/, thread_(pending_exception)); + __ li(R4_ARG2, 0); + __ std(R4_ARG2, thread_(pending_exception)); + + // Load the original return pc into R4_ARG2. + __ mr(R4_ARG2/*issuing_pc*/, return_pc); + + // Return to exception handler. + __ blr(); + + //============================================================================= + // Counter overflow. + + if (inc_counter) { + // Handle invocation counter overflow. + __ bind(invocation_counter_overflow); + + generate_counter_overflow(continue_after_compile); + } + + return entry; +} + +// Generic interpreted method entry to (asm) interpreter. +// +address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + address entry = __ pc(); + // Generate the code to allocate the interpreter stack frame. + Register Rsize_of_parameters = R4_ARG2, // Written by generate_fixed_frame. + Rsize_of_locals = R5_ARG3; // Written by generate_fixed_frame.
+ + generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals); + + // -------------------------------------------------------------------------- + // Zero out non-parameter locals. + // Note: *Always* zero out non-parameter locals as Sparc does. It's not + // worth to ask the flag, just do it. + Register Rslot_addr = R6_ARG4, + Rnum = R7_ARG5; + Label Lno_locals, Lzero_loop; + + // Set up the zeroing loop. + __ subf(Rnum, Rsize_of_parameters, Rsize_of_locals); + __ subf(Rslot_addr, Rsize_of_parameters, R18_locals); + __ srdi_(Rnum, Rnum, Interpreter::logStackElementSize); + __ beq(CCR0, Lno_locals); + __ li(R0, 0); + __ mtctr(Rnum); + + // The zero locals loop. + __ bind(Lzero_loop); + __ std(R0, 0, Rslot_addr); + __ addi(Rslot_addr, Rslot_addr, -Interpreter::stackElementSize); + __ bdnz(Lzero_loop); + + __ bind(Lno_locals); + + // -------------------------------------------------------------------------- + // Counter increment and overflow check. + Label invocation_counter_overflow, + profile_method, + profile_method_continue; + if (inc_counter || ProfileInterpreter) { + + Register Rdo_not_unlock_if_synchronized_addr = R11_scratch1; + if (synchronized) { + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. If any exception was thrown by + // runtime, exception handling i.e. unlock_if_synchronized_method will + // check this thread local flag. + // This flag has two effects, one is to force an unwind in the topmost + // interpreter frame and not perform an unlock while doing so. + __ li(R0, 1); + __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); + } + + // Argument and return type profiling. + __ profile_parameters_type(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4); + + // Increment invocation counter and check for overflow. 
+ if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue); + } + + __ bind(profile_method_continue); + + // Reset the _do_not_unlock_if_synchronized flag. + if (synchronized) { + __ li(R0, 0); + __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); + } + } + + // -------------------------------------------------------------------------- + // Locking of synchronized methods. Must happen AFTER invocation_counter + // check and stack overflow check, so method is not locked if overflows. + if (synchronized) { + lock_method(R3_ARG1, R4_ARG2, R5_ARG3); + } +#ifdef ASSERT + else { + Label Lok; + __ lwz(R0, in_bytes(Method::access_flags_offset()), R19_method); + __ andi_(R0, R0, JVM_ACC_SYNCHRONIZED); + __ asm_assert_eq("method needs synchronization", 0x8521); + __ bind(Lok); + } +#endif // ASSERT + + __ verify_thread(); + + // -------------------------------------------------------------------------- + // JVMTI support + __ notify_method_entry(); + + // -------------------------------------------------------------------------- + // Start executing instructions. + __ dispatch_next(vtos); + + // -------------------------------------------------------------------------- + // Out of line counter overflow and MDO creation code. + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter. + __ bind(profile_method); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ b(profile_method_continue); + } + + if (inc_counter) { + // Handle invocation counter overflow. + __ bind(invocation_counter_overflow); + generate_counter_overflow(profile_method_continue); + } + return entry; +} + +// CRC32 Intrinsics. +// +// Contract on scratch and work registers. 
+// ======================================= +// +// On ppc, the register set {R2..R12} is available in the interpreter as scratch/work registers. +// You should, however, keep in mind that {R3_ARG1..R10_ARG8} is the C-ABI argument register set. +// You can't rely on these registers across calls. +// +// The generators for CRC32_update and for CRC32_updateBytes use the +// scratch/work register set internally, passing the work registers +// as arguments to the MacroAssembler emitters as required. +// +// R3_ARG1..R6_ARG4 are preset to hold the incoming java arguments. +// Their contents are not constant but may change according to the requirements +// of the emitted code. +// +// All other registers from the scratch/work register set are used "internally" +// and contain garbage (i.e. unpredictable values) once blr() is reached. +// Basically, only R3_RET contains a defined value which is the function result. +// +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address start = __ pc(); // Remember stub start address (is rtn value). + Label slow_path; + + // Safepoint check + const Register sync_state = R11_scratch1; + int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true); + __ lwz(sync_state, sync_state_offs, sync_state); + __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); + __ bne(CCR0, slow_path); + + // We don't generate local frame and don't align stack because + // we do not even call stub code (we generate the code inline) + // and there is no safepoint on this path. + + // Load java parameters. + // R15_esp is the caller's operand stack pointer, i.e. it points to the parameters.
+ const Register argP = R15_esp; + const Register crc = R3_ARG1; // crc value + const Register data = R4_ARG2; // address of java byte value (kernel_crc32 needs address) + const Register dataLen = R5_ARG3; // source data len (1 byte). Not used because calling the single-byte emitter. + const Register table = R6_ARG4; // address of crc32 table + const Register tmp = dataLen; // Reuse unused len register to show we don't actually need a separate tmp here. + + BLOCK_COMMENT("CRC32_update {"); + + // Arguments are reversed on java expression stack +#ifdef VM_LITTLE_ENDIAN + __ addi(data, argP, 0+1*wordSize); // (stack) address of byte value. Emitter expects address, not value. + // Being passed as an int, the single byte is at offset +0. +#else + __ addi(data, argP, 3+1*wordSize); // (stack) address of byte value. Emitter expects address, not value. + // Being passed from java as an int, the single byte is at offset +3. +#endif + __ lwz(crc, 2*wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register. + + StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); + __ kernel_crc32_singleByte(crc, data, dataLen, table, tmp); + + // Restore caller sp for c2i case and return. + __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. + __ blr(); + + // Generate a vanilla native entry as the slow path. + BLOCK_COMMENT("} CRC32_update"); + BIND(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1); + return start; + } + + return NULL; +} + +// CRC32 Intrinsics. +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes( int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address start = __ pc(); // Remember stub start address (is rtn value). 
+ Label slow_path; + + // Safepoint check + const Register sync_state = R11_scratch1; + int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true); + __ lwz(sync_state, sync_state_offs, sync_state); + __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); + __ bne(CCR0, slow_path); + + // We don't generate local frame and don't align stack because + // we do not even call stub code (we generate the code inline) + // and there is no safepoint on this path. + + // Load parameters. + // R15_esp is the caller's operand stack pointer, i.e. it points to the parameters. + const Register argP = R15_esp; + const Register crc = R3_ARG1; // crc value + const Register data = R4_ARG2; // address of java byte array + const Register dataLen = R5_ARG3; // source data len + const Register table = R6_ARG4; // address of crc32 table + + const Register t0 = R9; // scratch registers for crc calculation + const Register t1 = R10; + const Register t2 = R11; + const Register t3 = R12; + + const Register tc0 = R2; // registers to hold pre-calculated column addresses + const Register tc1 = R7; + const Register tc2 = R8; + const Register tc3 = table; // table address is reconstructed at the end of kernel_crc32_* emitters + + const Register tmp = t0; // Only used very locally to calculate byte buffer address. + + // Arguments are reversed on java expression stack. + // Calculate address of start element. + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { // Used for "updateByteBuffer direct".
+ BLOCK_COMMENT("CRC32_updateByteBuffer {"); + // crc @ (SP + 5W) (32bit) + // buf @ (SP + 3W) (64bit ptr to long array) + // off @ (SP + 2W) (32bit) + // dataLen @ (SP + 1W) (32bit) + // data = buf + off + __ ld( data, 3*wordSize, argP); // start of byte buffer + __ lwa( tmp, 2*wordSize, argP); // byte buffer offset + __ lwa( dataLen, 1*wordSize, argP); // #bytes to process + __ lwz( crc, 5*wordSize, argP); // current crc state + __ add( data, data, tmp); // Add byte buffer offset. + } else { // Used for "updateBytes update". + BLOCK_COMMENT("CRC32_updateBytes {"); + // crc @ (SP + 4W) (32bit) + // buf @ (SP + 3W) (64bit ptr to byte array) + // off @ (SP + 2W) (32bit) + // dataLen @ (SP + 1W) (32bit) + // data = buf + off + base_offset + __ ld( data, 3*wordSize, argP); // start of byte buffer + __ lwa( tmp, 2*wordSize, argP); // byte buffer offset + __ lwa( dataLen, 1*wordSize, argP); // #bytes to process + __ add( data, data, tmp); // add byte buffer offset + __ lwz( crc, 4*wordSize, argP); // current crc state + __ addi(data, data, arrayOopDesc::base_offset_in_bytes(T_BYTE)); + } + + StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); + + // Performance measurements show the 1word and 2word variants to be almost equivalent, + // with very light advantages for the 1word variant. We chose the 1word variant for + // code compactness. + __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3); + + // Restore caller sp for c2i case and return. + __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. + __ blr(); + + // Generate a vanilla native entry as the slow path. 
+ BLOCK_COMMENT("} CRC32_updateBytes(Buffer)"); + BIND(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1); + return start; + } + + return NULL; +} + +// ============================================================================= +// Exceptions + +void TemplateInterpreterGenerator::generate_throw_exception() { + Register Rexception = R17_tos, + Rcontinuation = R3_RET; + + // -------------------------------------------------------------------------- + // Entry point if a method returns with a pending exception (rethrow). + Interpreter::_rethrow_exception_entry = __ pc(); + { + __ restore_interpreter_state(R11_scratch1); // Sets R11_scratch1 = fp. + __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1); + __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0); + + // Compiled code destroys templateTableBase, reload. + __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1); + } + + // Entry point if an interpreted method throws an exception (throw). + Interpreter::_throw_exception_entry = __ pc(); + { + __ mr(Rexception, R3_RET); + + __ verify_thread(); + __ verify_oop(Rexception); + + // Expression stack must be empty before entering the VM in case of an exception. + __ empty_expression_stack(); + // Find exception handler address and preserve exception oop. + // Call C routine to find handler and jump to it. + __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), Rexception); + __ mtctr(Rcontinuation); + // Push exception for exception handler bytecodes. + __ push_ptr(Rexception); + + // Jump to exception handler (may be remove activation entry!). + __ bctr(); + } + + // If the exception is not handled in the current frame the frame is + // removed and the exception is rethrown (i.e. exception + // continuation is _rethrow_exception).
+ // + // Note: At this point the bci is still the bxi for the instruction + // which caused the exception and the expression stack is + // empty. Thus, for any VM calls at this point, GC will find a legal + // oop map (with empty expression stack). + + // In current activation + // tos: exception + // bcp: exception bcp + + // -------------------------------------------------------------------------- + // JVMTI PopFrame support + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + { + // Set the popframe_processing bit in popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not + // trigger new popframe handling cycles. + __ lwz(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread); + __ ori(R11_scratch1, R11_scratch1, JavaThread::popframe_processing_bit); + __ stw(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread); + + // Empty the expression stack, as in normal exception handling. + __ empty_expression_stack(); + __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, /* install_monitor_exception */ false); + + // Check to see whether we are returning to a deoptimized frame. + // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label Lcaller_not_deoptimized; + Register return_pc = R3_ARG1; + __ ld(return_pc, 0, R1_SP); + __ ld(return_pc, _abi(lr), return_pc); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), return_pc); + __ cmpdi(CCR0, R3_RET, 0); + __ bne(CCR0, Lcaller_not_deoptimized); + + // The deoptimized case. 
+ // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + __ ld(R4_ARG2, in_bytes(Method::const_offset()), R19_method); + __ lhz(R4_ARG2 /* number of params */, in_bytes(ConstMethod::size_of_parameters_offset()), R4_ARG2); + __ slwi(R4_ARG2, R4_ARG2, Interpreter::logStackElementSize); + __ addi(R5_ARG3, R18_locals, Interpreter::stackElementSize); + __ subf(R5_ARG3, R4_ARG2, R5_ARG3); + // Save these arguments. + __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), R16_thread, R4_ARG2, R5_ARG3); + + // Inform deoptimization that it is responsible for restoring these arguments. + __ load_const_optimized(R11_scratch1, JavaThread::popframe_force_deopt_reexecution_bit); + __ stw(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread); + + // Return from the current method into the deoptimization blob. Will eventually + // end up in the deopt interpreter entry, deoptimization prepared everything so that + // we will reexecute the call that called us. + __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*reload return_pc*/ return_pc, R11_scratch1, R12_scratch2); + __ mtlr(return_pc); + __ blr(); + + // The non-deoptimized case. + __ bind(Lcaller_not_deoptimized); + + // Clear the popframe condition flag. + __ li(R0, 0); + __ stw(R0, in_bytes(JavaThread::popframe_condition_offset()), R16_thread); + + // Get out of the current method and re-execute the call that called us.
+ __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2); + __ restore_interpreter_state(R11_scratch1); + __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1); + __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0); + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + __ ld(R11_scratch1, 0, R1_SP); + __ std(R28_mdx, _ijava_state_neg(mdx), R11_scratch1); + } +#if INCLUDE_JVMTI + Label L_done; + + __ lbz(R11_scratch1, 0, R14_bcp); + __ cmpwi(CCR0, R11_scratch1, Bytecodes::_invokestatic); + __ bne(CCR0, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + __ ld(R4_ARG2, 0, R18_locals); + __ MacroAssembler::call_VM(R4_ARG2, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), R4_ARG2, R19_method, R14_bcp, false); + __ restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true); + __ cmpdi(CCR0, R4_ARG2, 0); + __ beq(CCR0, L_done); + __ std(R4_ARG2, wordSize, R15_esp); + __ bind(L_done); +#endif // INCLUDE_JVMTI + __ dispatch_next(vtos); + } + // end of JVMTI PopFrame support + + // -------------------------------------------------------------------------- + // Remove activation exception entry. + // This is jumped to if an interpreted method can't handle an exception itself + // (we come from the throw/rethrow exception entry above). We're going to call + // into the VM to find the exception handler in the caller, pop the current + // frame and return the handler we calculated. 
+ Interpreter::_remove_activation_entry = __ pc(); + { + __ pop_ptr(Rexception); + __ verify_thread(); + __ verify_oop(Rexception); + __ std(Rexception, in_bytes(JavaThread::vm_result_offset()), R16_thread); + + __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, true); + __ notify_method_exit(false, vtos, InterpreterMacroAssembler::SkipNotifyJVMTI, false); + + __ get_vm_result(Rexception); + + // We are done with this activation frame; find out where to go next. + // The continuation point will be an exception handler, which expects + // the following registers set up: + // + // RET: exception oop + // ARG2: Issuing PC (see generate_exception_blob()), only used if the caller is compiled. + + Register return_pc = R31; // Needs to survive the runtime call. + __ ld(return_pc, 0, R1_SP); + __ ld(return_pc, _abi(lr), return_pc); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, return_pc); + + // Remove the current activation. + __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2); + + __ mr(R4_ARG2, return_pc); + __ mtlr(R3_RET); + __ mr(R3_RET, Rexception); + __ blr(); + } +} + +// JVMTI ForceEarlyReturn support. +// Returns "in the middle" of a method with a "fake" return value. +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + + Register Rscratch1 = R11_scratch1, + Rscratch2 = R12_scratch2; + + address entry = __ pc(); + __ empty_expression_stack(); + + __ load_earlyret_value(state, Rscratch1); + + __ ld(Rscratch1, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread); + // Clear the earlyret state. + __ li(R0, 0); + __ stw(R0, in_bytes(JvmtiThreadState::earlyret_state_offset()), Rscratch1); + + __ remove_activation(state, false, false); + // Copied from TemplateTable::_return. + // Restoration of lr done by remove_activation. 
+ switch (state) { + case ltos: + case btos: + case ctos: + case stos: + case atos: + case itos: __ mr(R3_RET, R17_tos); break; + case ftos: + case dtos: __ fmr(F1_RET, F15_ftos); break; + case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need + // to get visible before the reference to the object gets stored anywhere. + __ membar(Assembler::StoreStore); break; + default : ShouldNotReachHere(); + } + __ blr(); + + return entry; +} // end of ForceEarlyReturn support + +//----------------------------------------------------------------------------- +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, + address& bep, + address& cep, + address& sep, + address& aep, + address& iep, + address& lep, + address& fep, + address& dep, + address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + + aep = __ pc(); __ push_ptr(); __ b(L); + fep = __ pc(); __ push_f(); __ b(L); + dep = __ pc(); __ push_d(); __ b(L); + lep = __ pc(); __ push_l(); __ b(L); + __ align(32, 12, 24); // align L + bep = cep = sep = + iep = __ pc(); __ push_i(); + vep = __ pc(); + __ bind(L); + generate_and_dispatch(t); +} + +//----------------------------------------------------------------------------- +// Generation of individual instructions + +// helpers for generate_and_dispatch + +InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : TemplateInterpreterGenerator(code) { + generate_all(); // Down here so it can be "virtual". 
+} + +//----------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + //__ flush_bundle(); + address entry = __ pc(); + + const char *bname = NULL; + uint tsize = 0; + switch(state) { + case ftos: + bname = "trace_code_ftos {"; + tsize = 2; + break; + case btos: + bname = "trace_code_btos {"; + tsize = 2; + break; + case ctos: + bname = "trace_code_ctos {"; + tsize = 2; + break; + case stos: + bname = "trace_code_stos {"; + tsize = 2; + break; + case itos: + bname = "trace_code_itos {"; + tsize = 2; + break; + case ltos: + bname = "trace_code_ltos {"; + tsize = 3; + break; + case atos: + bname = "trace_code_atos {"; + tsize = 2; + break; + case vtos: + // Note: In case of vtos, the top-of-stack value could be an int or a double. + // In case of a double (2 slots) we won't see the 2nd stack value. + // Maybe we simply should print the topmost 3 stack slots to cope with the problem. + bname = "trace_code_vtos {"; + tsize = 2; + + break; + case dtos: + bname = "trace_code_dtos {"; + tsize = 3; + break; + default: + ShouldNotReachHere(); + } + BLOCK_COMMENT(bname); + + // Support short-cut for TraceBytecodesAt. + // Don't call into the VM if we don't want to trace to speed up things. + Label Lskip_vm_call; + if (TraceBytecodesAt > 0 && TraceBytecodesAt < max_intx) { + int offs1 = __ load_const_optimized(R11_scratch1, (address) &TraceBytecodesAt, R0, true); + int offs2 = __ load_const_optimized(R12_scratch2, (address) &BytecodeCounter::_counter_value, R0, true); + __ ld(R11_scratch1, offs1, R11_scratch1); + __ lwa(R12_scratch2, offs2, R12_scratch2); + __ cmpd(CCR0, R12_scratch2, R11_scratch1); + __ blt(CCR0, Lskip_vm_call); + } + + __ push(state); + // Load 2 topmost expression stack values.
+ __ ld(R6_ARG4, tsize*Interpreter::stackElementSize, R15_esp); + __ ld(R5_ARG3, Interpreter::stackElementSize, R15_esp); + __ mflr(R31); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), /* unused */ R4_ARG2, R5_ARG3, R6_ARG4, false); + __ mtlr(R31); + __ pop(state); + + if (TraceBytecodesAt > 0 && TraceBytecodesAt < max_intx) { + __ bind(Lskip_vm_call); + } + __ blr(); + BLOCK_COMMENT("} trace_code"); + return entry; +} + +void TemplateInterpreterGenerator::count_bytecode() { + int offs = __ load_const_optimized(R11_scratch1, (address) &BytecodeCounter::_counter_value, R12_scratch2, true); + __ lwz(R12_scratch2, offs, R11_scratch1); + __ addi(R12_scratch2, R12_scratch2, 1); + __ stw(R12_scratch2, offs, R11_scratch1); +} + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { + int offs = __ load_const_optimized(R11_scratch1, (address) &BytecodeHistogram::_counters[t->bytecode()], R12_scratch2, true); + __ lwz(R12_scratch2, offs, R11_scratch1); + __ addi(R12_scratch2, R12_scratch2, 1); + __ stw(R12_scratch2, offs, R11_scratch1); +} + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + const Register addr = R11_scratch1, + tmp = R12_scratch2; + // Get index, shift out old bytecode, bring in new bytecode, and store it. + // _index = (_index >> log2_number_of_codes) | + // (bytecode << log2_number_of_codes); + int offs1 = __ load_const_optimized(addr, (address)&BytecodePairHistogram::_index, tmp, true); + __ lwz(tmp, offs1, addr); + __ srwi(tmp, tmp, BytecodePairHistogram::log2_number_of_codes); + __ ori(tmp, tmp, ((int) t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); + __ stw(tmp, offs1, addr); + + // Bump bucket contents. 
+ // _counters[_index] ++; + int offs2 = __ load_const_optimized(addr, (address)&BytecodePairHistogram::_counters, R0, true); + __ sldi(tmp, tmp, LogBytesPerInt); + __ add(addr, tmp, addr); + __ lwz(tmp, offs2, addr); + __ addi(tmp, tmp, 1); + __ stw(tmp, offs2, addr); +} + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time stub saves the right registers, depending on + // the tosca in-state for the given template. + + assert(Interpreter::trace_code(t->tos_in()) != NULL, + "entry must have been generated"); + + // Note: we destroy LR here. + __ bl(Interpreter::trace_code(t->tos_in())); +} + +void TemplateInterpreterGenerator::stop_interpreter_at() { + Label L; + int offs1 = __ load_const_optimized(R11_scratch1, (address) &StopInterpreterAt, R0, true); + int offs2 = __ load_const_optimized(R12_scratch2, (address) &BytecodeCounter::_counter_value, R0, true); + __ ld(R11_scratch1, offs1, R11_scratch1); + __ lwa(R12_scratch2, offs2, R12_scratch2); + __ cmpd(CCR0, R12_scratch2, R11_scratch1); + __ bne(CCR0, L); + __ illtrap(); + __ bind(L); +} + +#endif // !PRODUCT +#endif // !CC_INTERP diff --git a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp index 361e637d253..5179d817853 100644 --- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, 2015 SAP AG. All rights reserved. + * Copyright (c) 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* * This code is free software; you can redistribute it and/or modify it @@ -24,1389 +24,38 @@ */ #include "precompiled.hpp" -#ifndef CC_INTERP -#include "asm/macroAssembler.inline.hpp" -#include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" -#include "interpreter/interpreterGenerator.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "interpreter/interp_masm.hpp" -#include "interpreter/templateTable.hpp" -#include "oops/arrayOop.hpp" -#include "oops/methodData.hpp" +#include "oops/constMethod.hpp" #include "oops/method.hpp" -#include "oops/oop.inline.hpp" -#include "prims/jvmtiExport.hpp" -#include "prims/jvmtiThreadState.hpp" -#include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" -#include "runtime/synchronizer.hpp" -#include "runtime/timer.hpp" -#include "runtime/vframeArray.hpp" #include "utilities/debug.hpp" #include "utilities/macros.hpp" -#undef __ -#define __ _masm-> -#ifdef PRODUCT -#define BLOCK_COMMENT(str) /* nothing */ -#else -#define BLOCK_COMMENT(str) __ block_comment(str) -#endif - -#define BIND(label) __ bind(label); BLOCK_COMMENT(#label ":") - -//----------------------------------------------------------------------------- - -// Actually we should never reach here since we do stack overflow checks before pushing any frame. -address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { - address entry = __ pc(); - __ unimplemented("generate_StackOverflowError_handler"); - return entry; -} - -address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char* name) { - address entry = __ pc(); - __ empty_expression_stack(); - __ load_const_optimized(R4_ARG2, (address) name); - // Index is in R17_tos. 
- __ mr(R5_ARG3, R17_tos); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException)); - return entry; -} - -#if 0 -// Call special ClassCastException constructor taking object to cast -// and target class as arguments. -address TemplateInterpreterGenerator::generate_ClassCastException_verbose_handler() { - address entry = __ pc(); - - // Expression stack must be empty before entering the VM if an - // exception happened. - __ empty_expression_stack(); - - // Thread will be loaded to R3_ARG1. - // Target class oop is in register R5_ARG3 by convention! - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException_verbose), R17_tos, R5_ARG3); - // Above call must not return here since exception pending. - DEBUG_ONLY(__ should_not_reach_here();) - return entry; -} -#endif - -address TemplateInterpreterGenerator::generate_ClassCastException_handler() { - address entry = __ pc(); - // Expression stack must be empty before entering the VM if an - // exception happened. - __ empty_expression_stack(); - - // Load exception object. - // Thread will be loaded to R3_ARG1. - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), R17_tos); -#ifdef ASSERT - // Above call must not return here since exception pending. - __ should_not_reach_here(); -#endif - return entry; -} - -address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) { - address entry = __ pc(); - //__ untested("generate_exception_handler_common"); - Register Rexception = R17_tos; - - // Expression stack must be empty before entering the VM if an exception happened. 
- __ empty_expression_stack(); - - __ load_const_optimized(R4_ARG2, (address) name, R11_scratch1); - if (pass_oop) { - __ mr(R5_ARG3, Rexception); - __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), false); - } else { - __ load_const_optimized(R5_ARG3, (address) message, R11_scratch1); - __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), false); +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; + default : ShouldNotReachHere(); } - - // Throw exception. - __ mr(R3_ARG1, Rexception); - __ load_const_optimized(R11_scratch1, Interpreter::throw_exception_entry(), R12_scratch2); - __ mtctr(R11_scratch1); - __ bctr(); - - return entry; -} - -address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { - address entry = __ pc(); - __ unimplemented("generate_continuation_for"); - return entry; -} - -// This entry is returned to when a call returns to the interpreter. -// When we arrive here, we expect that the callee stack frame is already popped. -address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { - address entry = __ pc(); - - // Move the value out of the return register back to the TOS cache of current frame. - switch (state) { - case ltos: - case btos: - case ctos: - case stos: - case atos: - case itos: __ mr(R17_tos, R3_RET); break; // RET -> TOS cache - case ftos: - case dtos: __ fmr(F15_ftos, F1_RET); break; // TOS cache -> GR_FRET - case vtos: break; // Nothing to do, this was a void return. 
- default : ShouldNotReachHere(); - } - - __ restore_interpreter_state(R11_scratch1); // Sets R11_scratch1 = fp. - __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1); - __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0); - - // Compiled code destroys templateTableBase, reload. - __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R12_scratch2); - - if (state == atos) { - __ profile_return_type(R3_RET, R11_scratch1, R12_scratch2); - } - - const Register cache = R11_scratch1; - const Register size = R12_scratch2; - __ get_cache_and_index_at_bcp(cache, 1, index_size); - - // Get least significant byte of 64 bit value: -#if defined(VM_LITTLE_ENDIAN) - __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()), cache); -#else - __ lbz(size, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset()) + 7, cache); -#endif - __ sldi(size, size, Interpreter::logStackElementSize); - __ add(R15_esp, R15_esp, size); - __ dispatch_next(state, step); - return entry; -} - -address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { - address entry = __ pc(); - // If state != vtos, we're returning from a native method, which put it's result - // into the result register. So move the value out of the return register back - // to the TOS cache of current frame. - - switch (state) { - case ltos: - case btos: - case ctos: - case stos: - case atos: - case itos: __ mr(R17_tos, R3_RET); break; // GR_RET -> TOS cache - case ftos: - case dtos: __ fmr(F15_ftos, F1_RET); break; // TOS cache -> GR_FRET - case vtos: break; // Nothing to do, this was a void return. - default : ShouldNotReachHere(); - } - - // Load LcpoolCache @@@ should be already set! - __ get_constant_pool_cache(R27_constPoolCache); - - // Handle a pending exception, fall through if none. 
- __ check_and_forward_exception(R11_scratch1, R12_scratch2); - - // Start executing bytecodes. - __ dispatch_next(state, step); - - return entry; -} - -// A result handler converts the native result into java format. -// Use the shared code between c++ and template interpreter. -address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { - return AbstractInterpreterGenerator::generate_result_handler_for(type); -} - -address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) { - address entry = __ pc(); - - __ push(state); - __ call_VM(noreg, runtime_entry); - __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); - - return entry; -} - -// Helpers for commoning out cases in the various type of method entries. - -// Increment invocation count & check for overflow. -// -// Note: checking for negative value instead of overflow -// so we have a 'sticky' overflow test. -// -void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { - // Note: In tiered we increment either counters in method or in MDO depending if we're profiling or not. - Register Rscratch1 = R11_scratch1; - Register Rscratch2 = R12_scratch2; - Register R3_counters = R3_ARG1; - Label done; - - if (TieredCompilation) { - const int increment = InvocationCounter::count_increment; - const int mask = ((1 << Tier0InvokeNotifyFreqLog) - 1) << InvocationCounter::count_shift; - Label no_mdo; - if (ProfileInterpreter) { - const Register Rmdo = Rscratch1; - // If no method data exists, go to profile_continue. - __ ld(Rmdo, in_bytes(Method::method_data_offset()), R19_method); - __ cmpdi(CCR0, Rmdo, 0); - __ beq(CCR0, no_mdo); - - // Increment backedge counter in the MDO. 
- const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); - __ lwz(Rscratch2, mdo_bc_offs, Rmdo); - __ addi(Rscratch2, Rscratch2, increment); - __ stw(Rscratch2, mdo_bc_offs, Rmdo); - __ load_const_optimized(Rscratch1, mask, R0); - __ and_(Rscratch1, Rscratch2, Rscratch1); - __ bne(CCR0, done); - __ b(*overflow); - } - - // Increment counter in MethodCounters*. - const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); - __ bind(no_mdo); - __ get_method_counters(R19_method, R3_counters, done); - __ lwz(Rscratch2, mo_bc_offs, R3_counters); - __ addi(Rscratch2, Rscratch2, increment); - __ stw(Rscratch2, mo_bc_offs, R3_counters); - __ load_const_optimized(Rscratch1, mask, R0); - __ and_(Rscratch1, Rscratch2, Rscratch1); - __ beq(CCR0, *overflow); - - __ bind(done); - - } else { - - // Update standard invocation counters. - Register Rsum_ivc_bec = R4_ARG2; - __ get_method_counters(R19_method, R3_counters, done); - __ increment_invocation_counter(R3_counters, Rsum_ivc_bec, R12_scratch2); - // Increment interpreter invocation counter. - if (ProfileInterpreter) { // %%% Merge this into methodDataOop. - __ lwz(R12_scratch2, in_bytes(MethodCounters::interpreter_invocation_counter_offset()), R3_counters); - __ addi(R12_scratch2, R12_scratch2, 1); - __ stw(R12_scratch2, in_bytes(MethodCounters::interpreter_invocation_counter_offset()), R3_counters); - } - // Check if we must create a method data obj. - if (ProfileInterpreter && profile_method != NULL) { - const Register profile_limit = Rscratch1; - int pl_offs = __ load_const_optimized(profile_limit, &InvocationCounter::InterpreterProfileLimit, R0, true); - __ lwz(profile_limit, pl_offs, profile_limit); - // Test to see if we should create a method data oop. - __ cmpw(CCR0, Rsum_ivc_bec, profile_limit); - __ blt(CCR0, *profile_method_continue); - // If no method data exists, go to profile_method. 
- __ test_method_data_pointer(*profile_method); - } - // Finally check for counter overflow. - if (overflow) { - const Register invocation_limit = Rscratch1; - int il_offs = __ load_const_optimized(invocation_limit, &InvocationCounter::InterpreterInvocationLimit, R0, true); - __ lwz(invocation_limit, il_offs, invocation_limit); - assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit), "unexpected field size"); - __ cmpw(CCR0, Rsum_ivc_bec, invocation_limit); - __ bge(CCR0, *overflow); - } - - __ bind(done); - } -} - -// Generate code to initiate compilation on invocation counter overflow. -void TemplateInterpreterGenerator::generate_counter_overflow(Label& continue_entry) { - // Generate code to initiate compilation on the counter overflow. - - // InterpreterRuntime::frequency_counter_overflow takes one arguments, - // which indicates if the counter overflow occurs at a backwards branch (NULL bcp) - // We pass zero in. - // The call returns the address of the verified entry point for the method or NULL - // if the compilation did not complete (either went background or bailed out). - // - // Unlike the C++ interpreter above: Check exceptions! - // Assumption: Caller must set the flag "do_not_unlock_if_sychronized" if the monitor of a sync'ed - // method has not yet been created. Thus, no unlocking of a non-existing monitor can occur. - - __ li(R4_ARG2, 0); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), R4_ARG2, true); - - // Returns verified_entry_point or NULL. - // We ignore it in any case. 
- __ b(continue_entry); -} - -void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rmem_frame_size, Register Rscratch1) { - assert_different_registers(Rmem_frame_size, Rscratch1); - __ generate_stack_overflow_check_with_compare_and_throw(Rmem_frame_size, Rscratch1); -} - -void TemplateInterpreterGenerator::unlock_method(bool check_exceptions) { - __ unlock_object(R26_monitor, check_exceptions); -} - -// Lock the current method, interpreter register window must be set up! -void TemplateInterpreterGenerator::lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded) { - const Register Robj_to_lock = Rscratch2; - - { - if (!flags_preloaded) { - __ lwz(Rflags, method_(access_flags)); - } - -#ifdef ASSERT - // Check if methods needs synchronization. - { - Label Lok; - __ testbitdi(CCR0, R0, Rflags, JVM_ACC_SYNCHRONIZED_BIT); - __ btrue(CCR0,Lok); - __ stop("method doesn't need synchronization"); - __ bind(Lok); - } -#endif // ASSERT - } - - // Get synchronization object to Rscratch2. - { - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - Label Lstatic; - Label Ldone; - - __ testbitdi(CCR0, R0, Rflags, JVM_ACC_STATIC_BIT); - __ btrue(CCR0, Lstatic); - - // Non-static case: load receiver obj from stack and we're done. - __ ld(Robj_to_lock, R18_locals); - __ b(Ldone); - - __ bind(Lstatic); // Static case: Lock the java mirror - __ ld(Robj_to_lock, in_bytes(Method::const_offset()), R19_method); - __ ld(Robj_to_lock, in_bytes(ConstMethod::constants_offset()), Robj_to_lock); - __ ld(Robj_to_lock, ConstantPool::pool_holder_offset_in_bytes(), Robj_to_lock); - __ ld(Robj_to_lock, mirror_offset, Robj_to_lock); - - __ bind(Ldone); - __ verify_oop(Robj_to_lock); - } - - // Got the oop to lock => execute! 
- __ add_monitor_to_stack(true, Rscratch1, R0); - - __ std(Robj_to_lock, BasicObjectLock::obj_offset_in_bytes(), R26_monitor); - __ lock_object(R26_monitor, Robj_to_lock); -} - -// Generate a fixed interpreter frame for pure interpreter -// and I2N native transition frames. -// -// Before (stack grows downwards): -// -// | ... | -// |------------- | -// | java arg0 | -// | ... | -// | java argn | -// | | <- R15_esp -// | | -// |--------------| -// | abi_112 | -// | | <- R1_SP -// |==============| -// -// -// After: -// -// | ... | -// | java arg0 |<- R18_locals -// | ... | -// | java argn | -// |--------------| -// | | -// | java locals | -// | | -// |--------------| -// | abi_48 | -// |==============| -// | | -// | istate | -// | | -// |--------------| -// | monitor |<- R26_monitor -// |--------------| -// | |<- R15_esp -// | expression | -// | stack | -// | | -// |--------------| -// | | -// | abi_112 |<- R1_SP -// |==============| -// -// The top most frame needs an abi space of 112 bytes. This space is needed, -// since we call to c. The c function may spill their arguments to the caller -// frame. When we call to java, we don't need these spill slots. In order to save -// space on the stack, we resize the caller. However, java local reside in -// the caller frame and the frame has to be increased. The frame_size for the -// current frame was calculated based on max_stack as size for the expression -// stack. At the call, just a part of the expression stack might be used. -// We don't want to waste this space and cut the frame back accordingly. -// The resulting amount for resizing is calculated as follows: -// resize = (number_of_locals - number_of_arguments) * slot_size -// + (R1_SP - R15_esp) + 48 -// -// The size for the callee frame is calculated: -// framesize = 112 + max_stack + monitor + state_size -// -// maxstack: Max number of slots on the expression stack, loaded from the method. -// monitor: We statically reserve room for one monitor object. 
-// state_size: We save the current state of the interpreter to this area. -// -void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Register Rsize_of_parameters, Register Rsize_of_locals) { - Register parent_frame_resize = R6_ARG4, // Frame will grow by this number of bytes. - top_frame_size = R7_ARG5, - Rconst_method = R8_ARG6; - - assert_different_registers(Rsize_of_parameters, Rsize_of_locals, parent_frame_resize, top_frame_size); - - __ ld(Rconst_method, method_(const)); - __ lhz(Rsize_of_parameters /* number of params */, - in_bytes(ConstMethod::size_of_parameters_offset()), Rconst_method); - if (native_call) { - // If we're calling a native method, we reserve space for the worst-case signature - // handler varargs vector, which is max(Argument::n_register_parameters, parameter_count+2). - // We add two slots to the parameter_count, one for the jni - // environment and one for a possible native mirror. - Label skip_native_calculate_max_stack; - __ addi(top_frame_size, Rsize_of_parameters, 2); - __ cmpwi(CCR0, top_frame_size, Argument::n_register_parameters); - __ bge(CCR0, skip_native_calculate_max_stack); - __ li(top_frame_size, Argument::n_register_parameters); - __ bind(skip_native_calculate_max_stack); - __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize); - __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize); - __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize! - assert(Rsize_of_locals == noreg, "Rsize_of_locals not initialized"); // Only relevant value is Rsize_of_parameters. 
- } else { - __ lhz(Rsize_of_locals /* number of params */, in_bytes(ConstMethod::size_of_locals_offset()), Rconst_method); - __ sldi(Rsize_of_parameters, Rsize_of_parameters, Interpreter::logStackElementSize); - __ sldi(Rsize_of_locals, Rsize_of_locals, Interpreter::logStackElementSize); - __ lhz(top_frame_size, in_bytes(ConstMethod::max_stack_offset()), Rconst_method); - __ sub(R11_scratch1, Rsize_of_locals, Rsize_of_parameters); // >=0 - __ sub(parent_frame_resize, R1_SP, R15_esp); // <0, off by Interpreter::stackElementSize! - __ sldi(top_frame_size, top_frame_size, Interpreter::logStackElementSize); - __ add(parent_frame_resize, parent_frame_resize, R11_scratch1); - } - - // Compute top frame size. - __ addi(top_frame_size, top_frame_size, frame::abi_reg_args_size + frame::ijava_state_size); - - // Cut back area between esp and max_stack. - __ addi(parent_frame_resize, parent_frame_resize, frame::abi_minframe_size - Interpreter::stackElementSize); - - __ round_to(top_frame_size, frame::alignment_in_bytes); - __ round_to(parent_frame_resize, frame::alignment_in_bytes); - // parent_frame_resize = (locals-parameters) - (ESP-SP-ABI48) Rounded to frame alignment size. - // Enlarge by locals-parameters (not in case of native_call), shrink by ESP-SP-ABI48. - - { - // -------------------------------------------------------------------------- - // Stack overflow check - - Label cont; - __ add(R11_scratch1, parent_frame_resize, top_frame_size); - generate_stack_overflow_check(R11_scratch1, R12_scratch2); - } - - // Set up interpreter state registers. - - __ add(R18_locals, R15_esp, Rsize_of_parameters); - __ ld(R27_constPoolCache, in_bytes(ConstMethod::constants_offset()), Rconst_method); - __ ld(R27_constPoolCache, ConstantPool::cache_offset_in_bytes(), R27_constPoolCache); - - // Set method data pointer. 
- if (ProfileInterpreter) { - Label zero_continue; - __ ld(R28_mdx, method_(method_data)); - __ cmpdi(CCR0, R28_mdx, 0); - __ beq(CCR0, zero_continue); - __ addi(R28_mdx, R28_mdx, in_bytes(MethodData::data_offset())); - __ bind(zero_continue); - } - - if (native_call) { - __ li(R14_bcp, 0); // Must initialize. - } else { - __ add(R14_bcp, in_bytes(ConstMethod::codes_offset()), Rconst_method); - } - - // Resize parent frame. - __ mflr(R12_scratch2); - __ neg(parent_frame_resize, parent_frame_resize); - __ resize_frame(parent_frame_resize, R11_scratch1); - __ std(R12_scratch2, _abi(lr), R1_SP); - - __ addi(R26_monitor, R1_SP, - frame::ijava_state_size); - __ addi(R15_esp, R26_monitor, - Interpreter::stackElementSize); - - // Store values. - // R15_esp, R14_bcp, R26_monitor, R28_mdx are saved at java calls - // in InterpreterMacroAssembler::call_from_interpreter. - __ std(R19_method, _ijava_state_neg(method), R1_SP); - __ std(R21_sender_SP, _ijava_state_neg(sender_sp), R1_SP); - __ std(R27_constPoolCache, _ijava_state_neg(cpoolCache), R1_SP); - __ std(R18_locals, _ijava_state_neg(locals), R1_SP); - - // Note: esp, bcp, monitor, mdx live in registers. Hence, the correct version can only - // be found in the frame after save_interpreter_state is done. This is always true - // for non-top frames. But when a signal occurs, dumping the top frame can go wrong, - // because e.g. frame::interpreter_frame_bcp() will not access the correct value - // (Enhanced Stack Trace). - // The signal handler does not save the interpreter state into the frame. - __ li(R0, 0); -#ifdef ASSERT - // Fill remaining slots with constants. - __ load_const_optimized(R11_scratch1, 0x5afe); - __ load_const_optimized(R12_scratch2, 0xdead); -#endif - // We have to initialize some frame slots for native calls (accessed by GC). 
- if (native_call) { - __ std(R26_monitor, _ijava_state_neg(monitors), R1_SP); - __ std(R14_bcp, _ijava_state_neg(bcp), R1_SP); - if (ProfileInterpreter) { __ std(R28_mdx, _ijava_state_neg(mdx), R1_SP); } - } -#ifdef ASSERT - else { - __ std(R12_scratch2, _ijava_state_neg(monitors), R1_SP); - __ std(R12_scratch2, _ijava_state_neg(bcp), R1_SP); - __ std(R12_scratch2, _ijava_state_neg(mdx), R1_SP); - } - __ std(R11_scratch1, _ijava_state_neg(ijava_reserved), R1_SP); - __ std(R12_scratch2, _ijava_state_neg(esp), R1_SP); - __ std(R12_scratch2, _ijava_state_neg(lresult), R1_SP); - __ std(R12_scratch2, _ijava_state_neg(fresult), R1_SP); -#endif - __ subf(R12_scratch2, top_frame_size, R1_SP); - __ std(R0, _ijava_state_neg(oop_tmp), R1_SP); - __ std(R12_scratch2, _ijava_state_neg(top_frame_sp), R1_SP); - - // Push top frame. - __ push_frame(top_frame_size, R11_scratch1); -} - -// End of helpers - - -// Support abs and sqrt like in compiler. -// For others we can use a normal (native) entry. - -inline bool math_entry_available(AbstractInterpreter::MethodKind kind) { - if (!InlineIntrinsics) return false; - - return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) || - (kind==Interpreter::java_lang_math_abs)); -} - -address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { - if (!math_entry_available(kind)) { - NOT_PRODUCT(__ should_not_reach_here();) - return NULL; - } - - address entry = __ pc(); - - __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp); - - // Pop c2i arguments (if any) off when we return. -#ifdef ASSERT - __ ld(R9_ARG7, 0, R1_SP); - __ ld(R10_ARG8, 0, R21_sender_SP); - __ cmpd(CCR0, R9_ARG7, R10_ARG8); - __ asm_assert_eq("backlink", 0x545); -#endif // ASSERT - __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. 
- - if (kind == Interpreter::java_lang_math_sqrt) { - __ fsqrt(F1_RET, F1_RET); - } else if (kind == Interpreter::java_lang_math_abs) { - __ fabs(F1_RET, F1_RET); - } else { - ShouldNotReachHere(); - } - - // And we're done. - __ blr(); - - __ flush(); - - return entry; -} - -// Interpreter stub for calling a native method. (asm interpreter) -// This sets up a somewhat different looking stack for calling the -// native method than the typical interpreter frame setup. -// -// On entry: -// R19_method - method -// R16_thread - JavaThread* -// R15_esp - intptr_t* sender tos -// -// abstract stack (grows up) -// [ IJava (caller of JNI callee) ] <-- ASP -// ... -address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { - - address entry = __ pc(); - - const bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; - - // ----------------------------------------------------------------------------- - // Allocate a new frame that represents the native callee (i2n frame). - // This is not a full-blown interpreter frame, but in particular, the - // following registers are valid after this: - // - R19_method - // - R18_local (points to start of argumuments to native function) - // - // abstract stack (grows up) - // [ IJava (caller of JNI callee) ] <-- ASP - // ... - - const Register signature_handler_fd = R11_scratch1; - const Register pending_exception = R0; - const Register result_handler_addr = R31; - const Register native_method_fd = R11_scratch1; - const Register access_flags = R22_tmp2; - const Register active_handles = R11_scratch1; // R26_monitor saved to state. - const Register sync_state = R12_scratch2; - const Register sync_state_addr = sync_state; // Address is dead after use. - const Register suspend_flags = R11_scratch1; - - //============================================================================= - // Allocate new frame and initialize interpreter state. 
- - Label exception_return; - Label exception_return_sync_check; - Label stack_overflow_return; - - // Generate new interpreter state and jump to stack_overflow_return in case of - // a stack overflow. - //generate_compute_interpreter_state(stack_overflow_return); - - Register size_of_parameters = R22_tmp2; - - generate_fixed_frame(true, size_of_parameters, noreg /* unused */); - - //============================================================================= - // Increment invocation counter. On overflow, entry to JNI method - // will be compiled. - Label invocation_counter_overflow, continue_after_compile; - if (inc_counter) { - if (synchronized) { - // Since at this point in the method invocation the exception handler - // would try to exit the monitor of synchronized methods which hasn't - // been entered yet, we set the thread local variable - // _do_not_unlock_if_synchronized to true. If any exception was thrown by - // runtime, exception handling i.e. unlock_if_synchronized_method will - // check this thread local flag. - // This flag has two effects, one is to force an unwind in the topmost - // interpreter frame and not perform an unlock while doing so. - __ li(R0, 1); - __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); - } - generate_counter_incr(&invocation_counter_overflow, NULL, NULL); - - BIND(continue_after_compile); - // Reset the _do_not_unlock_if_synchronized flag. - if (synchronized) { - __ li(R0, 0); - __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); - } - } - - // access_flags = method->access_flags(); - // Load access flags. - assert(access_flags->is_nonvolatile(), - "access_flags must be in a non-volatile register"); - // Type check. - assert(4 == sizeof(AccessFlags), "unexpected field size"); - __ lwz(access_flags, method_(access_flags)); - - // We don't want to reload R19_method and access_flags after calls - // to some helper functions. 
- assert(R19_method->is_nonvolatile(), - "R19_method must be a non-volatile register"); - - // Check for synchronized methods. Must happen AFTER invocation counter - // check, so method is not locked if counter overflows. - - if (synchronized) { - lock_method(access_flags, R11_scratch1, R12_scratch2, true); - - // Update monitor in state. - __ ld(R11_scratch1, 0, R1_SP); - __ std(R26_monitor, _ijava_state_neg(monitors), R11_scratch1); - } - - // jvmti/jvmpi support - __ notify_method_entry(); - - //============================================================================= - // Get and call the signature handler. - - __ ld(signature_handler_fd, method_(signature_handler)); - Label call_signature_handler; - - __ cmpdi(CCR0, signature_handler_fd, 0); - __ bne(CCR0, call_signature_handler); - - // Method has never been called. Either generate a specialized - // handler or point to the slow one. - // - // Pass parameter 'false' to avoid exception check in call_VM. - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false); - - // Check for an exception while looking up the target method. If we - // incurred one, bail. - __ ld(pending_exception, thread_(pending_exception)); - __ cmpdi(CCR0, pending_exception, 0); - __ bne(CCR0, exception_return_sync_check); // Has pending exception. - - // Reload signature handler, it may have been created/assigned in the meanwhile. - __ ld(signature_handler_fd, method_(signature_handler)); - __ twi_0(signature_handler_fd); // Order wrt. load of klass mirror and entry point (isync is below). - - BIND(call_signature_handler); - - // Before we call the signature handler we push a new frame to - // protect the interpreter frame volatile registers when we return - // from jni but before we can get back to Java. - - // First set the frame anchor while the SP/FP registers are - // convenient and the slow signature handler can use this same frame - // anchor. 
- - // We have a TOP_IJAVA_FRAME here, which belongs to us. - __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/); - - // Now the interpreter frame (and its call chain) have been - // invalidated and flushed. We are now protected against eager - // being enabled in native code. Even if it goes eager the - // registers will be reloaded as clean and we will invalidate after - // the call so no spurious flush should be possible. - - // Call signature handler and pass locals address. - // - // Our signature handlers copy required arguments to the C stack - // (outgoing C args), R3_ARG1 to R10_ARG8, and FARG1 to FARG13. - __ mr(R3_ARG1, R18_locals); -#if !defined(ABI_ELFv2) - __ ld(signature_handler_fd, 0, signature_handler_fd); -#endif - - __ call_stub(signature_handler_fd); - - // Remove the register parameter varargs slots we allocated in - // compute_interpreter_state. SP+16 ends up pointing to the ABI - // outgoing argument area. - // - // Not needed on PPC64. - //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord); - - assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register"); - // Save across call to native method. - __ mr(result_handler_addr, R3_RET); - - __ isync(); // Acquire signature handler before trying to fetch the native entry point and klass mirror. - - // Set up fixed parameters and call the native method. - // If the method is static, get mirror into R4_ARG2. - { - Label method_is_not_static; - // Access_flags is non-volatile and still, no need to restore it. - - // Restore access flags. 
- __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT); - __ bfalse(CCR0, method_is_not_static); - - // constants = method->constants(); - __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method); - __ ld(R11_scratch1, in_bytes(ConstMethod::constants_offset()), R11_scratch1); - // pool_holder = method->constants()->pool_holder(); - __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), - R11_scratch1/*constants*/); - - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - - // mirror = pool_holder->klass_part()->java_mirror(); - __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/); - // state->_native_mirror = mirror; - - __ ld(R11_scratch1, 0, R1_SP); - __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); - // R4_ARG2 = &state->_oop_temp; - __ addi(R4_ARG2, R11_scratch1, _ijava_state_neg(oop_tmp)); - BIND(method_is_not_static); - } - - // At this point, arguments have been copied off the stack into - // their JNI positions. Oops are boxed in-place on the stack, with - // handles copied to arguments. The result handler address is in a - // register. - - // Pass JNIEnv address as first parameter. - __ addir(R3_ARG1, thread_(jni_environment)); - - // Load the native_method entry before we change the thread state. - __ ld(native_method_fd, method_(native_function)); - - //============================================================================= - // Transition from _thread_in_Java to _thread_in_native. As soon as - // we make this change the safepoint code needs to be certain that - // the last Java frame we established is good. The pc in that frame - // just needs to be near here not an actual return address. - - // We use release_store_fence to update values like the thread state, where - // we don't want the current thread to continue until all our prior memory - // accesses (including the new thread state) are visible to other threads. 
- __ li(R0, _thread_in_native); - __ release(); - - // TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); - __ stw(R0, thread_(thread_state)); - - if (UseMembar) { - __ fence(); - } - - //============================================================================= - // Call the native method. Argument registers must not have been - // overwritten since "__ call_stub(signature_handler);" (except for - // ARG1 and ARG2 for static methods). - __ call_c(native_method_fd); - - __ li(R0, 0); - __ ld(R11_scratch1, 0, R1_SP); - __ std(R3_RET, _ijava_state_neg(lresult), R11_scratch1); - __ stfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1); - __ std(R0/*mirror*/, _ijava_state_neg(oop_tmp), R11_scratch1); // reset - - // Note: C++ interpreter needs the following here: - // The frame_manager_lr field, which we use for setting the last - // java frame, gets overwritten by the signature handler. Restore - // it now. - //__ get_PC_trash_LR(R11_scratch1); - //__ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP); - - // Because of GC R19_method may no longer be valid. - - // Block, if necessary, before resuming in _thread_in_Java state. - // In order for GC to work, don't clear the last_Java_sp until after - // blocking. - - //============================================================================= - // Switch thread to "native transition" state before reading the - // synchronization state. This additional state is necessary - // because reading and testing the synchronization state is not - // atomic w.r.t. GC, as this scenario demonstrates: Java thread A, - // in _thread_in_native state, loads _not_synchronized and is - // preempted. VM thread changes sync state to synchronizing and - // suspends threads for GC. Thread A is resumed to finish this - // native method, but doesn't block here since it didn't see any - // synchronization in progress, and escapes. 
- - // We use release_store_fence to update values like the thread state, where - // we don't want the current thread to continue until all our prior memory - // accesses (including the new thread state) are visible to other threads. - __ li(R0/*thread_state*/, _thread_in_native_trans); - __ release(); - __ stw(R0/*thread_state*/, thread_(thread_state)); - if (UseMembar) { - __ fence(); - } - // Write serialization page so that the VM thread can do a pseudo remote - // membar. We use the current thread pointer to calculate a thread - // specific offset to write to within the page. This minimizes bus - // traffic due to cache line collision. - else { - __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2); - } - - // Now before we return to java we must look for a current safepoint - // (a new safepoint can not start since we entered native_trans). - // We must check here because a current safepoint could be modifying - // the callers registers right this moment. - - // Acquire isn't strictly necessary here because of the fence, but - // sync_state is declared to be volatile, so we do it anyway - // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path). - int sync_state_offs = __ load_const_optimized(sync_state_addr, SafepointSynchronize::address_of_state(), /*temp*/R0, true); - - // TODO PPC port assert(4 == SafepointSynchronize::sz_state(), "unexpected field size"); - __ lwz(sync_state, sync_state_offs, sync_state_addr); - - // TODO PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size"); - __ lwz(suspend_flags, thread_(suspend_flags)); - - Label sync_check_done; - Label do_safepoint; - // No synchronization in progress nor yet synchronized. - __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); - // Not suspended. - __ cmpwi(CCR1, suspend_flags, 0); - - __ bne(CCR0, do_safepoint); - __ beq(CCR1, sync_check_done); - __ bind(do_safepoint); - __ isync(); - // Block. 
We do the call directly and leave the current - // last_Java_frame setup undisturbed. We must save any possible - // native result across the call. No oop is present. - - __ mr(R3_ARG1, R16_thread); -#if defined(ABI_ELFv2) - __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), - relocInfo::none); -#else - __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans), - relocInfo::none); -#endif - - __ bind(sync_check_done); - - //============================================================================= - // <<<<<< Back in Interpreter Frame >>>>> - - // We are in thread_in_native_trans here and back in the normal - // interpreter frame. We don't have to do anything special about - // safepoints and we can switch to Java mode anytime we are ready. - - // Note: frame::interpreter_frame_result has a dependency on how the - // method result is saved across the call to post_method_exit. For - // native methods it assumes that the non-FPU/non-void result is - // saved in _native_lresult and a FPU result in _native_fresult. If - // this changes then the interpreter_frame_result implementation - // will need to be updated too. - - // On PPC64, we have stored the result directly after the native call. - - //============================================================================= - // Back in Java - - // We use release_store_fence to update values like the thread state, where - // we don't want the current thread to continue until all our prior memory - // accesses (including the new thread state) are visible to other threads. - __ li(R0/*thread_state*/, _thread_in_Java); - __ release(); - __ stw(R0/*thread_state*/, thread_(thread_state)); - if (UseMembar) { - __ fence(); - } - - __ reset_last_Java_frame(); - - // Jvmdi/jvmpi support. Whether we've got an exception pending or - // not, and whether unlocking throws an exception or not, we notify - // on native method exit. 
If we do have an exception, we'll end up - // in the caller's context to handle it, so if we don't do the - // notify here, we'll drop it on the floor. - __ notify_method_exit(true/*native method*/, - ilgl /*illegal state (not used for native methods)*/, - InterpreterMacroAssembler::NotifyJVMTI, - false /*check_exceptions*/); - - //============================================================================= - // Handle exceptions - - if (synchronized) { - // Don't check for exceptions since we're still in the i2n frame. Do that - // manually afterwards. - unlock_method(false); - } - - // Reset active handles after returning from native. - // thread->active_handles()->clear(); - __ ld(active_handles, thread_(active_handles)); - // TODO PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size"); - __ li(R0, 0); - __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles); - - Label exception_return_sync_check_already_unlocked; - __ ld(R0/*pending_exception*/, thread_(pending_exception)); - __ cmpdi(CCR0, R0/*pending_exception*/, 0); - __ bne(CCR0, exception_return_sync_check_already_unlocked); - - //----------------------------------------------------------------------------- - // No exception pending. - - // Move native method result back into proper registers and return. - // Invoke result handler (may unbox/promote). - __ ld(R11_scratch1, 0, R1_SP); - __ ld(R3_RET, _ijava_state_neg(lresult), R11_scratch1); - __ lfd(F1_RET, _ijava_state_neg(fresult), R11_scratch1); - __ call_stub(result_handler_addr); - - __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ R0, R11_scratch1, R12_scratch2); - - // Must use the return pc which was loaded from the caller's frame - // as the VM uses return-pc-patching for deoptimization. - __ mtlr(R0); - __ blr(); - - //----------------------------------------------------------------------------- - // An exception is pending. We call into the runtime only if the - // caller was not interpreted. 
If it was interpreted the - // interpreter will do the correct thing. If it isn't interpreted - // (call stub/compiled code) we will change our return and continue. - - BIND(exception_return_sync_check); - - if (synchronized) { - // Don't check for exceptions since we're still in the i2n frame. Do that - // manually afterwards. - unlock_method(false); - } - BIND(exception_return_sync_check_already_unlocked); - - const Register return_pc = R31; - - __ ld(return_pc, 0, R1_SP); - __ ld(return_pc, _abi(lr), return_pc); - - // Get the address of the exception handler. - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), - R16_thread, - return_pc /* return pc */); - __ merge_frames(/*top_frame_sp*/ R21_sender_SP, noreg, R11_scratch1, R12_scratch2); - - // Load the PC of the the exception handler into LR. - __ mtlr(R3_RET); - - // Load exception into R3_ARG1 and clear pending exception in thread. - __ ld(R3_ARG1/*exception*/, thread_(pending_exception)); - __ li(R4_ARG2, 0); - __ std(R4_ARG2, thread_(pending_exception)); - - // Load the original return pc into R4_ARG2. - __ mr(R4_ARG2/*issuing_pc*/, return_pc); - - // Return to exception handler. - __ blr(); - - //============================================================================= - // Counter overflow. - - if (inc_counter) { - // Handle invocation counter overflow. - __ bind(invocation_counter_overflow); - - generate_counter_overflow(continue_after_compile); - } - - return entry; -} - -// Generic interpreted method entry to (asm) interpreter. -// -address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; - address entry = __ pc(); - // Generate the code to allocate the interpreter stack frame. - Register Rsize_of_parameters = R4_ARG2, // Written by generate_fixed_frame. - Rsize_of_locals = R5_ARG3; // Written by generate_fixed_frame. 
- - generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals); - - // -------------------------------------------------------------------------- - // Zero out non-parameter locals. - // Note: *Always* zero out non-parameter locals as Sparc does. It's not - // worth to ask the flag, just do it. - Register Rslot_addr = R6_ARG4, - Rnum = R7_ARG5; - Label Lno_locals, Lzero_loop; - - // Set up the zeroing loop. - __ subf(Rnum, Rsize_of_parameters, Rsize_of_locals); - __ subf(Rslot_addr, Rsize_of_parameters, R18_locals); - __ srdi_(Rnum, Rnum, Interpreter::logStackElementSize); - __ beq(CCR0, Lno_locals); - __ li(R0, 0); - __ mtctr(Rnum); - - // The zero locals loop. - __ bind(Lzero_loop); - __ std(R0, 0, Rslot_addr); - __ addi(Rslot_addr, Rslot_addr, -Interpreter::stackElementSize); - __ bdnz(Lzero_loop); - - __ bind(Lno_locals); - - // -------------------------------------------------------------------------- - // Counter increment and overflow check. - Label invocation_counter_overflow, - profile_method, - profile_method_continue; - if (inc_counter || ProfileInterpreter) { - - Register Rdo_not_unlock_if_synchronized_addr = R11_scratch1; - if (synchronized) { - // Since at this point in the method invocation the exception handler - // would try to exit the monitor of synchronized methods which hasn't - // been entered yet, we set the thread local variable - // _do_not_unlock_if_synchronized to true. If any exception was thrown by - // runtime, exception handling i.e. unlock_if_synchronized_method will - // check this thread local flag. - // This flag has two effects, one is to force an unwind in the topmost - // interpreter frame and not perform an unlock while doing so. - __ li(R0, 1); - __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); - } - - // Argument and return type profiling. - __ profile_parameters_type(R3_ARG1, R4_ARG2, R5_ARG3, R6_ARG4); - - // Increment invocation counter and check for overflow. 
- if (inc_counter) { - generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue); - } - - __ bind(profile_method_continue); - - // Reset the _do_not_unlock_if_synchronized flag. - if (synchronized) { - __ li(R0, 0); - __ stb(R0, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()), R16_thread); - } - } - - // -------------------------------------------------------------------------- - // Locking of synchronized methods. Must happen AFTER invocation_counter - // check and stack overflow check, so method is not locked if overflows. - if (synchronized) { - lock_method(R3_ARG1, R4_ARG2, R5_ARG3); - } -#ifdef ASSERT - else { - Label Lok; - __ lwz(R0, in_bytes(Method::access_flags_offset()), R19_method); - __ andi_(R0, R0, JVM_ACC_SYNCHRONIZED); - __ asm_assert_eq("method needs synchronization", 0x8521); - __ bind(Lok); - } -#endif // ASSERT - - __ verify_thread(); - - // -------------------------------------------------------------------------- - // JVMTI support - __ notify_method_entry(); - - // -------------------------------------------------------------------------- - // Start executing instructions. - __ dispatch_next(vtos); - - // -------------------------------------------------------------------------- - // Out of line counter overflow and MDO creation code. - if (ProfileInterpreter) { - // We have decided to profile this method in the interpreter. - __ bind(profile_method); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); - __ set_method_data_pointer_for_bcp(); - __ b(profile_method_continue); - } - - if (inc_counter) { - // Handle invocation counter overflow. - __ bind(invocation_counter_overflow); - generate_counter_overflow(profile_method_continue); - } - return entry; -} - -// CRC32 Intrinsics. -// -// Contract on scratch and work registers. 
-// ======================================= -// -// On ppc, the register set {R2..R12} is available in the interpreter as scratch/work registers. -// You should, however, keep in mind that {R3_ARG1..R10_ARG8} is the C-ABI argument register set. -// You can't rely on these registers across calls. -// -// The generators for CRC32_update and for CRC32_updateBytes use the -// scratch/work register set internally, passing the work registers -// as arguments to the MacroAssembler emitters as required. -// -// R3_ARG1..R6_ARG4 are preset to hold the incoming java arguments. -// Their contents is not constant but may change according to the requirements -// of the emitted code. -// -// All other registers from the scratch/work register set are used "internally" -// and contain garbage (i.e. unpredictable values) once blr() is reached. -// Basically, only R3_RET contains a defined value which is the function result. -// -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.update(int crc, int b) - */ -address InterpreterGenerator::generate_CRC32_update_entry() { - if (UseCRC32Intrinsics) { - address start = __ pc(); // Remember stub start address (is rtn value). - Label slow_path; - - // Safepoint check - const Register sync_state = R11_scratch1; - int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true); - __ lwz(sync_state, sync_state_offs, sync_state); - __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); - __ bne(CCR0, slow_path); - - // We don't generate local frame and don't align stack because - // we not even call stub code (we generate the code inline) - // and there is no safepoint on this path. - - // Load java parameters. - // R15_esp is callers operand stack pointer, i.e. it points to the parameters. 
- const Register argP = R15_esp; - const Register crc = R3_ARG1; // crc value - const Register data = R4_ARG2; // address of java byte value (kernel_crc32 needs address) - const Register dataLen = R5_ARG3; // source data len (1 byte). Not used because calling the single-byte emitter. - const Register table = R6_ARG4; // address of crc32 table - const Register tmp = dataLen; // Reuse unused len register to show we don't actually need a separate tmp here. - - BLOCK_COMMENT("CRC32_update {"); - - // Arguments are reversed on java expression stack -#ifdef VM_LITTLE_ENDIAN - __ addi(data, argP, 0+1*wordSize); // (stack) address of byte value. Emitter expects address, not value. - // Being passed as an int, the single byte is at offset +0. -#else - __ addi(data, argP, 3+1*wordSize); // (stack) address of byte value. Emitter expects address, not value. - // Being passed from java as an int, the single byte is at offset +3. -#endif - __ lwz(crc, 2*wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register. - - StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); - __ kernel_crc32_singleByte(crc, data, dataLen, table, tmp); - - // Restore caller sp for c2i case and return. - __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. - __ blr(); - - // Generate a vanilla native entry as the slow path. - BLOCK_COMMENT("} CRC32_update"); - BIND(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1); - return start; - } - - return NULL; -} - -// CRC32 Intrinsics. -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.updateBytes( int crc, byte[] b, int off, int len) - * int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len) - */ -address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - if (UseCRC32Intrinsics) { - address start = __ pc(); // Remember stub start address (is rtn value). 
- Label slow_path; - - // Safepoint check - const Register sync_state = R11_scratch1; - int sync_state_offs = __ load_const_optimized(sync_state, SafepointSynchronize::address_of_state(), /*temp*/R0, true); - __ lwz(sync_state, sync_state_offs, sync_state); - __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); - __ bne(CCR0, slow_path); - - // We don't generate local frame and don't align stack because - // we not even call stub code (we generate the code inline) - // and there is no safepoint on this path. - - // Load parameters. - // Z_esp is callers operand stack pointer, i.e. it points to the parameters. - const Register argP = R15_esp; - const Register crc = R3_ARG1; // crc value - const Register data = R4_ARG2; // address of java byte array - const Register dataLen = R5_ARG3; // source data len - const Register table = R6_ARG4; // address of crc32 table - - const Register t0 = R9; // scratch registers for crc calculation - const Register t1 = R10; - const Register t2 = R11; - const Register t3 = R12; - - const Register tc0 = R2; // registers to hold pre-calculated column addresses - const Register tc1 = R7; - const Register tc2 = R8; - const Register tc3 = table; // table address is reconstructed at the end of kernel_crc32_* emitters - - const Register tmp = t0; // Only used very locally to calculate byte buffer address. - - // Arguments are reversed on java expression stack. - // Calculate address of start element. - if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { // Used for "updateByteBuffer direct". 
- BLOCK_COMMENT("CRC32_updateByteBuffer {"); - // crc @ (SP + 5W) (32bit) - // buf @ (SP + 3W) (64bit ptr to long array) - // off @ (SP + 2W) (32bit) - // dataLen @ (SP + 1W) (32bit) - // data = buf + off - __ ld( data, 3*wordSize, argP); // start of byte buffer - __ lwa( tmp, 2*wordSize, argP); // byte buffer offset - __ lwa( dataLen, 1*wordSize, argP); // #bytes to process - __ lwz( crc, 5*wordSize, argP); // current crc state - __ add( data, data, tmp); // Add byte buffer offset. - } else { // Used for "updateBytes update". - BLOCK_COMMENT("CRC32_updateBytes {"); - // crc @ (SP + 4W) (32bit) - // buf @ (SP + 3W) (64bit ptr to byte array) - // off @ (SP + 2W) (32bit) - // dataLen @ (SP + 1W) (32bit) - // data = buf + off + base_offset - __ ld( data, 3*wordSize, argP); // start of byte buffer - __ lwa( tmp, 2*wordSize, argP); // byte buffer offset - __ lwa( dataLen, 1*wordSize, argP); // #bytes to process - __ add( data, data, tmp); // add byte buffer offset - __ lwz( crc, 4*wordSize, argP); // current crc state - __ addi(data, data, arrayOopDesc::base_offset_in_bytes(T_BYTE)); - } - - StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table); - - // Performance measurements show the 1word and 2word variants to be almost equivalent, - // with very light advantages for the 1word variant. We chose the 1word variant for - // code compactness. - __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3); - - // Restore caller sp for c2i case and return. - __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started. - __ blr(); - - // Generate a vanilla native entry as the slow path. 
- BLOCK_COMMENT("} CRC32_updateBytes(Buffer)"); - BIND(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1); - return start; - } - - return NULL; + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); + return i; } // These should never be compiled since the interpreter will prefer // the compiled version to the intrinsic version. bool AbstractInterpreter::can_be_compiled(methodHandle m) { - return !math_entry_available(method_kind(m)); + return !TemplateInterpreter::math_entry_available(method_kind(m)); } // How much stack a method activation needs in stack slots. @@ -1505,411 +154,14 @@ void AbstractInterpreter::layout_activation(Method* method, } } -// ============================================================================= -// Exceptions +// Support abs and sqrt like in compiler. +// For others we can use a normal (native) entry. -void TemplateInterpreterGenerator::generate_throw_exception() { - Register Rexception = R17_tos, - Rcontinuation = R3_RET; +bool TemplateInterpreter::math_entry_available(AbstractInterpreter::MethodKind kind) { + if (!InlineIntrinsics) return false; - // -------------------------------------------------------------------------- - // Entry point if an method returns with a pending exception (rethrow). - Interpreter::_rethrow_exception_entry = __ pc(); - { - __ restore_interpreter_state(R11_scratch1); // Sets R11_scratch1 = fp. - __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1); - __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0); - - // Compiled code destroys templateTableBase, reload. - __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1); - } - - // Entry point if a interpreted method throws an exception (throw). 
- Interpreter::_throw_exception_entry = __ pc(); - { - __ mr(Rexception, R3_RET); - - __ verify_thread(); - __ verify_oop(Rexception); - - // Expression stack must be empty before entering the VM in case of an exception. - __ empty_expression_stack(); - // Find exception handler address and preserve exception oop. - // Call C routine to find handler and jump to it. - __ call_VM(Rexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), Rexception); - __ mtctr(Rcontinuation); - // Push exception for exception handler bytecodes. - __ push_ptr(Rexception); - - // Jump to exception handler (may be remove activation entry!). - __ bctr(); - } - - // If the exception is not handled in the current frame the frame is - // removed and the exception is rethrown (i.e. exception - // continuation is _rethrow_exception). - // - // Note: At this point the bci is still the bxi for the instruction - // which caused the exception and the expression stack is - // empty. Thus, for any VM calls at this point, GC will find a legal - // oop map (with empty expression stack). - - // In current activation - // tos: exception - // bcp: exception bcp - - // -------------------------------------------------------------------------- - // JVMTI PopFrame support - - Interpreter::_remove_activation_preserving_args_entry = __ pc(); - { - // Set the popframe_processing bit in popframe_condition indicating that we are - // currently handling popframe, so that call_VMs that may happen later do not - // trigger new popframe handling cycles. - __ lwz(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread); - __ ori(R11_scratch1, R11_scratch1, JavaThread::popframe_processing_bit); - __ stw(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread); - - // Empty the expression stack, as in normal exception handling. 
- __ empty_expression_stack(); - __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, /* install_monitor_exception */ false); - - // Check to see whether we are returning to a deoptimized frame. - // (The PopFrame call ensures that the caller of the popped frame is - // either interpreted or compiled and deoptimizes it if compiled.) - // Note that we don't compare the return PC against the - // deoptimization blob's unpack entry because of the presence of - // adapter frames in C2. - Label Lcaller_not_deoptimized; - Register return_pc = R3_ARG1; - __ ld(return_pc, 0, R1_SP); - __ ld(return_pc, _abi(lr), return_pc); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), return_pc); - __ cmpdi(CCR0, R3_RET, 0); - __ bne(CCR0, Lcaller_not_deoptimized); - - // The deoptimized case. - // In this case, we can't call dispatch_next() after the frame is - // popped, but instead must save the incoming arguments and restore - // them after deoptimization has occurred. - __ ld(R4_ARG2, in_bytes(Method::const_offset()), R19_method); - __ lhz(R4_ARG2 /* number of params */, in_bytes(ConstMethod::size_of_parameters_offset()), R4_ARG2); - __ slwi(R4_ARG2, R4_ARG2, Interpreter::logStackElementSize); - __ addi(R5_ARG3, R18_locals, Interpreter::stackElementSize); - __ subf(R5_ARG3, R4_ARG2, R5_ARG3); - // Save these arguments. - __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), R16_thread, R4_ARG2, R5_ARG3); - - // Inform deoptimization that it is responsible for restoring these arguments. - __ load_const_optimized(R11_scratch1, JavaThread::popframe_force_deopt_reexecution_bit); - __ stw(R11_scratch1, in_bytes(JavaThread::popframe_condition_offset()), R16_thread); - - // Return from the current method into the deoptimization blob. Will eventually - // end up in the deopt interpeter entry, deoptimization prepared everything that - // we will reexecute the call that called us. 
- __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*reload return_pc*/ return_pc, R11_scratch1, R12_scratch2); - __ mtlr(return_pc); - __ blr(); - - // The non-deoptimized case. - __ bind(Lcaller_not_deoptimized); - - // Clear the popframe condition flag. - __ li(R0, 0); - __ stw(R0, in_bytes(JavaThread::popframe_condition_offset()), R16_thread); - - // Get out of the current method and re-execute the call that called us. - __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2); - __ restore_interpreter_state(R11_scratch1); - __ ld(R12_scratch2, _ijava_state_neg(top_frame_sp), R11_scratch1); - __ resize_frame_absolute(R12_scratch2, R11_scratch1, R0); - if (ProfileInterpreter) { - __ set_method_data_pointer_for_bcp(); - __ ld(R11_scratch1, 0, R1_SP); - __ std(R28_mdx, _ijava_state_neg(mdx), R11_scratch1); - } -#if INCLUDE_JVMTI - Label L_done; - - __ lbz(R11_scratch1, 0, R14_bcp); - __ cmpwi(CCR0, R11_scratch1, Bytecodes::_invokestatic); - __ bne(CCR0, L_done); - - // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. - // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. - __ ld(R4_ARG2, 0, R18_locals); - __ MacroAssembler::call_VM(R4_ARG2, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), R4_ARG2, R19_method, R14_bcp, false); - __ restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true); - __ cmpdi(CCR0, R4_ARG2, 0); - __ beq(CCR0, L_done); - __ std(R4_ARG2, wordSize, R15_esp); - __ bind(L_done); -#endif // INCLUDE_JVMTI - __ dispatch_next(vtos); - } - // end of JVMTI PopFrame support - - // -------------------------------------------------------------------------- - // Remove activation exception entry. - // This is jumped to if an interpreted method can't handle an exception itself - // (we come from the throw/rethrow exception entry above). 
We're going to call - // into the VM to find the exception handler in the caller, pop the current - // frame and return the handler we calculated. - Interpreter::_remove_activation_entry = __ pc(); - { - __ pop_ptr(Rexception); - __ verify_thread(); - __ verify_oop(Rexception); - __ std(Rexception, in_bytes(JavaThread::vm_result_offset()), R16_thread); - - __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, true); - __ notify_method_exit(false, vtos, InterpreterMacroAssembler::SkipNotifyJVMTI, false); - - __ get_vm_result(Rexception); - - // We are done with this activation frame; find out where to go next. - // The continuation point will be an exception handler, which expects - // the following registers set up: - // - // RET: exception oop - // ARG2: Issuing PC (see generate_exception_blob()), only used if the caller is compiled. - - Register return_pc = R31; // Needs to survive the runtime call. - __ ld(return_pc, 0, R1_SP); - __ ld(return_pc, _abi(lr), return_pc); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, return_pc); - - // Remove the current activation. - __ merge_frames(/*top_frame_sp*/ R21_sender_SP, /*return_pc*/ noreg, R11_scratch1, R12_scratch2); - - __ mr(R4_ARG2, return_pc); - __ mtlr(R3_RET); - __ mr(R3_RET, Rexception); - __ blr(); - } + return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) || + (kind==Interpreter::java_lang_math_abs)); } -// JVMTI ForceEarlyReturn support. -// Returns "in the middle" of a method with a "fake" return value. -address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { - Register Rscratch1 = R11_scratch1, - Rscratch2 = R12_scratch2; - - address entry = __ pc(); - __ empty_expression_stack(); - - __ load_earlyret_value(state, Rscratch1); - - __ ld(Rscratch1, in_bytes(JavaThread::jvmti_thread_state_offset()), R16_thread); - // Clear the earlyret state. 
- __ li(R0, 0); - __ stw(R0, in_bytes(JvmtiThreadState::earlyret_state_offset()), Rscratch1); - - __ remove_activation(state, false, false); - // Copied from TemplateTable::_return. - // Restoration of lr done by remove_activation. - switch (state) { - case ltos: - case btos: - case ctos: - case stos: - case atos: - case itos: __ mr(R3_RET, R17_tos); break; - case ftos: - case dtos: __ fmr(F1_RET, F15_ftos); break; - case vtos: // This might be a constructor. Final fields (and volatile fields on PPC64) need - // to get visible before the reference to the object gets stored anywhere. - __ membar(Assembler::StoreStore); break; - default : ShouldNotReachHere(); - } - __ blr(); - - return entry; -} // end of ForceEarlyReturn support - -//----------------------------------------------------------------------------- -// Helper for vtos entry point generation - -void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, - address& bep, - address& cep, - address& sep, - address& aep, - address& iep, - address& lep, - address& fep, - address& dep, - address& vep) { - assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); - Label L; - - aep = __ pc(); __ push_ptr(); __ b(L); - fep = __ pc(); __ push_f(); __ b(L); - dep = __ pc(); __ push_d(); __ b(L); - lep = __ pc(); __ push_l(); __ b(L); - __ align(32, 12, 24); // align L - bep = cep = sep = - iep = __ pc(); __ push_i(); - vep = __ pc(); - __ bind(L); - generate_and_dispatch(t); -} - -//----------------------------------------------------------------------------- -// Generation of individual instructions - -// helpers for generate_and_dispatch - -InterpreterGenerator::InterpreterGenerator(StubQueue* code) - : TemplateInterpreterGenerator(code) { - generate_all(); // Down here so it can be "virtual". 
-} - -//----------------------------------------------------------------------------- - -// Non-product code -#ifndef PRODUCT -address TemplateInterpreterGenerator::generate_trace_code(TosState state) { - //__ flush_bundle(); - address entry = __ pc(); - - const char *bname = NULL; - uint tsize = 0; - switch(state) { - case ftos: - bname = "trace_code_ftos {"; - tsize = 2; - break; - case btos: - bname = "trace_code_btos {"; - tsize = 2; - break; - case ctos: - bname = "trace_code_ctos {"; - tsize = 2; - break; - case stos: - bname = "trace_code_stos {"; - tsize = 2; - break; - case itos: - bname = "trace_code_itos {"; - tsize = 2; - break; - case ltos: - bname = "trace_code_ltos {"; - tsize = 3; - break; - case atos: - bname = "trace_code_atos {"; - tsize = 2; - break; - case vtos: - // Note: In case of vtos, the topmost of stack value could be a int or doubl - // In case of a double (2 slots) we won't see the 2nd stack value. - // Maybe we simply should print the topmost 3 stack slots to cope with the problem. - bname = "trace_code_vtos {"; - tsize = 2; - - break; - case dtos: - bname = "trace_code_dtos {"; - tsize = 3; - break; - default: - ShouldNotReachHere(); - } - BLOCK_COMMENT(bname); - - // Support short-cut for TraceBytecodesAt. - // Don't call into the VM if we don't want to trace to speed up things. - Label Lskip_vm_call; - if (TraceBytecodesAt > 0 && TraceBytecodesAt < max_intx) { - int offs1 = __ load_const_optimized(R11_scratch1, (address) &TraceBytecodesAt, R0, true); - int offs2 = __ load_const_optimized(R12_scratch2, (address) &BytecodeCounter::_counter_value, R0, true); - __ ld(R11_scratch1, offs1, R11_scratch1); - __ lwa(R12_scratch2, offs2, R12_scratch2); - __ cmpd(CCR0, R12_scratch2, R11_scratch1); - __ blt(CCR0, Lskip_vm_call); - } - - __ push(state); - // Load 2 topmost expression stack values. 
- __ ld(R6_ARG4, tsize*Interpreter::stackElementSize, R15_esp); - __ ld(R5_ARG3, Interpreter::stackElementSize, R15_esp); - __ mflr(R31); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), /* unused */ R4_ARG2, R5_ARG3, R6_ARG4, false); - __ mtlr(R31); - __ pop(state); - - if (TraceBytecodesAt > 0 && TraceBytecodesAt < max_intx) { - __ bind(Lskip_vm_call); - } - __ blr(); - BLOCK_COMMENT("} trace_code"); - return entry; -} - -void TemplateInterpreterGenerator::count_bytecode() { - int offs = __ load_const_optimized(R11_scratch1, (address) &BytecodeCounter::_counter_value, R12_scratch2, true); - __ lwz(R12_scratch2, offs, R11_scratch1); - __ addi(R12_scratch2, R12_scratch2, 1); - __ stw(R12_scratch2, offs, R11_scratch1); -} - -void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { - int offs = __ load_const_optimized(R11_scratch1, (address) &BytecodeHistogram::_counters[t->bytecode()], R12_scratch2, true); - __ lwz(R12_scratch2, offs, R11_scratch1); - __ addi(R12_scratch2, R12_scratch2, 1); - __ stw(R12_scratch2, offs, R11_scratch1); -} - -void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { - const Register addr = R11_scratch1, - tmp = R12_scratch2; - // Get index, shift out old bytecode, bring in new bytecode, and store it. - // _index = (_index >> log2_number_of_codes) | - // (bytecode << log2_number_of_codes); - int offs1 = __ load_const_optimized(addr, (address)&BytecodePairHistogram::_index, tmp, true); - __ lwz(tmp, offs1, addr); - __ srwi(tmp, tmp, BytecodePairHistogram::log2_number_of_codes); - __ ori(tmp, tmp, ((int) t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); - __ stw(tmp, offs1, addr); - - // Bump bucket contents. 
- // _counters[_index] ++; - int offs2 = __ load_const_optimized(addr, (address)&BytecodePairHistogram::_counters, R0, true); - __ sldi(tmp, tmp, LogBytesPerInt); - __ add(addr, tmp, addr); - __ lwz(tmp, offs2, addr); - __ addi(tmp, tmp, 1); - __ stw(tmp, offs2, addr); -} - -void TemplateInterpreterGenerator::trace_bytecode(Template* t) { - // Call a little run-time stub to avoid blow-up for each bytecode. - // The run-time runtime saves the right registers, depending on - // the tosca in-state for the given template. - - assert(Interpreter::trace_code(t->tos_in()) != NULL, - "entry must have been generated"); - - // Note: we destroy LR here. - __ bl(Interpreter::trace_code(t->tos_in())); -} - -void TemplateInterpreterGenerator::stop_interpreter_at() { - Label L; - int offs1 = __ load_const_optimized(R11_scratch1, (address) &StopInterpreterAt, R0, true); - int offs2 = __ load_const_optimized(R12_scratch2, (address) &BytecodeCounter::_counter_value, R0, true); - __ ld(R11_scratch1, offs1, R11_scratch1); - __ lwa(R12_scratch2, offs2, R12_scratch2); - __ cmpd(CCR0, R12_scratch2, R11_scratch1); - __ bne(CCR0, L); - __ illtrap(); - __ bind(L); -} - -#endif // !PRODUCT -#endif // !CC_INTERP diff --git a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp index 4450dd71897..b9003dd3c4b 100644 --- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, 2015 SAP AG. All rights reserved. + * Copyright (c) 2013, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,14 +28,17 @@ protected: - // Size of interpreter code. Increase if too small. Interpreter will + // Size of interpreter code. Increase if too small. 
Interpreter will // fail with a guarantee ("not enough space for interpreter generation"); // if too small. // Run with +PrintInterpreter to get the VM to print out the size. // Max size with JVMTI - const static int InterpreterCodeSize = 230*K; + public: + // Support abs and sqrt like in compiler. + // For others we can use a normal (native) entry. + static bool math_entry_available(AbstractInterpreter::MethodKind kind); #endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP diff --git a/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp b/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp index c5fc75d049f..4e9199fa9d8 100644 --- a/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/interpreter_sparc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,7 +38,6 @@ #include "prims/jvmtiThreadState.hpp" #include "prims/methodHandles.hpp" #include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -62,30 +61,6 @@ //---------------------------------------------------------------------------------------------------- - - - -int AbstractInterpreter::BasicType_as_index(BasicType type) { - int i = 0; - switch (type) { - case T_BOOLEAN: i = 0; break; - case T_CHAR : i = 1; break; - case T_BYTE : i = 2; break; - case T_SHORT : i = 3; break; - case T_INT : i = 4; break; - case T_LONG : i = 5; break; - case T_VOID : i = 6; break; - case T_FLOAT : i = 7; break; - case T_DOUBLE : i = 8; break; - case T_OBJECT : i = 9; break; - case T_ARRAY : i = 9; break; - default : ShouldNotReachHere(); - } - assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out 
of bounds"); - return i; -} - - #ifndef _LP64 address AbstractInterpreterGenerator::generate_slow_signature_handler() { address entry = __ pc(); @@ -254,28 +229,3 @@ address InterpreterGenerator::generate_abstract_entry(void) { return entry; } - -bool AbstractInterpreter::can_be_compiled(methodHandle m) { - // No special entry points that preclude compilation - return true; -} - -void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { - - // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in - // the days we had adapter frames. When we deoptimize a situation where a - // compiled caller calls a compiled caller will have registers it expects - // to survive the call to the callee. If we deoptimize the callee the only - // way we can restore these registers is to have the oldest interpreter - // frame that we create restore these values. That is what this routine - // will accomplish. - - // At the moment we have modified c2 to not have any callee save registers - // so this problem does not exist and this routine is just a place holder. - - assert(f->is_interpreted_frame(), "must be interpreted"); -} - - -//---------------------------------------------------------------------------------------------------- -// Exceptions diff --git a/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp new file mode 100644 index 00000000000..31545256045 --- /dev/null +++ b/hotspot/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp @@ -0,0 +1,1832 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#include "utilities/macros.hpp" + +#ifndef CC_INTERP +#ifndef FAST_DISPATCH +#define FAST_DISPATCH 1 +#endif +#undef FAST_DISPATCH + + +// Generation of Interpreter +// +// The InterpreterGenerator generates the interpreter into Interpreter::_code. 
+ + +#define __ _masm-> + + +//---------------------------------------------------------------------------------------------------- + + +void InterpreterGenerator::save_native_result(void) { + // result potentially in O0/O1: save it across calls + const Address& l_tmp = InterpreterMacroAssembler::l_tmp; + + // result potentially in F0/F1: save it across calls + const Address& d_tmp = InterpreterMacroAssembler::d_tmp; + + // save and restore any potential method result value around the unlocking operation + __ stf(FloatRegisterImpl::D, F0, d_tmp); +#ifdef _LP64 + __ stx(O0, l_tmp); +#else + __ std(O0, l_tmp); +#endif +} + +void InterpreterGenerator::restore_native_result(void) { + const Address& l_tmp = InterpreterMacroAssembler::l_tmp; + const Address& d_tmp = InterpreterMacroAssembler::d_tmp; + + // Restore any method result value + __ ldf(FloatRegisterImpl::D, d_tmp, F0); +#ifdef _LP64 + __ ldx(l_tmp, O0); +#else + __ ldd(l_tmp, O0); +#endif +} + +address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) { + assert(!pass_oop || message == NULL, "either oop or message but not both"); + address entry = __ pc(); + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // load exception object + __ set((intptr_t)name, G3_scratch); + if (pass_oop) { + __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), G3_scratch, Otos_i); + } else { + __ set((intptr_t)message, G4_scratch); + __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), G3_scratch, G4_scratch); + } + // throw exception + assert(Interpreter::throw_exception_entry() != NULL, "generate it first"); + AddressLiteral thrower(Interpreter::throw_exception_entry()); + __ jump_to(thrower, G3_scratch); + __ delayed()->nop(); + return entry; +} + +address 
TemplateInterpreterGenerator::generate_ClassCastException_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an exception + // happened + __ empty_expression_stack(); + // load exception object + __ call_VM(Oexception, + CAST_FROM_FN_PTR(address, + InterpreterRuntime::throw_ClassCastException), + Otos_i); + __ should_not_reach_here(); + return entry; +} + + +address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char* name) { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + // convention: expect aberrant index in register G3_scratch, then shuffle the + // index to G4_scratch for the VM call + __ mov(G3_scratch, G4_scratch); + __ set((intptr_t)name, G3_scratch); + __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), G3_scratch, G4_scratch); + __ should_not_reach_here(); + return entry; +} + + +address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { + address entry = __ pc(); + // expression stack must be empty before entering the VM if an exception happened + __ empty_expression_stack(); + __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + __ should_not_reach_here(); + return entry; +} + + +address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { + address entry = __ pc(); + + if (state == atos) { + __ profile_return_type(O0, G3_scratch, G1_scratch); + } + +#if !defined(_LP64) && defined(COMPILER2) + // All return values are where we want them, except for Longs. C2 returns + // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. + // Since the interpreter will return longs in G1 and O0/O1 in the 32bit + // build even if we are returning from interpreted we just do a little + // stupid shuffing. 
+ // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to + // do this here. Unfortunately if we did a rethrow we'd see an machepilog node + // first which would move g1 -> O0/O1 and destroy the exception we were throwing. + + if (state == ltos) { + __ srl (G1, 0, O1); + __ srlx(G1, 32, O0); + } +#endif // !_LP64 && COMPILER2 + + // The callee returns with the stack possibly adjusted by adapter transition + // We remove that possible adjustment here. + // All interpreter local registers are untouched. Any result is passed back + // in the O0/O1 or float registers. Before continuing, the arguments must be + // popped from the java expression stack; i.e., Lesp must be adjusted. + + __ mov(Llast_SP, SP); // Remove any adapter added stack space. + + const Register cache = G3_scratch; + const Register index = G1_scratch; + __ get_cache_and_index_at_bcp(cache, index, 1, index_size); + + const Register flags = cache; + __ ld_ptr(cache, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset(), flags); + const Register parameter_size = flags; + __ and3(flags, ConstantPoolCacheEntry::parameter_size_mask, parameter_size); // argument size in words + __ sll(parameter_size, Interpreter::logStackElementSize, parameter_size); // each argument size in bytes + __ add(Lesp, parameter_size, Lesp); // pop arguments + __ dispatch_next(state, step); + + return entry; +} + + +address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { + address entry = __ pc(); + __ get_constant_pool_cache(LcpoolCache); // load LcpoolCache +#if INCLUDE_JVMCI + // Check if we need to take lock at entry of synchronized method. + if (UseJVMCICompiler) { + Label L; + Address pending_monitor_enter_addr(G2_thread, JavaThread::pending_monitorenter_offset()); + __ ldbool(pending_monitor_enter_addr, Gtemp); // Load if pending monitor enter + __ cmp_and_br_short(Gtemp, G0, Assembler::equal, Assembler::pn, L); + // Clear flag. 
+ __ stbool(G0, pending_monitor_enter_addr); + // Take lock. + lock_method(); + __ bind(L); + } +#endif + { Label L; + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + __ ld_ptr(exception_addr, Gtemp); // Load pending exception. + __ br_null_short(Gtemp, Assembler::pt, L); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + __ dispatch_next(state, step); + return entry; +} + +// A result handler converts/unboxes a native call result into +// a java interpreter/compiler result. The current frame is an +// interpreter frame. The activation frame unwind code must be +// consistent with that of TemplateTable::_return(...). In the +// case of native methods, the caller's SP was not modified. +address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { + address entry = __ pc(); + Register Itos_i = Otos_i ->after_save(); + Register Itos_l = Otos_l ->after_save(); + Register Itos_l1 = Otos_l1->after_save(); + Register Itos_l2 = Otos_l2->after_save(); + switch (type) { + case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, Itos_i); break; // !0 => true; 0 => false + case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, Itos_i); break; // cannot use and3, 0xFFFF too big as immediate value! 
+ case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, Itos_i); break; + case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, Itos_i); break; + case T_LONG : +#ifndef _LP64 + __ mov(O1, Itos_l2); // move other half of long +#endif // ifdef or no ifdef, fall through to the T_INT case + case T_INT : __ mov(O0, Itos_i); break; + case T_VOID : /* nothing to do */ break; + case T_FLOAT : assert(F0 == Ftos_f, "fix this code" ); break; + case T_DOUBLE : assert(F0 == Ftos_d, "fix this code" ); break; + case T_OBJECT : + __ ld_ptr(FP, (frame::interpreter_frame_oop_temp_offset*wordSize) + STACK_BIAS, Itos_i); + __ verify_oop(Itos_i); + break; + default : ShouldNotReachHere(); + } + __ ret(); // return from interpreter activation + __ delayed()->restore(I5_savedSP, G0, SP); // remove interpreter frame + NOT_PRODUCT(__ emit_int32(0);) // marker for disassembly + return entry; +} + +address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) { + address entry = __ pc(); + __ push(state); + __ call_VM(noreg, runtime_entry); + __ dispatch_via(vtos, Interpreter::normal_table(vtos)); + return entry; +} + + +address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { + address entry = __ pc(); + __ dispatch_next(state); + return entry; +} + +// +// Helpers for commoning out cases in the various type of method entries. +// + +// increment invocation count & check for overflow +// +// Note: checking for negative value instead of overflow +// so we have a 'sticky' overflow test +// +// Lmethod: method +// ??: invocation counter +// +void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { + // Note: In tiered we increment either counters in MethodCounters* or in + // MDO depending if we're profiling or not. 
+ const Register G3_method_counters = G3_scratch; + Label done; + + if (TieredCompilation) { + const int increment = InvocationCounter::count_increment; + Label no_mdo; + if (ProfileInterpreter) { + // If no method data exists, go to profile_continue. + __ ld_ptr(Lmethod, Method::method_data_offset(), G4_scratch); + __ br_null_short(G4_scratch, Assembler::pn, no_mdo); + // Increment counter + Address mdo_invocation_counter(G4_scratch, + in_bytes(MethodData::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + Address mask(G4_scratch, in_bytes(MethodData::invoke_mask_offset())); + __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, + G3_scratch, Lscratch, + Assembler::zero, overflow); + __ ba_short(done); + } + + // Increment counter in MethodCounters* + __ bind(no_mdo); + Address invocation_counter(G3_method_counters, + in_bytes(MethodCounters::invocation_counter_offset()) + + in_bytes(InvocationCounter::counter_offset())); + __ get_method_counters(Lmethod, G3_method_counters, done); + Address mask(G3_method_counters, in_bytes(MethodCounters::invoke_mask_offset())); + __ increment_mask_and_jump(invocation_counter, increment, mask, + G4_scratch, Lscratch, + Assembler::zero, overflow); + __ bind(done); + } else { // not TieredCompilation + // Update standard invocation counters + __ get_method_counters(Lmethod, G3_method_counters, done); + __ increment_invocation_counter(G3_method_counters, O0, G4_scratch); + if (ProfileInterpreter) { + Address interpreter_invocation_counter(G3_method_counters, + in_bytes(MethodCounters::interpreter_invocation_counter_offset())); + __ ld(interpreter_invocation_counter, G4_scratch); + __ inc(G4_scratch); + __ st(G4_scratch, interpreter_invocation_counter); + } + + if (ProfileInterpreter && profile_method != NULL) { + // Test to see if we should create a method data oop + Address profile_limit(G3_method_counters, in_bytes(MethodCounters::interpreter_profile_limit_offset())); + __ 
ld(profile_limit, G1_scratch); + __ cmp_and_br_short(O0, G1_scratch, Assembler::lessUnsigned, Assembler::pn, *profile_method_continue); + + // if no method data exists, go to profile_method + __ test_method_data_pointer(*profile_method); + } + + Address invocation_limit(G3_method_counters, in_bytes(MethodCounters::interpreter_invocation_limit_offset())); + __ ld(invocation_limit, G3_scratch); + __ cmp(O0, G3_scratch); + __ br(Assembler::greaterEqualUnsigned, false, Assembler::pn, *overflow); // Far distance + __ delayed()->nop(); + __ bind(done); + } + +} + +// Allocate monitor and lock method (asm interpreter) +// ebx - Method* +// +void TemplateInterpreterGenerator::lock_method() { + __ ld(Lmethod, in_bytes(Method::access_flags_offset()), O0); // Load access flags. + +#ifdef ASSERT + { Label ok; + __ btst(JVM_ACC_SYNCHRONIZED, O0); + __ br( Assembler::notZero, false, Assembler::pt, ok); + __ delayed()->nop(); + __ stop("method doesn't need synchronization"); + __ bind(ok); + } +#endif // ASSERT + + // get synchronization object to O0 + { Label done; + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + __ btst(JVM_ACC_STATIC, O0); + __ br( Assembler::zero, true, Assembler::pt, done); + __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case + + __ ld_ptr( Lmethod, in_bytes(Method::const_offset()), O0); + __ ld_ptr( O0, in_bytes(ConstMethod::constants_offset()), O0); + __ ld_ptr( O0, ConstantPool::pool_holder_offset_in_bytes(), O0); + + // lock the mirror, not the Klass* + __ ld_ptr( O0, mirror_offset, O0); + +#ifdef ASSERT + __ tst(O0); + __ breakpoint_trap(Assembler::zero, Assembler::ptr_cc); +#endif // ASSERT + + __ bind(done); + } + + __ add_monitor_to_stack(true, noreg, noreg); // allocate monitor elem + __ st_ptr( O0, Lmonitors, BasicObjectLock::obj_offset_in_bytes()); // store object + // __ untested("lock_object from method entry"); + __ lock_object(Lmonitors, O0); +} + + +void 
TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe_size, + Register Rscratch, + Register Rscratch2) { + const int page_size = os::vm_page_size(); + Label after_frame_check; + + assert_different_registers(Rframe_size, Rscratch, Rscratch2); + + __ set(page_size, Rscratch); + __ cmp_and_br_short(Rframe_size, Rscratch, Assembler::lessEqual, Assembler::pt, after_frame_check); + + // get the stack base, and in debug, verify it is non-zero + __ ld_ptr( G2_thread, Thread::stack_base_offset(), Rscratch ); +#ifdef ASSERT + Label base_not_zero; + __ br_notnull_short(Rscratch, Assembler::pn, base_not_zero); + __ stop("stack base is zero in generate_stack_overflow_check"); + __ bind(base_not_zero); +#endif + + // get the stack size, and in debug, verify it is non-zero + assert( sizeof(size_t) == sizeof(intptr_t), "wrong load size" ); + __ ld_ptr( G2_thread, Thread::stack_size_offset(), Rscratch2 ); +#ifdef ASSERT + Label size_not_zero; + __ br_notnull_short(Rscratch2, Assembler::pn, size_not_zero); + __ stop("stack size is zero in generate_stack_overflow_check"); + __ bind(size_not_zero); +#endif + + // compute the beginning of the protected zone minus the requested frame size + __ sub( Rscratch, Rscratch2, Rscratch ); + __ set( (StackRedPages+StackYellowPages) * page_size, Rscratch2 ); + __ add( Rscratch, Rscratch2, Rscratch ); + + // Add in the size of the frame (which is the same as subtracting it from the + // SP, which would take another register + __ add( Rscratch, Rframe_size, Rscratch ); + + // the frame is greater than one page in size, so check against + // the bottom of the stack + __ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check); + + // the stack will overflow, throw an exception + + // Note that SP is restored to sender's sp (in the delay slot). 
This + // is necessary if the sender's frame is an extended compiled frame + // (see gen_c2i_adapter()) and safer anyway in case of JSR292 + // adaptations. + + // Note also that the restored frame is not necessarily interpreted. + // Use the shared runtime version of the StackOverflowError. + assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); + AddressLiteral stub(StubRoutines::throw_StackOverflowError_entry()); + __ jump_to(stub, Rscratch); + __ delayed()->mov(O5_savedSP, SP); + + // if you get to here, then there is enough stack space + __ bind( after_frame_check ); +} + + +// +// Generate a fixed interpreter frame. This is identical setup for interpreted +// methods and for native methods hence the shared code. + + +//---------------------------------------------------------------------------------------------------- +// Stack frame layout +// +// When control flow reaches any of the entry types for the interpreter +// the following holds -> +// +// C2 Calling Conventions: +// +// The entry code below assumes that the following registers are set +// when coming in: +// G5_method: holds the Method* of the method to call +// Lesp: points to the TOS of the callers expression stack +// after having pushed all the parameters +// +// The entry code does the following to setup an interpreter frame +// pop parameters from the callers stack by adjusting Lesp +// set O0 to Lesp +// compute X = (max_locals - num_parameters) +// bump SP up by X to accomadate the extra locals +// compute X = max_expression_stack +// + vm_local_words +// + 16 words of register save area +// save frame doing a save sp, -X, sp growing towards lower addresses +// set Lbcp, Lmethod, LcpoolCache +// set Llocals to i0 +// set Lmonitors to FP - rounded_vm_local_words +// set Lesp to Lmonitors - 4 +// +// The frame has now been setup to do the rest of the entry code + +// Try this optimization: Most method entries could live in a +// "one size fits all" stack 
frame without all the dynamic size +// calculations. It might be profitable to do all this calculation +// statically and approximately for "small enough" methods. + +//----------------------------------------------------------------------------------------------- + +// C1 Calling conventions +// +// Upon method entry, the following registers are setup: +// +// g2 G2_thread: current thread +// g5 G5_method: method to activate +// g4 Gargs : pointer to last argument +// +// +// Stack: +// +// +---------------+ <--- sp +// | | +// : reg save area : +// | | +// +---------------+ <--- sp + 0x40 +// | | +// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) +// | | +// +---------------+ <--- sp + 0x5c +// | | +// : free : +// | | +// +---------------+ <--- Gargs +// | | +// : arguments : +// | | +// +---------------+ +// | | +// +// +// +// AFTER FRAME HAS BEEN SETUP for method interpretation the stack looks like: +// +// +---------------+ <--- sp +// | | +// : reg save area : +// | | +// +---------------+ <--- sp + 0x40 +// | | +// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) +// | | +// +---------------+ <--- sp + 0x5c +// | | +// : : +// | | <--- Lesp +// +---------------+ <--- Lmonitors (fp - 0x18) +// | VM locals | +// +---------------+ <--- fp +// | | +// : reg save area : +// | | +// +---------------+ <--- fp + 0x40 +// | | +// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) +// | | +// +---------------+ <--- fp + 0x5c +// | | +// : free : +// | | +// +---------------+ +// | | +// : nonarg locals : +// | | +// +---------------+ +// | | +// : arguments : +// | | <--- Llocals +// +---------------+ <--- Gargs +// | | + +void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { + // + // + // The entry code sets up a new interpreter frame in 4 steps: + // + // 1) Increase caller's SP by for the extra local space needed: + // 
(check for overflow) + // Efficient implementation of xload/xstore bytecodes requires + // that arguments and non-argument locals are in a contigously + // addressable memory block => non-argument locals must be + // allocated in the caller's frame. + // + // 2) Create a new stack frame and register window: + // The new stack frame must provide space for the standard + // register save area, the maximum java expression stack size, + // the monitor slots (0 slots initially), and some frame local + // scratch locations. + // + // 3) The following interpreter activation registers must be setup: + // Lesp : expression stack pointer + // Lbcp : bytecode pointer + // Lmethod : method + // Llocals : locals pointer + // Lmonitors : monitor pointer + // LcpoolCache: constant pool cache + // + // 4) Initialize the non-argument locals if necessary: + // Non-argument locals may need to be initialized to NULL + // for GC to work. If the oop-map information is accurate + // (in the absence of the JSR problem), no initialization + // is necessary. + // + // (gri - 2/25/2000) + + + int rounded_vm_local_words = round_to( frame::interpreter_frame_vm_local_words, WordsPerLong ); + + const int extra_space = + rounded_vm_local_words + // frame local scratch space + Method::extra_stack_entries() + // extra stack for jsr 292 + frame::memory_parameter_word_sp_offset + // register save area + (native_call ? frame::interpreter_frame_extra_outgoing_argument_words : 0); + + const Register Glocals_size = G3; + const Register RconstMethod = Glocals_size; + const Register Otmp1 = O3; + const Register Otmp2 = O4; + // Lscratch can't be used as a temporary because the call_stub uses + // it to assert that the stack frame was setup correctly. 
+ const Address constMethod (G5_method, Method::const_offset()); + const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); + + __ ld_ptr( constMethod, RconstMethod ); + __ lduh( size_of_parameters, Glocals_size); + + // Gargs points to first local + BytesPerWord + // Set the saved SP after the register window save + // + assert_different_registers(Gargs, Glocals_size, Gframe_size, O5_savedSP); + __ sll(Glocals_size, Interpreter::logStackElementSize, Otmp1); + __ add(Gargs, Otmp1, Gargs); + + if (native_call) { + __ calc_mem_param_words( Glocals_size, Gframe_size ); + __ add( Gframe_size, extra_space, Gframe_size); + __ round_to( Gframe_size, WordsPerLong ); + __ sll( Gframe_size, LogBytesPerWord, Gframe_size ); + } else { + + // + // Compute number of locals in method apart from incoming parameters + // + const Address size_of_locals (Otmp1, ConstMethod::size_of_locals_offset()); + __ ld_ptr( constMethod, Otmp1 ); + __ lduh( size_of_locals, Otmp1 ); + __ sub( Otmp1, Glocals_size, Glocals_size ); + __ round_to( Glocals_size, WordsPerLong ); + __ sll( Glocals_size, Interpreter::logStackElementSize, Glocals_size ); + + // see if the frame is greater than one page in size. 
If so, + // then we need to verify there is enough stack space remaining + // Frame_size = (max_stack + extra_space) * BytesPerWord; + __ ld_ptr( constMethod, Gframe_size ); + __ lduh( Gframe_size, in_bytes(ConstMethod::max_stack_offset()), Gframe_size ); + __ add( Gframe_size, extra_space, Gframe_size ); + __ round_to( Gframe_size, WordsPerLong ); + __ sll( Gframe_size, Interpreter::logStackElementSize, Gframe_size); + + // Add in java locals size for stack overflow check only + __ add( Gframe_size, Glocals_size, Gframe_size ); + + const Register Otmp2 = O4; + assert_different_registers(Otmp1, Otmp2, O5_savedSP); + generate_stack_overflow_check(Gframe_size, Otmp1, Otmp2); + + __ sub( Gframe_size, Glocals_size, Gframe_size); + + // + // bump SP to accomodate the extra locals + // + __ sub( SP, Glocals_size, SP ); + } + + // + // now set up a stack frame with the size computed above + // + __ neg( Gframe_size ); + __ save( SP, Gframe_size, SP ); + + // + // now set up all the local cache registers + // + // NOTE: At this point, Lbyte_code/Lscratch has been modified. Note + // that all present references to Lbyte_code initialize the register + // immediately before use + if (native_call) { + __ mov(G0, Lbcp); + } else { + __ ld_ptr(G5_method, Method::const_offset(), Lbcp); + __ add(Lbcp, in_bytes(ConstMethod::codes_offset()), Lbcp); + } + __ mov( G5_method, Lmethod); // set Lmethod + __ get_constant_pool_cache( LcpoolCache ); // set LcpoolCache + __ sub(FP, rounded_vm_local_words * BytesPerWord, Lmonitors ); // set Lmonitors +#ifdef _LP64 + __ add( Lmonitors, STACK_BIAS, Lmonitors ); // Account for 64 bit stack bias +#endif + __ sub(Lmonitors, BytesPerWord, Lesp); // set Lesp + + // setup interpreter activation registers + __ sub(Gargs, BytesPerWord, Llocals); // set Llocals + + if (ProfileInterpreter) { +#ifdef FAST_DISPATCH + // FAST_DISPATCH and ProfileInterpreter are mutually exclusive since + // they both use I2. 
+ assert(0, "FAST_DISPATCH and +ProfileInterpreter are mutually exclusive"); +#endif // FAST_DISPATCH + __ set_method_data_pointer(); + } + +} + +// Method entry for java.lang.ref.Reference.get. +address InterpreterGenerator::generate_Reference_get_entry(void) { +#if INCLUDE_ALL_GCS + // Code: _aload_0, _getfield, _areturn + // parameter size = 1 + // + // The code that gets generated by this routine is split into 2 parts: + // 1. The "intrinsified" code for G1 (or any SATB based GC), + // 2. The slow path - which is an expansion of the regular method entry. + // + // Notes:- + // * In the G1 code we do not check whether we need to block for + // a safepoint. If G1 is enabled then we must execute the specialized + // code for Reference.get (except when the Reference object is null) + // so that we can log the value in the referent field with an SATB + // update buffer. + // If the code for the getfield template is modified so that the + // G1 pre-barrier code is executed when the current method is + // Reference.get() then going through the normal method entry + // will be fine. + // * The G1 code can, however, check the receiver object (the instance + // of java.lang.Reference) and jump to the slow path if null. If the + // Reference object is null then we obviously cannot fetch the referent + // and so we don't need to call the G1 pre-barrier. Thus we can use the + // regular method entry code to generate the NPE. + // + // This code is based on generate_accessor_enty. + + address entry = __ pc(); + + const int referent_offset = java_lang_ref_Reference::referent_offset; + guarantee(referent_offset > 0, "referent offset not initialized"); + + if (UseG1GC) { + Label slow_path; + + // In the G1 code we don't check if we need to reach a safepoint. We + // continue and the thread will safepoint at the next bytecode dispatch. + + // Check if local 0 != NULL + // If the receiver is null then it is OK to jump to the slow path. 
+ __ ld_ptr(Gargs, G0, Otos_i ); // get local 0 + // check if local 0 == NULL and go the slow path + __ cmp_and_brx_short(Otos_i, 0, Assembler::equal, Assembler::pn, slow_path); + + + // Load the value of the referent field. + if (Assembler::is_simm13(referent_offset)) { + __ load_heap_oop(Otos_i, referent_offset, Otos_i); + } else { + __ set(referent_offset, G3_scratch); + __ load_heap_oop(Otos_i, G3_scratch, Otos_i); + } + + // Generate the G1 pre-barrier code to log the value of + // the referent field in an SATB buffer. Note with + // these parameters the pre-barrier does not generate + // the load of the previous value + + __ g1_write_barrier_pre(noreg /* obj */, noreg /* index */, 0 /* offset */, + Otos_i /* pre_val */, + G3_scratch /* tmp */, + true /* preserve_o_regs */); + + // _areturn + __ retl(); // return from leaf routine + __ delayed()->mov(O5_savedSP, SP); + + // Generate regular method entry + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); + return entry; + } +#endif // INCLUDE_ALL_GCS + + // If G1 is not enabled then attempt to go through the accessor entry point + // Reference.get is an accessor + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + Label L_slow_path; + // If we need a safepoint check, generate full interpreter entry. 
+ ExternalAddress state(SafepointSynchronize::address_of_state()); + __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2); + __ set(SafepointSynchronize::_not_synchronized, O3); + __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path); + + // Load parameters + const Register crc = O0; // initial crc + const Register val = O1; // byte to update with + const Register table = O2; // address of 256-entry lookup table + + __ ldub(Gargs, 3, val); + __ lduw(Gargs, 8, crc); + + __ set(ExternalAddress(StubRoutines::crc_table_addr()), table); + + __ not1(crc); // ~crc + __ clruwu(crc); + __ update_byte_crc32(crc, val, table); + __ not1(crc); // ~crc + + // result in O0 + __ retl(); + __ delayed()->nop(); + + // generate a vanilla native entry as the slow path + __ bind(L_slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + Label L_slow_path; + // If we need a safepoint check, generate full interpreter entry. 
+ ExternalAddress state(SafepointSynchronize::address_of_state()); + __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2); + __ set(SafepointSynchronize::_not_synchronized, O3); + __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path); + + // Load parameters from the stack + const Register crc = O0; // initial crc + const Register buf = O1; // source java byte array address + const Register len = O2; // len + const Register offset = O3; // offset + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ lduw(Gargs, 0, len); + __ lduw(Gargs, 8, offset); + __ ldx( Gargs, 16, buf); + __ lduw(Gargs, 32, crc); + __ add(buf, offset, buf); + } else { + __ lduw(Gargs, 0, len); + __ lduw(Gargs, 8, offset); + __ ldx( Gargs, 16, buf); + __ lduw(Gargs, 24, crc); + __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size + __ add(buf ,offset, buf); + } + + // Call the crc32 kernel + __ MacroAssembler::save_thread(L7_thread_cache); + __ kernel_crc32(crc, buf, len, O3); + __ MacroAssembler::restore_thread(L7_thread_cache); + + // result in O0 + __ retl(); + __ delayed()->nop(); + + // generate a vanilla native entry as the slow path + __ bind(L_slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +// +// Interpreter stub for calling a native method. (asm interpreter) +// This sets up a somewhat different looking stack for calling the native method +// than the typical interpreter frame setup. 
+// + +address InterpreterGenerator::generate_native_entry(bool synchronized) { + address entry = __ pc(); + + // the following temporary registers are used during frame creation + const Register Gtmp1 = G3_scratch ; + const Register Gtmp2 = G1_scratch; + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // make sure registers are different! + assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2); + + const Address Laccess_flags(Lmethod, Method::access_flags_offset()); + + const Register Glocals_size = G3; + assert_different_registers(Glocals_size, G4_scratch, Gframe_size); + + // make sure method is native & not abstract + // rethink these assertions - they can be simplified and shared (gri 2/25/2000) +#ifdef ASSERT + __ ld(G5_method, Method::access_flags_offset(), Gtmp1); + { + Label L; + __ btst(JVM_ACC_NATIVE, Gtmp1); + __ br(Assembler::notZero, false, Assembler::pt, L); + __ delayed()->nop(); + __ stop("tried to execute non-native method as native"); + __ bind(L); + } + { Label L; + __ btst(JVM_ACC_ABSTRACT, Gtmp1); + __ br(Assembler::zero, false, Assembler::pt, L); + __ delayed()->nop(); + __ stop("tried to execute abstract method as non-abstract"); + __ bind(L); + } +#endif // ASSERT + + // generate the code to allocate the interpreter stack frame + generate_fixed_frame(true); + + // + // No locals to initialize for native method + // + + // this slot will be set later, we initialize it to null here just in + // case we get a GC before the actual value is stored later + __ st_ptr(G0, FP, (frame::interpreter_frame_oop_temp_offset * wordSize) + STACK_BIAS); + + const Address do_not_unlock_if_synchronized(G2_thread, + JavaThread::do_not_unlock_if_synchronized_offset()); + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. 
If any exception was thrown by + // runtime, exception handling i.e. unlock_if_synchronized_method will + // check this thread local flag. + // This flag has two effects, one is to force an unwind in the topmost + // interpreter frame and not perform an unlock while doing so. + + __ movbool(true, G3_scratch); + __ stbool(G3_scratch, do_not_unlock_if_synchronized); + + // increment invocation counter and check for overflow + // + // Note: checking for negative value instead of overflow + // so we have a 'sticky' overflow test (may be of + // importance as soon as we have true MT/MP) + Label invocation_counter_overflow; + Label Lcontinue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, NULL, NULL); + + } + __ bind(Lcontinue); + + bang_stack_shadow_pages(true); + + // reset the _do_not_unlock_if_synchronized flag + __ stbool(G0, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. + + if (synchronized) { + lock_method(); + } else { +#ifdef ASSERT + { Label ok; + __ ld(Laccess_flags, O0); + __ btst(JVM_ACC_SYNCHRONIZED, O0); + __ br( Assembler::zero, false, Assembler::pt, ok); + __ delayed()->nop(); + __ stop("method needs synchronization"); + __ bind(ok); + } +#endif // ASSERT + } + + + // start execution + __ verify_thread(); + + // JVMTI support + __ notify_method_entry(); + + // native call + + // (note that O0 is never an oop--at most it is a handle) + // It is important not to smash any handles created by this call, + // until any oop handle in O0 is dereferenced. 
+ + // (note that the space for outgoing params is preallocated) + + // get signature handler + { Label L; + Address signature_handler(Lmethod, Method::signature_handler_offset()); + __ ld_ptr(signature_handler, G3_scratch); + __ br_notnull_short(G3_scratch, Assembler::pt, L); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), Lmethod); + __ ld_ptr(signature_handler, G3_scratch); + __ bind(L); + } + + // Push a new frame so that the args will really be stored in + // Copy a few locals across so the new frame has the variables + // we need but these values will be dead at the jni call and + // therefore not gc volatile like the values in the current + // frame (Lmethod in particular) + + // Flush the method pointer to the register save area + __ st_ptr(Lmethod, SP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS); + __ mov(Llocals, O1); + + // calculate where the mirror handle body is allocated in the interpreter frame: + __ add(FP, (frame::interpreter_frame_oop_temp_offset * wordSize) + STACK_BIAS, O2); + + // Calculate current frame size + __ sub(SP, FP, O3); // Calculate negative of current frame size + __ save(SP, O3, SP); // Allocate an identical sized frame + + // Note I7 has leftover trash. Slow signature handler will fill it in + // should we get there. Normal jni call will set reasonable last_Java_pc + // below (and fix I7 so the stack trace doesn't have a meaningless frame + // in it). + + // Load interpreter frame's Lmethod into same register here + + __ ld_ptr(FP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS, Lmethod); + + __ mov(I1, Llocals); + __ mov(I2, Lscratch2); // save the address of the mirror + + + // ONLY Lmethod and Llocals are valid here! 
+ + // call signature handler, It will move the arg properly since Llocals in current frame + // matches that in outer frame + + __ callr(G3_scratch, 0); + __ delayed()->nop(); + + // Result handler is in Lscratch + + // Reload interpreter frame's Lmethod since slow signature handler may block + __ ld_ptr(FP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS, Lmethod); + + { Label not_static; + + __ ld(Laccess_flags, O0); + __ btst(JVM_ACC_STATIC, O0); + __ br( Assembler::zero, false, Assembler::pt, not_static); + // get native function entry point(O0 is a good temp until the very end) + __ delayed()->ld_ptr(Lmethod, in_bytes(Method::native_function_offset()), O0); + // for static methods insert the mirror argument + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + + __ ld_ptr(Lmethod, Method:: const_offset(), O1); + __ ld_ptr(O1, ConstMethod::constants_offset(), O1); + __ ld_ptr(O1, ConstantPool::pool_holder_offset_in_bytes(), O1); + __ ld_ptr(O1, mirror_offset, O1); +#ifdef ASSERT + if (!PrintSignatureHandlers) // do not dirty the output with this + { Label L; + __ br_notnull_short(O1, Assembler::pt, L); + __ stop("mirror is missing"); + __ bind(L); + } +#endif // ASSERT + __ st_ptr(O1, Lscratch2, 0); + __ mov(Lscratch2, O1); + __ bind(not_static); + } + + // At this point, arguments have been copied off of stack into + // their JNI positions, which are O1..O5 and SP[68..]. + // Oops are boxed in-place on the stack, with handles copied to arguments. + // The result handler is in Lscratch. O0 will shortly hold the JNIEnv*. 
+ +#ifdef ASSERT + { Label L; + __ br_notnull_short(O0, Assembler::pt, L); + __ stop("native entry point is missing"); + __ bind(L); + } +#endif // ASSERT + + // + // setup the frame anchor + // + // The scavenge function only needs to know that the PC of this frame is + // in the interpreter method entry code, it doesn't need to know the exact + // PC and hence we can use O7 which points to the return address from the + // previous call in the code stream (signature handler function) + // + // The other trick is we set last_Java_sp to FP instead of the usual SP because + // we have pushed the extra frame in order to protect the volatile register(s) + // in that frame when we return from the jni call + // + + __ set_last_Java_frame(FP, O7); + __ mov(O7, I7); // make dummy interpreter frame look like one above, + // not meaningless information that'll confuse me. + + // flush the windows now. We don't care about the current (protection) frame + // only the outer frames + + __ flushw(); + + // mark windows as flushed + Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); + __ set(JavaFrameAnchor::flushed, G3_scratch); + __ st(G3_scratch, flags); + + // Transition from _thread_in_Java to _thread_in_native. We are already safepoint ready. + + Address thread_state(G2_thread, JavaThread::thread_state_offset()); +#ifdef ASSERT + { Label L; + __ ld(thread_state, G3_scratch); + __ cmp_and_br_short(G3_scratch, _thread_in_Java, Assembler::equal, Assembler::pt, L); + __ stop("Wrong thread state in native stub"); + __ bind(L); + } +#endif // ASSERT + __ set(_thread_in_native, G3_scratch); + __ st(G3_scratch, thread_state); + + // Call the jni method, using the delay slot to set the JNIEnv* argument. 
+ __ save_thread(L7_thread_cache); // save Gthread + __ callr(O0, 0); + __ delayed()-> + add(L7_thread_cache, in_bytes(JavaThread::jni_environment_offset()), O0); + + // Back from jni method Lmethod in this frame is DEAD, DEAD, DEAD + + __ restore_thread(L7_thread_cache); // restore G2_thread + __ reinit_heapbase(); + + // must we block? + + // Block, if necessary, before resuming in _thread_in_Java state. + // In order for GC to work, don't clear the last_Java_sp until after blocking. + { Label no_block; + AddressLiteral sync_state(SafepointSynchronize::address_of_state()); + + // Switch thread to "native transition" state before reading the synchronization state. + // This additional state is necessary because reading and testing the synchronization + // state is not atomic w.r.t. GC, as this scenario demonstrates: + // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. + // VM thread changes sync state to synchronizing and suspends threads for GC. + // Thread A is resumed to finish this native method, but doesn't block here since it + // didn't see any synchronization is progress, and escapes. + __ set(_thread_in_native_trans, G3_scratch); + __ st(G3_scratch, thread_state); + if(os::is_MP()) { + if (UseMembar) { + // Force this write out before the read below + __ membar(Assembler::StoreLoad); + } else { + // Write serialization page so VM thread can do a pseudo remote membar. + // We use the current thread pointer to calculate a thread specific + // offset to write to within the page. This minimizes bus traffic + // due to cache line collision. 
+ __ serialize_memory(G2_thread, G1_scratch, G3_scratch); + } + } + __ load_contents(sync_state, G3_scratch); + __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized); + + Label L; + __ br(Assembler::notEqual, false, Assembler::pn, L); + __ delayed()->ld(G2_thread, JavaThread::suspend_flags_offset(), G3_scratch); + __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block); + __ bind(L); + + // Block. Save any potential method result value before the operation and + // use a leaf call to leave the last_Java_frame setup undisturbed. + save_native_result(); + __ call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), + G2_thread); + + // Restore any method result value + restore_native_result(); + __ bind(no_block); + } + + // Clear the frame anchor now + + __ reset_last_Java_frame(); + + // Move the result handler address + __ mov(Lscratch, G3_scratch); + // return possible result to the outer frame +#ifndef __LP64 + __ mov(O0, I0); + __ restore(O1, G0, O1); +#else + __ restore(O0, G0, O0); +#endif /* __LP64 */ + + // Move result handler to expected register + __ mov(G3_scratch, Lscratch); + + // Back in normal (native) interpreter frame. State is thread_in_native_trans + // switch to thread_in_Java. 
+ + __ set(_thread_in_Java, G3_scratch); + __ st(G3_scratch, thread_state); + + // reset handle block + __ ld_ptr(G2_thread, JavaThread::active_handles_offset(), G3_scratch); + __ st(G0, G3_scratch, JNIHandleBlock::top_offset_in_bytes()); + + // If we have an oop result store it where it will be safe for any further gc + // until we return now that we've released the handle it might be protected by + + { + Label no_oop, store_result; + + __ set((intptr_t)AbstractInterpreter::result_handler(T_OBJECT), G3_scratch); + __ cmp_and_brx_short(G3_scratch, Lscratch, Assembler::notEqual, Assembler::pt, no_oop); + __ addcc(G0, O0, O0); + __ brx(Assembler::notZero, true, Assembler::pt, store_result); // if result is not NULL: + __ delayed()->ld_ptr(O0, 0, O0); // unbox it + __ mov(G0, O0); + + __ bind(store_result); + // Store it where gc will look for it and result handler expects it. + __ st_ptr(O0, FP, (frame::interpreter_frame_oop_temp_offset*wordSize) + STACK_BIAS); + + __ bind(no_oop); + + } + + + // handle exceptions (exception handling will handle unlocking!) + { Label L; + Address exception_addr(G2_thread, Thread::pending_exception_offset()); + __ ld_ptr(exception_addr, Gtemp); + __ br_null_short(Gtemp, Assembler::pt, L); + // Note: This could be handled more efficiently since we know that the native + // method doesn't have an exception handler. We could directly return + // to the exception handler for the caller. 
+ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); + __ should_not_reach_here(); + __ bind(L); + } + + // JVMTI support (preserves thread register) + __ notify_method_exit(true, ilgl, InterpreterMacroAssembler::NotifyJVMTI); + + if (synchronized) { + // save and restore any potential method result value around the unlocking operation + save_native_result(); + + __ add( __ top_most_monitor(), O1); + __ unlock_object(O1); + + restore_native_result(); + } + +#if defined(COMPILER2) && !defined(_LP64) + + // C2 expects long results in G1 we can't tell if we're returning to interpreted + // or compiled so just be safe. + + __ sllx(O0, 32, G1); // Shift bits into high G1 + __ srl (O1, 0, O1); // Zero extend O1 + __ or3 (O1, G1, G1); // OR 64 bits into G1 + +#endif /* COMPILER2 && !_LP64 */ + + // dispose of return address and remove activation +#ifdef ASSERT + { + Label ok; + __ cmp_and_brx_short(I5_savedSP, FP, Assembler::greaterEqualUnsigned, Assembler::pt, ok); + __ stop("bad I5_savedSP value"); + __ should_not_reach_here(); + __ bind(ok); + } +#endif + if (TraceJumps) { + // Move target to register that is recordable + __ mov(Lscratch, G3_scratch); + __ JMP(G3_scratch, 0); + } else { + __ jmp(Lscratch, 0); + } + __ delayed()->nop(); + + + if (inc_counter) { + // handle invocation counter overflow + __ bind(invocation_counter_overflow); + generate_counter_overflow(Lcontinue); + } + + + + return entry; +} + + +// Generic method entry to (asm) interpreter +address InterpreterGenerator::generate_normal_entry(bool synchronized) { + address entry = __ pc(); + + bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; + + // the following temporary registers are used during frame creation + const Register Gtmp1 = G3_scratch ; + const Register Gtmp2 = G1_scratch; + + // make sure registers are different! 
+ assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2); + + const Address constMethod (G5_method, Method::const_offset()); + // Seems like G5_method is live at the point this is used. So we could make this look consistent + // and use in the asserts. + const Address access_flags (Lmethod, Method::access_flags_offset()); + + const Register Glocals_size = G3; + assert_different_registers(Glocals_size, G4_scratch, Gframe_size); + + // make sure method is not native & not abstract + // rethink these assertions - they can be simplified and shared (gri 2/25/2000) +#ifdef ASSERT + __ ld(G5_method, Method::access_flags_offset(), Gtmp1); + { + Label L; + __ btst(JVM_ACC_NATIVE, Gtmp1); + __ br(Assembler::zero, false, Assembler::pt, L); + __ delayed()->nop(); + __ stop("tried to execute native method as non-native"); + __ bind(L); + } + { Label L; + __ btst(JVM_ACC_ABSTRACT, Gtmp1); + __ br(Assembler::zero, false, Assembler::pt, L); + __ delayed()->nop(); + __ stop("tried to execute abstract method as non-abstract"); + __ bind(L); + } +#endif // ASSERT + + // generate the code to allocate the interpreter stack frame + + generate_fixed_frame(false); + +#ifdef FAST_DISPATCH + __ set((intptr_t)Interpreter::dispatch_table(), IdispatchTables); + // set bytecode dispatch table base +#endif + + // + // Code to initialize the extra (i.e. non-parm) locals + // + Register init_value = noreg; // will be G0 if we must clear locals + // The way the code was setup before zerolocals was always true for vanilla java entries. + // It could only be false for the specialized entries like accessor or empty which have + // no extra locals so the testing was a waste of time and the extra locals were always + // initialized. We removed this extra complication to already over complicated code. 
+ + init_value = G0; + Label clear_loop; + + const Register RconstMethod = O1; + const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); + const Address size_of_locals (RconstMethod, ConstMethod::size_of_locals_offset()); + + // NOTE: If you change the frame layout, this code will need to + // be updated! + __ ld_ptr( constMethod, RconstMethod ); + __ lduh( size_of_locals, O2 ); + __ lduh( size_of_parameters, O1 ); + __ sll( O2, Interpreter::logStackElementSize, O2); + __ sll( O1, Interpreter::logStackElementSize, O1 ); + __ sub( Llocals, O2, O2 ); + __ sub( Llocals, O1, O1 ); + + __ bind( clear_loop ); + __ inc( O2, wordSize ); + + __ cmp( O2, O1 ); + __ brx( Assembler::lessEqualUnsigned, true, Assembler::pt, clear_loop ); + __ delayed()->st_ptr( init_value, O2, 0 ); + + const Address do_not_unlock_if_synchronized(G2_thread, + JavaThread::do_not_unlock_if_synchronized_offset()); + // Since at this point in the method invocation the exception handler + // would try to exit the monitor of synchronized methods which hasn't + // been entered yet, we set the thread local variable + // _do_not_unlock_if_synchronized to true. If any exception was thrown by + // runtime, exception handling i.e. unlock_if_synchronized_method will + // check this thread local flag. 
+ __ movbool(true, G3_scratch); + __ stbool(G3_scratch, do_not_unlock_if_synchronized); + + __ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch); + // increment invocation counter and check for overflow + // + // Note: checking for negative value instead of overflow + // so we have a 'sticky' overflow test (may be of + // importance as soon as we have true MT/MP) + Label invocation_counter_overflow; + Label profile_method; + Label profile_method_continue; + Label Lcontinue; + if (inc_counter) { + generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue); + if (ProfileInterpreter) { + __ bind(profile_method_continue); + } + } + __ bind(Lcontinue); + + bang_stack_shadow_pages(false); + + // reset the _do_not_unlock_if_synchronized flag + __ stbool(G0, do_not_unlock_if_synchronized); + + // check for synchronized methods + // Must happen AFTER invocation_counter check and stack overflow check, + // so method is not locked if overflows. 
+ + if (synchronized) { + lock_method(); + } else { +#ifdef ASSERT + { Label ok; + __ ld(access_flags, O0); + __ btst(JVM_ACC_SYNCHRONIZED, O0); + __ br( Assembler::zero, false, Assembler::pt, ok); + __ delayed()->nop(); + __ stop("method needs synchronization"); + __ bind(ok); + } +#endif // ASSERT + } + + // start execution + + __ verify_thread(); + + // jvmti support + __ notify_method_entry(); + + // start executing instructions + __ dispatch_next(vtos); + + + if (inc_counter) { + if (ProfileInterpreter) { + // We have decided to profile this method in the interpreter + __ bind(profile_method); + + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); + __ set_method_data_pointer_for_bcp(); + __ ba_short(profile_method_continue); + } + + // handle invocation counter overflow + __ bind(invocation_counter_overflow); + generate_counter_overflow(Lcontinue); + } + + + return entry; +} + +//---------------------------------------------------------------------------------------------------- +// Exceptions +void TemplateInterpreterGenerator::generate_throw_exception() { + + // Entry point in previous activation (i.e., if the caller was interpreted) + Interpreter::_rethrow_exception_entry = __ pc(); + // O0: exception + + // entry point for exceptions thrown within interpreter code + Interpreter::_throw_exception_entry = __ pc(); + __ verify_thread(); + // expression stack is undefined here + // O0: exception, i.e. 
Oexception + // Lbcp: exception bcp + __ verify_oop(Oexception); + + + // expression stack must be empty before entering the VM in case of an exception + __ empty_expression_stack(); + // find exception handler address and preserve exception oop + // call C routine to find handler and jump to it + __ call_VM(O1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), Oexception); + __ push_ptr(O1); // push exception for exception handler bytecodes + + __ JMP(O0, 0); // jump to exception handler (may be remove activation entry!) + __ delayed()->nop(); + + + // if the exception is not handled in the current frame + // the frame is removed and the exception is rethrown + // (i.e. exception continuation is _rethrow_exception) + // + // Note: At this point the bci is still the bxi for the instruction which caused + // the exception and the expression stack is empty. Thus, for any VM calls + // at this point, GC will find a legal oop map (with empty expression stack). + + // in current activation + // tos: exception + // Lbcp: exception bcp + + // + // JVMTI PopFrame support + // + + Interpreter::_remove_activation_preserving_args_entry = __ pc(); + Address popframe_condition_addr(G2_thread, JavaThread::popframe_condition_offset()); + // Set the popframe_processing bit in popframe_condition indicating that we are + // currently handling popframe, so that call_VMs that may happen later do not trigger new + // popframe handling cycles. + + __ ld(popframe_condition_addr, G3_scratch); + __ or3(G3_scratch, JavaThread::popframe_processing_bit, G3_scratch); + __ stw(G3_scratch, popframe_condition_addr); + + // Empty the expression stack, as in normal exception handling + __ empty_expression_stack(); + __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, /* install_monitor_exception */ false); + + { + // Check to see whether we are returning to a deoptimized frame. 
+ // (The PopFrame call ensures that the caller of the popped frame is + // either interpreted or compiled and deoptimizes it if compiled.) + // In this case, we can't call dispatch_next() after the frame is + // popped, but instead must save the incoming arguments and restore + // them after deoptimization has occurred. + // + // Note that we don't compare the return PC against the + // deoptimization blob's unpack entry because of the presence of + // adapter frames in C2. + Label caller_not_deoptimized; + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), I7); + __ br_notnull_short(O0, Assembler::pt, caller_not_deoptimized); + + const Register Gtmp1 = G3_scratch; + const Register Gtmp2 = G1_scratch; + const Register RconstMethod = Gtmp1; + const Address constMethod(Lmethod, Method::const_offset()); + const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); + + // Compute size of arguments for saving when returning to deoptimized caller + __ ld_ptr(constMethod, RconstMethod); + __ lduh(size_of_parameters, Gtmp1); + __ sll(Gtmp1, Interpreter::logStackElementSize, Gtmp1); + __ sub(Llocals, Gtmp1, Gtmp2); + __ add(Gtmp2, wordSize, Gtmp2); + // Save these arguments + __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), G2_thread, Gtmp1, Gtmp2); + // Inform deoptimization that it is responsible for restoring these arguments + __ set(JavaThread::popframe_force_deopt_reexecution_bit, Gtmp1); + Address popframe_condition_addr(G2_thread, JavaThread::popframe_condition_offset()); + __ st(Gtmp1, popframe_condition_addr); + + // Return from the current method + // The caller's SP was adjusted upon method entry to accomodate + // the callee's non-argument locals. Undo that adjustment. 
+ __ ret(); + __ delayed()->restore(I5_savedSP, G0, SP); + + __ bind(caller_not_deoptimized); + } + + // Clear the popframe condition flag + __ stw(G0 /* popframe_inactive */, popframe_condition_addr); + + // Get out of the current method (how this is done depends on the particular compiler calling + // convention that the interpreter currently follows) + // The caller's SP was adjusted upon method entry to accomodate + // the callee's non-argument locals. Undo that adjustment. + __ restore(I5_savedSP, G0, SP); + // The method data pointer was incremented already during + // call profiling. We have to restore the mdp for the current bcp. + if (ProfileInterpreter) { + __ set_method_data_pointer_for_bcp(); + } + +#if INCLUDE_JVMTI + { + Label L_done; + + __ ldub(Address(Lbcp, 0), G1_scratch); // Load current bytecode + __ cmp_and_br_short(G1_scratch, Bytecodes::_invokestatic, Assembler::notEqual, Assembler::pn, L_done); + + // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. + // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. + + __ call_VM(G1_scratch, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), I0, Lmethod, Lbcp); + + __ br_null(G1_scratch, false, Assembler::pn, L_done); + __ delayed()->nop(); + + __ st_ptr(G1_scratch, Lesp, wordSize); + __ bind(L_done); + } +#endif // INCLUDE_JVMTI + + // Resume bytecode interpretation at the current bcp + __ dispatch_next(vtos); + // end of JVMTI PopFrame support + + Interpreter::_remove_activation_entry = __ pc(); + + // preserve exception over this code sequence (remove activation calls the vm, but oopmaps are not correct here) + __ pop_ptr(Oexception); // get exception + + // Intel has the following comment: + //// remove the activation (without doing throws on illegalMonitorExceptions) + // They remove the activation without checking for bad monitor state. 
+ // %%% We should make sure this is the right semantics before implementing. + + __ set_vm_result(Oexception); + __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false); + + __ notify_method_exit(false, vtos, InterpreterMacroAssembler::SkipNotifyJVMTI); + + __ get_vm_result(Oexception); + __ verify_oop(Oexception); + + const int return_reg_adjustment = frame::pc_return_offset; + Address issuing_pc_addr(I7, return_reg_adjustment); + + // We are done with this activation frame; find out where to go next. + // The continuation point will be an exception handler, which expects + // the following registers set up: + // + // Oexception: exception + // Oissuing_pc: the local call that threw exception + // Other On: garbage + // In/Ln: the contents of the caller's register window + // + // We do the required restore at the last possible moment, because we + // need to preserve some state across a runtime call. + // (Remember that the caller activation is unknown--it might not be + // interpreted, so things like Lscratch are useless in the caller.) + + // Although the Intel version uses call_C, we can use the more + // compact call_VM. (The only real difference on SPARC is a + // harmlessly ignored [re]set_last_Java_frame, compared with + // the Intel code which lacks this.) + __ mov(Oexception, Oexception ->after_save()); // get exception in I0 so it will be on O0 after restore + __ add(issuing_pc_addr, Oissuing_pc->after_save()); // likewise set I1 to a value local to the caller + __ super_call_VM_leaf(L7_thread_cache, + CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), + G2_thread, Oissuing_pc->after_save()); + + // The caller's SP was adjusted upon method entry to accomodate + // the callee's non-argument locals. Undo that adjustment. 
+ __ JMP(O0, 0); // return exception handler in caller + __ delayed()->restore(I5_savedSP, G0, SP); + + // (same old exception object is already in Oexception; see above) + // Note that an "issuing PC" is actually the next PC after the call +} + + +// +// JVMTI ForceEarlyReturn support +// + +address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { + address entry = __ pc(); + + __ empty_expression_stack(); + __ load_earlyret_value(state); + + __ ld_ptr(G2_thread, JavaThread::jvmti_thread_state_offset(), G3_scratch); + Address cond_addr(G3_scratch, JvmtiThreadState::earlyret_state_offset()); + + // Clear the earlyret state + __ stw(G0 /* JvmtiThreadState::earlyret_inactive */, cond_addr); + + __ remove_activation(state, + /* throw_monitor_exception */ false, + /* install_monitor_exception */ false); + + // The caller's SP was adjusted upon method entry to accomodate + // the callee's non-argument locals. Undo that adjustment. + __ ret(); // return to caller + __ delayed()->restore(I5_savedSP, G0, SP); + + return entry; +} // end of JVMTI ForceEarlyReturn support + + +//------------------------------------------------------------------------------------------------------------------------ +// Helper for vtos entry point generation + +void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address& bep, address& cep, address& sep, address& aep, address& iep, address& lep, address& fep, address& dep, address& vep) { + assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); + Label L; + aep = __ pc(); __ push_ptr(); __ ba_short(L); + fep = __ pc(); __ push_f(); __ ba_short(L); + dep = __ pc(); __ push_d(); __ ba_short(L); + lep = __ pc(); __ push_l(); __ ba_short(L); + iep = __ pc(); __ push_i(); + bep = cep = sep = iep; // there aren't any + vep = __ pc(); __ bind(L); // fall through + generate_and_dispatch(t); +} + +// -------------------------------------------------------------------------------- + + 
+InterpreterGenerator::InterpreterGenerator(StubQueue* code) + : TemplateInterpreterGenerator(code) { + generate_all(); // down here so it can be "virtual" +} + +// -------------------------------------------------------------------------------- + +// Non-product code +#ifndef PRODUCT +address TemplateInterpreterGenerator::generate_trace_code(TosState state) { + address entry = __ pc(); + + __ push(state); + __ mov(O7, Lscratch); // protect return address within interpreter + + // Pass a 0 (not used in sparc) and the top of stack to the bytecode tracer + __ mov( Otos_l2, G3_scratch ); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), G0, Otos_l1, G3_scratch); + __ mov(Lscratch, O7); // restore return address + __ pop(state); + __ retl(); + __ delayed()->nop(); + + return entry; +} + + +// helpers for generate_and_dispatch + +void TemplateInterpreterGenerator::count_bytecode() { + __ inc_counter(&BytecodeCounter::_counter_value, G3_scratch, G4_scratch); +} + + +void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { + __ inc_counter(&BytecodeHistogram::_counters[t->bytecode()], G3_scratch, G4_scratch); +} + + +void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { + AddressLiteral index (&BytecodePairHistogram::_index); + AddressLiteral counters((address) &BytecodePairHistogram::_counters); + + // get index, shift out old bytecode, bring in new bytecode, and store it + // _index = (_index >> log2_number_of_codes) | + // (bytecode << log2_number_of_codes); + + __ load_contents(index, G4_scratch); + __ srl( G4_scratch, BytecodePairHistogram::log2_number_of_codes, G4_scratch ); + __ set( ((int)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes, G3_scratch ); + __ or3( G3_scratch, G4_scratch, G4_scratch ); + __ store_contents(G4_scratch, index, G3_scratch); + + // bump bucket contents + // _counters[_index] ++; + + __ set(counters, G3_scratch); // loads into G3_scratch + __ sll( G4_scratch, 
LogBytesPerWord, G4_scratch ); // Index is word address + __ add (G3_scratch, G4_scratch, G3_scratch); // Add in index + __ ld (G3_scratch, 0, G4_scratch); + __ inc (G4_scratch); + __ st (G4_scratch, 0, G3_scratch); +} + + +void TemplateInterpreterGenerator::trace_bytecode(Template* t) { + // Call a little run-time stub to avoid blow-up for each bytecode. + // The run-time runtime saves the right registers, depending on + // the tosca in-state for the given template. + address entry = Interpreter::trace_code(t->tos_in()); + guarantee(entry != NULL, "entry must have been generated"); + __ call(entry, relocInfo::none); + __ delayed()->nop(); +} + + +void TemplateInterpreterGenerator::stop_interpreter_at() { + AddressLiteral counter(&BytecodeCounter::_counter_value); + __ load_contents(counter, G3_scratch); + AddressLiteral stop_at(&StopInterpreterAt); + __ load_ptr_contents(stop_at, G4_scratch); + __ cmp(G3_scratch, G4_scratch); + __ breakpoint_trap(Assembler::equal, Assembler::icc); +} +#endif // not PRODUCT +#endif // !CC_INTERP diff --git a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp index a76004f786d..04d15c42693 100644 --- a/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/templateInterpreter_sparc.cpp @@ -23,1483 +23,39 @@ */ #include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "interpreter/bytecodeHistogram.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interpreterGenerator.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "interpreter/interp_masm.hpp" -#include "interpreter/templateTable.hpp" -#include "oops/arrayOop.hpp" -#include "oops/methodData.hpp" +#include "oops/constMethod.hpp" #include "oops/method.hpp" -#include "oops/oop.inline.hpp" -#include "prims/jvmtiExport.hpp" -#include "prims/jvmtiThreadState.hpp" #include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include 
"runtime/frame.inline.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" #include "runtime/synchronizer.hpp" -#include "runtime/timer.hpp" -#include "runtime/vframeArray.hpp" -#include "utilities/debug.hpp" #include "utilities/macros.hpp" -#ifndef CC_INTERP -#ifndef FAST_DISPATCH -#define FAST_DISPATCH 1 -#endif -#undef FAST_DISPATCH - -// Generation of Interpreter -// -// The InterpreterGenerator generates the interpreter into Interpreter::_code. - - -#define __ _masm-> - - -//---------------------------------------------------------------------------------------------------- - - -void InterpreterGenerator::save_native_result(void) { - // result potentially in O0/O1: save it across calls - const Address& l_tmp = InterpreterMacroAssembler::l_tmp; - - // result potentially in F0/F1: save it across calls - const Address& d_tmp = InterpreterMacroAssembler::d_tmp; - - // save and restore any potential method result value around the unlocking operation - __ stf(FloatRegisterImpl::D, F0, d_tmp); -#ifdef _LP64 - __ stx(O0, l_tmp); -#else - __ std(O0, l_tmp); -#endif -} - -void InterpreterGenerator::restore_native_result(void) { - const Address& l_tmp = InterpreterMacroAssembler::l_tmp; - const Address& d_tmp = InterpreterMacroAssembler::d_tmp; - - // Restore any method result value - __ ldf(FloatRegisterImpl::D, d_tmp, F0); -#ifdef _LP64 - __ ldx(l_tmp, O0); -#else - __ ldd(l_tmp, O0); -#endif -} - -address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) { - assert(!pass_oop || message == NULL, "either oop or message but not both"); - address entry = __ pc(); - // expression stack must be empty before entering the VM if an exception happened - __ empty_expression_stack(); - // load exception object - __ set((intptr_t)name, G3_scratch); - if (pass_oop) { - __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), G3_scratch, Otos_i); - } 
else { - __ set((intptr_t)message, G4_scratch); - __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), G3_scratch, G4_scratch); - } - // throw exception - assert(Interpreter::throw_exception_entry() != NULL, "generate it first"); - AddressLiteral thrower(Interpreter::throw_exception_entry()); - __ jump_to(thrower, G3_scratch); - __ delayed()->nop(); - return entry; -} - -address TemplateInterpreterGenerator::generate_ClassCastException_handler() { - address entry = __ pc(); - // expression stack must be empty before entering the VM if an exception - // happened - __ empty_expression_stack(); - // load exception object - __ call_VM(Oexception, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::throw_ClassCastException), - Otos_i); - __ should_not_reach_here(); - return entry; -} - - -address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char* name) { - address entry = __ pc(); - // expression stack must be empty before entering the VM if an exception happened - __ empty_expression_stack(); - // convention: expect aberrant index in register G3_scratch, then shuffle the - // index to G4_scratch for the VM call - __ mov(G3_scratch, G4_scratch); - __ set((intptr_t)name, G3_scratch); - __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), G3_scratch, G4_scratch); - __ should_not_reach_here(); - return entry; -} - - -address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { - address entry = __ pc(); - // expression stack must be empty before entering the VM if an exception happened - __ empty_expression_stack(); - __ call_VM(Oexception, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); - __ should_not_reach_here(); - return entry; -} - - -address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { - address entry = __ pc(); - - if (state == atos) { - __ 
profile_return_type(O0, G3_scratch, G1_scratch); - } - -#if !defined(_LP64) && defined(COMPILER2) - // All return values are where we want them, except for Longs. C2 returns - // longs in G1 in the 32-bit build whereas the interpreter wants them in O0/O1. - // Since the interpreter will return longs in G1 and O0/O1 in the 32bit - // build even if we are returning from interpreted we just do a little - // stupid shuffing. - // Note: I tried to make c2 return longs in O0/O1 and G1 so we wouldn't have to - // do this here. Unfortunately if we did a rethrow we'd see an machepilog node - // first which would move g1 -> O0/O1 and destroy the exception we were throwing. - - if (state == ltos) { - __ srl (G1, 0, O1); - __ srlx(G1, 32, O0); - } -#endif // !_LP64 && COMPILER2 - - // The callee returns with the stack possibly adjusted by adapter transition - // We remove that possible adjustment here. - // All interpreter local registers are untouched. Any result is passed back - // in the O0/O1 or float registers. Before continuing, the arguments must be - // popped from the java expression stack; i.e., Lesp must be adjusted. - - __ mov(Llast_SP, SP); // Remove any adapter added stack space. 
- - const Register cache = G3_scratch; - const Register index = G1_scratch; - __ get_cache_and_index_at_bcp(cache, index, 1, index_size); - - const Register flags = cache; - __ ld_ptr(cache, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset(), flags); - const Register parameter_size = flags; - __ and3(flags, ConstantPoolCacheEntry::parameter_size_mask, parameter_size); // argument size in words - __ sll(parameter_size, Interpreter::logStackElementSize, parameter_size); // each argument size in bytes - __ add(Lesp, parameter_size, Lesp); // pop arguments - __ dispatch_next(state, step); - - return entry; -} - - -address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { - address entry = __ pc(); - __ get_constant_pool_cache(LcpoolCache); // load LcpoolCache -#if INCLUDE_JVMCI - // Check if we need to take lock at entry of synchronized method. - if (UseJVMCICompiler) { - Label L; - Address pending_monitor_enter_addr(G2_thread, JavaThread::pending_monitorenter_offset()); - __ ldbool(pending_monitor_enter_addr, Gtemp); // Load if pending monitor enter - __ cmp_and_br_short(Gtemp, G0, Assembler::equal, Assembler::pn, L); - // Clear flag. - __ stbool(G0, pending_monitor_enter_addr); - // Take lock. - lock_method(); - __ bind(L); - } -#endif - { Label L; - Address exception_addr(G2_thread, Thread::pending_exception_offset()); - __ ld_ptr(exception_addr, Gtemp); // Load pending exception. - __ br_null_short(Gtemp, Assembler::pt, L); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); - __ should_not_reach_here(); - __ bind(L); - } - __ dispatch_next(state, step); - return entry; -} - -// A result handler converts/unboxes a native call result into -// a java interpreter/compiler result. The current frame is an -// interpreter frame. The activation frame unwind code must be -// consistent with that of TemplateTable::_return(...). 
In the -// case of native methods, the caller's SP was not modified. -address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { - address entry = __ pc(); - Register Itos_i = Otos_i ->after_save(); - Register Itos_l = Otos_l ->after_save(); - Register Itos_l1 = Otos_l1->after_save(); - Register Itos_l2 = Otos_l2->after_save(); +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; switch (type) { - case T_BOOLEAN: __ subcc(G0, O0, G0); __ addc(G0, 0, Itos_i); break; // !0 => true; 0 => false - case T_CHAR : __ sll(O0, 16, O0); __ srl(O0, 16, Itos_i); break; // cannot use and3, 0xFFFF too big as immediate value! - case T_BYTE : __ sll(O0, 24, O0); __ sra(O0, 24, Itos_i); break; - case T_SHORT : __ sll(O0, 16, O0); __ sra(O0, 16, Itos_i); break; - case T_LONG : -#ifndef _LP64 - __ mov(O1, Itos_l2); // move other half of long -#endif // ifdef or no ifdef, fall through to the T_INT case - case T_INT : __ mov(O0, Itos_i); break; - case T_VOID : /* nothing to do */ break; - case T_FLOAT : assert(F0 == Ftos_f, "fix this code" ); break; - case T_DOUBLE : assert(F0 == Ftos_d, "fix this code" ); break; - case T_OBJECT : - __ ld_ptr(FP, (frame::interpreter_frame_oop_temp_offset*wordSize) + STACK_BIAS, Itos_i); - __ verify_oop(Itos_i); - break; + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; default : ShouldNotReachHere(); } - __ ret(); // return from interpreter activation - __ delayed()->restore(I5_savedSP, G0, SP); // remove interpreter frame - NOT_PRODUCT(__ emit_int32(0);) // marker for disassembly - return entry; + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); + return i; } -address 
TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) { - address entry = __ pc(); - __ push(state); - __ call_VM(noreg, runtime_entry); - __ dispatch_via(vtos, Interpreter::normal_table(vtos)); - return entry; -} - - -address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { - address entry = __ pc(); - __ dispatch_next(state); - return entry; -} - -// -// Helpers for commoning out cases in the various type of method entries. -// - -// increment invocation count & check for overflow -// -// Note: checking for negative value instead of overflow -// so we have a 'sticky' overflow test -// -// Lmethod: method -// ??: invocation counter -// -void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { - // Note: In tiered we increment either counters in MethodCounters* or in - // MDO depending if we're profiling or not. - const Register G3_method_counters = G3_scratch; - Label done; - - if (TieredCompilation) { - const int increment = InvocationCounter::count_increment; - Label no_mdo; - if (ProfileInterpreter) { - // If no method data exists, go to profile_continue. 
- __ ld_ptr(Lmethod, Method::method_data_offset(), G4_scratch); - __ br_null_short(G4_scratch, Assembler::pn, no_mdo); - // Increment counter - Address mdo_invocation_counter(G4_scratch, - in_bytes(MethodData::invocation_counter_offset()) + - in_bytes(InvocationCounter::counter_offset())); - Address mask(G4_scratch, in_bytes(MethodData::invoke_mask_offset())); - __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, - G3_scratch, Lscratch, - Assembler::zero, overflow); - __ ba_short(done); - } - - // Increment counter in MethodCounters* - __ bind(no_mdo); - Address invocation_counter(G3_method_counters, - in_bytes(MethodCounters::invocation_counter_offset()) + - in_bytes(InvocationCounter::counter_offset())); - __ get_method_counters(Lmethod, G3_method_counters, done); - Address mask(G3_method_counters, in_bytes(MethodCounters::invoke_mask_offset())); - __ increment_mask_and_jump(invocation_counter, increment, mask, - G4_scratch, Lscratch, - Assembler::zero, overflow); - __ bind(done); - } else { // not TieredCompilation - // Update standard invocation counters - __ get_method_counters(Lmethod, G3_method_counters, done); - __ increment_invocation_counter(G3_method_counters, O0, G4_scratch); - if (ProfileInterpreter) { - Address interpreter_invocation_counter(G3_method_counters, - in_bytes(MethodCounters::interpreter_invocation_counter_offset())); - __ ld(interpreter_invocation_counter, G4_scratch); - __ inc(G4_scratch); - __ st(G4_scratch, interpreter_invocation_counter); - } - - if (ProfileInterpreter && profile_method != NULL) { - // Test to see if we should create a method data oop - Address profile_limit(G3_method_counters, in_bytes(MethodCounters::interpreter_profile_limit_offset())); - __ ld(profile_limit, G1_scratch); - __ cmp_and_br_short(O0, G1_scratch, Assembler::lessUnsigned, Assembler::pn, *profile_method_continue); - - // if no method data exists, go to profile_method - __ test_method_data_pointer(*profile_method); - } - - Address 
invocation_limit(G3_method_counters, in_bytes(MethodCounters::interpreter_invocation_limit_offset())); - __ ld(invocation_limit, G3_scratch); - __ cmp(O0, G3_scratch); - __ br(Assembler::greaterEqualUnsigned, false, Assembler::pn, *overflow); // Far distance - __ delayed()->nop(); - __ bind(done); - } - -} - -// Allocate monitor and lock method (asm interpreter) -// ebx - Method* -// -void TemplateInterpreterGenerator::lock_method() { - __ ld(Lmethod, in_bytes(Method::access_flags_offset()), O0); // Load access flags. - -#ifdef ASSERT - { Label ok; - __ btst(JVM_ACC_SYNCHRONIZED, O0); - __ br( Assembler::notZero, false, Assembler::pt, ok); - __ delayed()->nop(); - __ stop("method doesn't need synchronization"); - __ bind(ok); - } -#endif // ASSERT - - // get synchronization object to O0 - { Label done; - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - __ btst(JVM_ACC_STATIC, O0); - __ br( Assembler::zero, true, Assembler::pt, done); - __ delayed()->ld_ptr(Llocals, Interpreter::local_offset_in_bytes(0), O0); // get receiver for not-static case - - __ ld_ptr( Lmethod, in_bytes(Method::const_offset()), O0); - __ ld_ptr( O0, in_bytes(ConstMethod::constants_offset()), O0); - __ ld_ptr( O0, ConstantPool::pool_holder_offset_in_bytes(), O0); - - // lock the mirror, not the Klass* - __ ld_ptr( O0, mirror_offset, O0); - -#ifdef ASSERT - __ tst(O0); - __ breakpoint_trap(Assembler::zero, Assembler::ptr_cc); -#endif // ASSERT - - __ bind(done); - } - - __ add_monitor_to_stack(true, noreg, noreg); // allocate monitor elem - __ st_ptr( O0, Lmonitors, BasicObjectLock::obj_offset_in_bytes()); // store object - // __ untested("lock_object from method entry"); - __ lock_object(Lmonitors, O0); -} - - -void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe_size, - Register Rscratch, - Register Rscratch2) { - const int page_size = os::vm_page_size(); - Label after_frame_check; - - assert_different_registers(Rframe_size, Rscratch, Rscratch2); 
- - __ set(page_size, Rscratch); - __ cmp_and_br_short(Rframe_size, Rscratch, Assembler::lessEqual, Assembler::pt, after_frame_check); - - // get the stack base, and in debug, verify it is non-zero - __ ld_ptr( G2_thread, Thread::stack_base_offset(), Rscratch ); -#ifdef ASSERT - Label base_not_zero; - __ br_notnull_short(Rscratch, Assembler::pn, base_not_zero); - __ stop("stack base is zero in generate_stack_overflow_check"); - __ bind(base_not_zero); -#endif - - // get the stack size, and in debug, verify it is non-zero - assert( sizeof(size_t) == sizeof(intptr_t), "wrong load size" ); - __ ld_ptr( G2_thread, Thread::stack_size_offset(), Rscratch2 ); -#ifdef ASSERT - Label size_not_zero; - __ br_notnull_short(Rscratch2, Assembler::pn, size_not_zero); - __ stop("stack size is zero in generate_stack_overflow_check"); - __ bind(size_not_zero); -#endif - - // compute the beginning of the protected zone minus the requested frame size - __ sub( Rscratch, Rscratch2, Rscratch ); - __ set( (StackRedPages+StackYellowPages) * page_size, Rscratch2 ); - __ add( Rscratch, Rscratch2, Rscratch ); - - // Add in the size of the frame (which is the same as subtracting it from the - // SP, which would take another register - __ add( Rscratch, Rframe_size, Rscratch ); - - // the frame is greater than one page in size, so check against - // the bottom of the stack - __ cmp_and_brx_short(SP, Rscratch, Assembler::greaterUnsigned, Assembler::pt, after_frame_check); - - // the stack will overflow, throw an exception - - // Note that SP is restored to sender's sp (in the delay slot). This - // is necessary if the sender's frame is an extended compiled frame - // (see gen_c2i_adapter()) and safer anyway in case of JSR292 - // adaptations. - - // Note also that the restored frame is not necessarily interpreted. - // Use the shared runtime version of the StackOverflowError. 
- assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); - AddressLiteral stub(StubRoutines::throw_StackOverflowError_entry()); - __ jump_to(stub, Rscratch); - __ delayed()->mov(O5_savedSP, SP); - - // if you get to here, then there is enough stack space - __ bind( after_frame_check ); -} - - -// -// Generate a fixed interpreter frame. This is identical setup for interpreted -// methods and for native methods hence the shared code. - - -//---------------------------------------------------------------------------------------------------- -// Stack frame layout -// -// When control flow reaches any of the entry types for the interpreter -// the following holds -> -// -// C2 Calling Conventions: -// -// The entry code below assumes that the following registers are set -// when coming in: -// G5_method: holds the Method* of the method to call -// Lesp: points to the TOS of the callers expression stack -// after having pushed all the parameters -// -// The entry code does the following to setup an interpreter frame -// pop parameters from the callers stack by adjusting Lesp -// set O0 to Lesp -// compute X = (max_locals - num_parameters) -// bump SP up by X to accomadate the extra locals -// compute X = max_expression_stack -// + vm_local_words -// + 16 words of register save area -// save frame doing a save sp, -X, sp growing towards lower addresses -// set Lbcp, Lmethod, LcpoolCache -// set Llocals to i0 -// set Lmonitors to FP - rounded_vm_local_words -// set Lesp to Lmonitors - 4 -// -// The frame has now been setup to do the rest of the entry code - -// Try this optimization: Most method entries could live in a -// "one size fits all" stack frame without all the dynamic size -// calculations. It might be profitable to do all this calculation -// statically and approximately for "small enough" methods. 
- -//----------------------------------------------------------------------------------------------- - -// C1 Calling conventions -// -// Upon method entry, the following registers are setup: -// -// g2 G2_thread: current thread -// g5 G5_method: method to activate -// g4 Gargs : pointer to last argument -// -// -// Stack: -// -// +---------------+ <--- sp -// | | -// : reg save area : -// | | -// +---------------+ <--- sp + 0x40 -// | | -// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) -// | | -// +---------------+ <--- sp + 0x5c -// | | -// : free : -// | | -// +---------------+ <--- Gargs -// | | -// : arguments : -// | | -// +---------------+ -// | | -// -// -// -// AFTER FRAME HAS BEEN SETUP for method interpretation the stack looks like: -// -// +---------------+ <--- sp -// | | -// : reg save area : -// | | -// +---------------+ <--- sp + 0x40 -// | | -// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) -// | | -// +---------------+ <--- sp + 0x5c -// | | -// : : -// | | <--- Lesp -// +---------------+ <--- Lmonitors (fp - 0x18) -// | VM locals | -// +---------------+ <--- fp -// | | -// : reg save area : -// | | -// +---------------+ <--- fp + 0x40 -// | | -// : extra 7 slots : note: these slots are not really needed for the interpreter (fix later) -// | | -// +---------------+ <--- fp + 0x5c -// | | -// : free : -// | | -// +---------------+ -// | | -// : nonarg locals : -// | | -// +---------------+ -// | | -// : arguments : -// | | <--- Llocals -// +---------------+ <--- Gargs -// | | - -void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { - // - // - // The entry code sets up a new interpreter frame in 4 steps: - // - // 1) Increase caller's SP by for the extra local space needed: - // (check for overflow) - // Efficient implementation of xload/xstore bytecodes requires - // that arguments and non-argument locals are in a contigously - // 
addressable memory block => non-argument locals must be - // allocated in the caller's frame. - // - // 2) Create a new stack frame and register window: - // The new stack frame must provide space for the standard - // register save area, the maximum java expression stack size, - // the monitor slots (0 slots initially), and some frame local - // scratch locations. - // - // 3) The following interpreter activation registers must be setup: - // Lesp : expression stack pointer - // Lbcp : bytecode pointer - // Lmethod : method - // Llocals : locals pointer - // Lmonitors : monitor pointer - // LcpoolCache: constant pool cache - // - // 4) Initialize the non-argument locals if necessary: - // Non-argument locals may need to be initialized to NULL - // for GC to work. If the oop-map information is accurate - // (in the absence of the JSR problem), no initialization - // is necessary. - // - // (gri - 2/25/2000) - - - int rounded_vm_local_words = round_to( frame::interpreter_frame_vm_local_words, WordsPerLong ); - - const int extra_space = - rounded_vm_local_words + // frame local scratch space - Method::extra_stack_entries() + // extra stack for jsr 292 - frame::memory_parameter_word_sp_offset + // register save area - (native_call ? frame::interpreter_frame_extra_outgoing_argument_words : 0); - - const Register Glocals_size = G3; - const Register RconstMethod = Glocals_size; - const Register Otmp1 = O3; - const Register Otmp2 = O4; - // Lscratch can't be used as a temporary because the call_stub uses - // it to assert that the stack frame was setup correctly. 
- const Address constMethod (G5_method, Method::const_offset()); - const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); - - __ ld_ptr( constMethod, RconstMethod ); - __ lduh( size_of_parameters, Glocals_size); - - // Gargs points to first local + BytesPerWord - // Set the saved SP after the register window save - // - assert_different_registers(Gargs, Glocals_size, Gframe_size, O5_savedSP); - __ sll(Glocals_size, Interpreter::logStackElementSize, Otmp1); - __ add(Gargs, Otmp1, Gargs); - - if (native_call) { - __ calc_mem_param_words( Glocals_size, Gframe_size ); - __ add( Gframe_size, extra_space, Gframe_size); - __ round_to( Gframe_size, WordsPerLong ); - __ sll( Gframe_size, LogBytesPerWord, Gframe_size ); - } else { - - // - // Compute number of locals in method apart from incoming parameters - // - const Address size_of_locals (Otmp1, ConstMethod::size_of_locals_offset()); - __ ld_ptr( constMethod, Otmp1 ); - __ lduh( size_of_locals, Otmp1 ); - __ sub( Otmp1, Glocals_size, Glocals_size ); - __ round_to( Glocals_size, WordsPerLong ); - __ sll( Glocals_size, Interpreter::logStackElementSize, Glocals_size ); - - // see if the frame is greater than one page in size. 
If so, - // then we need to verify there is enough stack space remaining - // Frame_size = (max_stack + extra_space) * BytesPerWord; - __ ld_ptr( constMethod, Gframe_size ); - __ lduh( Gframe_size, in_bytes(ConstMethod::max_stack_offset()), Gframe_size ); - __ add( Gframe_size, extra_space, Gframe_size ); - __ round_to( Gframe_size, WordsPerLong ); - __ sll( Gframe_size, Interpreter::logStackElementSize, Gframe_size); - - // Add in java locals size for stack overflow check only - __ add( Gframe_size, Glocals_size, Gframe_size ); - - const Register Otmp2 = O4; - assert_different_registers(Otmp1, Otmp2, O5_savedSP); - generate_stack_overflow_check(Gframe_size, Otmp1, Otmp2); - - __ sub( Gframe_size, Glocals_size, Gframe_size); - - // - // bump SP to accomodate the extra locals - // - __ sub( SP, Glocals_size, SP ); - } - - // - // now set up a stack frame with the size computed above - // - __ neg( Gframe_size ); - __ save( SP, Gframe_size, SP ); - - // - // now set up all the local cache registers - // - // NOTE: At this point, Lbyte_code/Lscratch has been modified. Note - // that all present references to Lbyte_code initialize the register - // immediately before use - if (native_call) { - __ mov(G0, Lbcp); - } else { - __ ld_ptr(G5_method, Method::const_offset(), Lbcp); - __ add(Lbcp, in_bytes(ConstMethod::codes_offset()), Lbcp); - } - __ mov( G5_method, Lmethod); // set Lmethod - __ get_constant_pool_cache( LcpoolCache ); // set LcpoolCache - __ sub(FP, rounded_vm_local_words * BytesPerWord, Lmonitors ); // set Lmonitors -#ifdef _LP64 - __ add( Lmonitors, STACK_BIAS, Lmonitors ); // Account for 64 bit stack bias -#endif - __ sub(Lmonitors, BytesPerWord, Lesp); // set Lesp - - // setup interpreter activation registers - __ sub(Gargs, BytesPerWord, Llocals); // set Llocals - - if (ProfileInterpreter) { -#ifdef FAST_DISPATCH - // FAST_DISPATCH and ProfileInterpreter are mutually exclusive since - // they both use I2. 
- assert(0, "FAST_DISPATCH and +ProfileInterpreter are mutually exclusive"); -#endif // FAST_DISPATCH - __ set_method_data_pointer(); - } - -} - -// Method entry for java.lang.ref.Reference.get. -address InterpreterGenerator::generate_Reference_get_entry(void) { -#if INCLUDE_ALL_GCS - // Code: _aload_0, _getfield, _areturn - // parameter size = 1 - // - // The code that gets generated by this routine is split into 2 parts: - // 1. The "intrinsified" code for G1 (or any SATB based GC), - // 2. The slow path - which is an expansion of the regular method entry. - // - // Notes:- - // * In the G1 code we do not check whether we need to block for - // a safepoint. If G1 is enabled then we must execute the specialized - // code for Reference.get (except when the Reference object is null) - // so that we can log the value in the referent field with an SATB - // update buffer. - // If the code for the getfield template is modified so that the - // G1 pre-barrier code is executed when the current method is - // Reference.get() then going through the normal method entry - // will be fine. - // * The G1 code can, however, check the receiver object (the instance - // of java.lang.Reference) and jump to the slow path if null. If the - // Reference object is null then we obviously cannot fetch the referent - // and so we don't need to call the G1 pre-barrier. Thus we can use the - // regular method entry code to generate the NPE. - // - // This code is based on generate_accessor_enty. - - address entry = __ pc(); - - const int referent_offset = java_lang_ref_Reference::referent_offset; - guarantee(referent_offset > 0, "referent offset not initialized"); - - if (UseG1GC) { - Label slow_path; - - // In the G1 code we don't check if we need to reach a safepoint. We - // continue and the thread will safepoint at the next bytecode dispatch. - - // Check if local 0 != NULL - // If the receiver is null then it is OK to jump to the slow path. 
- __ ld_ptr(Gargs, G0, Otos_i ); // get local 0 - // check if local 0 == NULL and go the slow path - __ cmp_and_brx_short(Otos_i, 0, Assembler::equal, Assembler::pn, slow_path); - - - // Load the value of the referent field. - if (Assembler::is_simm13(referent_offset)) { - __ load_heap_oop(Otos_i, referent_offset, Otos_i); - } else { - __ set(referent_offset, G3_scratch); - __ load_heap_oop(Otos_i, G3_scratch, Otos_i); - } - - // Generate the G1 pre-barrier code to log the value of - // the referent field in an SATB buffer. Note with - // these parameters the pre-barrier does not generate - // the load of the previous value - - __ g1_write_barrier_pre(noreg /* obj */, noreg /* index */, 0 /* offset */, - Otos_i /* pre_val */, - G3_scratch /* tmp */, - true /* preserve_o_regs */); - - // _areturn - __ retl(); // return from leaf routine - __ delayed()->mov(O5_savedSP, SP); - - // Generate regular method entry - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); - return entry; - } -#endif // INCLUDE_ALL_GCS - - // If G1 is not enabled then attempt to go through the accessor entry point - // Reference.get is an accessor - return NULL; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.update(int crc, int b) - */ -address InterpreterGenerator::generate_CRC32_update_entry() { - - if (UseCRC32Intrinsics) { - address entry = __ pc(); - - Label L_slow_path; - // If we need a safepoint check, generate full interpreter entry. 
- ExternalAddress state(SafepointSynchronize::address_of_state()); - __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2); - __ set(SafepointSynchronize::_not_synchronized, O3); - __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path); - - // Load parameters - const Register crc = O0; // initial crc - const Register val = O1; // byte to update with - const Register table = O2; // address of 256-entry lookup table - - __ ldub(Gargs, 3, val); - __ lduw(Gargs, 8, crc); - - __ set(ExternalAddress(StubRoutines::crc_table_addr()), table); - - __ not1(crc); // ~crc - __ clruwu(crc); - __ update_byte_crc32(crc, val, table); - __ not1(crc); // ~crc - - // result in O0 - __ retl(); - __ delayed()->nop(); - - // generate a vanilla native entry as the slow path - __ bind(L_slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; - } - return NULL; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - */ -address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - - if (UseCRC32Intrinsics) { - address entry = __ pc(); - - Label L_slow_path; - // If we need a safepoint check, generate full interpreter entry. 
- ExternalAddress state(SafepointSynchronize::address_of_state()); - __ set(ExternalAddress(SafepointSynchronize::address_of_state()), O2); - __ set(SafepointSynchronize::_not_synchronized, O3); - __ cmp_and_br_short(O2, O3, Assembler::notEqual, Assembler::pt, L_slow_path); - - // Load parameters from the stack - const Register crc = O0; // initial crc - const Register buf = O1; // source java byte array address - const Register len = O2; // len - const Register offset = O3; // offset - - // Arguments are reversed on java expression stack - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { - __ lduw(Gargs, 0, len); - __ lduw(Gargs, 8, offset); - __ ldx( Gargs, 16, buf); - __ lduw(Gargs, 32, crc); - __ add(buf, offset, buf); - } else { - __ lduw(Gargs, 0, len); - __ lduw(Gargs, 8, offset); - __ ldx( Gargs, 16, buf); - __ lduw(Gargs, 24, crc); - __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size - __ add(buf ,offset, buf); - } - - // Call the crc32 kernel - __ MacroAssembler::save_thread(L7_thread_cache); - __ kernel_crc32(crc, buf, len, O3); - __ MacroAssembler::restore_thread(L7_thread_cache); - - // result in O0 - __ retl(); - __ delayed()->nop(); - - // generate a vanilla native entry as the slow path - __ bind(L_slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; - } - return NULL; -} - -// -// Interpreter stub for calling a native method. (asm interpreter) -// This sets up a somewhat different looking stack for calling the native method -// than the typical interpreter frame setup. 
-// - -address InterpreterGenerator::generate_native_entry(bool synchronized) { - address entry = __ pc(); - - // the following temporary registers are used during frame creation - const Register Gtmp1 = G3_scratch ; - const Register Gtmp2 = G1_scratch; - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; - - // make sure registers are different! - assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2); - - const Address Laccess_flags(Lmethod, Method::access_flags_offset()); - - const Register Glocals_size = G3; - assert_different_registers(Glocals_size, G4_scratch, Gframe_size); - - // make sure method is native & not abstract - // rethink these assertions - they can be simplified and shared (gri 2/25/2000) -#ifdef ASSERT - __ ld(G5_method, Method::access_flags_offset(), Gtmp1); - { - Label L; - __ btst(JVM_ACC_NATIVE, Gtmp1); - __ br(Assembler::notZero, false, Assembler::pt, L); - __ delayed()->nop(); - __ stop("tried to execute non-native method as native"); - __ bind(L); - } - { Label L; - __ btst(JVM_ACC_ABSTRACT, Gtmp1); - __ br(Assembler::zero, false, Assembler::pt, L); - __ delayed()->nop(); - __ stop("tried to execute abstract method as non-abstract"); - __ bind(L); - } -#endif // ASSERT - - // generate the code to allocate the interpreter stack frame - generate_fixed_frame(true); - - // - // No locals to initialize for native method - // - - // this slot will be set later, we initialize it to null here just in - // case we get a GC before the actual value is stored later - __ st_ptr(G0, FP, (frame::interpreter_frame_oop_temp_offset * wordSize) + STACK_BIAS); - - const Address do_not_unlock_if_synchronized(G2_thread, - JavaThread::do_not_unlock_if_synchronized_offset()); - // Since at this point in the method invocation the exception handler - // would try to exit the monitor of synchronized methods which hasn't - // been entered yet, we set the thread local variable - // _do_not_unlock_if_synchronized to true. 
If any exception was thrown by - // runtime, exception handling i.e. unlock_if_synchronized_method will - // check this thread local flag. - // This flag has two effects, one is to force an unwind in the topmost - // interpreter frame and not perform an unlock while doing so. - - __ movbool(true, G3_scratch); - __ stbool(G3_scratch, do_not_unlock_if_synchronized); - - // increment invocation counter and check for overflow - // - // Note: checking for negative value instead of overflow - // so we have a 'sticky' overflow test (may be of - // importance as soon as we have true MT/MP) - Label invocation_counter_overflow; - Label Lcontinue; - if (inc_counter) { - generate_counter_incr(&invocation_counter_overflow, NULL, NULL); - - } - __ bind(Lcontinue); - - bang_stack_shadow_pages(true); - - // reset the _do_not_unlock_if_synchronized flag - __ stbool(G0, do_not_unlock_if_synchronized); - - // check for synchronized methods - // Must happen AFTER invocation_counter check and stack overflow check, - // so method is not locked if overflows. - - if (synchronized) { - lock_method(); - } else { -#ifdef ASSERT - { Label ok; - __ ld(Laccess_flags, O0); - __ btst(JVM_ACC_SYNCHRONIZED, O0); - __ br( Assembler::zero, false, Assembler::pt, ok); - __ delayed()->nop(); - __ stop("method needs synchronization"); - __ bind(ok); - } -#endif // ASSERT - } - - - // start execution - __ verify_thread(); - - // JVMTI support - __ notify_method_entry(); - - // native call - - // (note that O0 is never an oop--at most it is a handle) - // It is important not to smash any handles created by this call, - // until any oop handle in O0 is dereferenced. 
- - // (note that the space for outgoing params is preallocated) - - // get signature handler - { Label L; - Address signature_handler(Lmethod, Method::signature_handler_offset()); - __ ld_ptr(signature_handler, G3_scratch); - __ br_notnull_short(G3_scratch, Assembler::pt, L); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), Lmethod); - __ ld_ptr(signature_handler, G3_scratch); - __ bind(L); - } - - // Push a new frame so that the args will really be stored in - // Copy a few locals across so the new frame has the variables - // we need but these values will be dead at the jni call and - // therefore not gc volatile like the values in the current - // frame (Lmethod in particular) - - // Flush the method pointer to the register save area - __ st_ptr(Lmethod, SP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS); - __ mov(Llocals, O1); - - // calculate where the mirror handle body is allocated in the interpreter frame: - __ add(FP, (frame::interpreter_frame_oop_temp_offset * wordSize) + STACK_BIAS, O2); - - // Calculate current frame size - __ sub(SP, FP, O3); // Calculate negative of current frame size - __ save(SP, O3, SP); // Allocate an identical sized frame - - // Note I7 has leftover trash. Slow signature handler will fill it in - // should we get there. Normal jni call will set reasonable last_Java_pc - // below (and fix I7 so the stack trace doesn't have a meaningless frame - // in it). - - // Load interpreter frame's Lmethod into same register here - - __ ld_ptr(FP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS, Lmethod); - - __ mov(I1, Llocals); - __ mov(I2, Lscratch2); // save the address of the mirror - - - // ONLY Lmethod and Llocals are valid here! 
- - // call signature handler, It will move the arg properly since Llocals in current frame - // matches that in outer frame - - __ callr(G3_scratch, 0); - __ delayed()->nop(); - - // Result handler is in Lscratch - - // Reload interpreter frame's Lmethod since slow signature handler may block - __ ld_ptr(FP, (Lmethod->sp_offset_in_saved_window() * wordSize) + STACK_BIAS, Lmethod); - - { Label not_static; - - __ ld(Laccess_flags, O0); - __ btst(JVM_ACC_STATIC, O0); - __ br( Assembler::zero, false, Assembler::pt, not_static); - // get native function entry point(O0 is a good temp until the very end) - __ delayed()->ld_ptr(Lmethod, in_bytes(Method::native_function_offset()), O0); - // for static methods insert the mirror argument - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - - __ ld_ptr(Lmethod, Method:: const_offset(), O1); - __ ld_ptr(O1, ConstMethod::constants_offset(), O1); - __ ld_ptr(O1, ConstantPool::pool_holder_offset_in_bytes(), O1); - __ ld_ptr(O1, mirror_offset, O1); -#ifdef ASSERT - if (!PrintSignatureHandlers) // do not dirty the output with this - { Label L; - __ br_notnull_short(O1, Assembler::pt, L); - __ stop("mirror is missing"); - __ bind(L); - } -#endif // ASSERT - __ st_ptr(O1, Lscratch2, 0); - __ mov(Lscratch2, O1); - __ bind(not_static); - } - - // At this point, arguments have been copied off of stack into - // their JNI positions, which are O1..O5 and SP[68..]. - // Oops are boxed in-place on the stack, with handles copied to arguments. - // The result handler is in Lscratch. O0 will shortly hold the JNIEnv*. 
- -#ifdef ASSERT - { Label L; - __ br_notnull_short(O0, Assembler::pt, L); - __ stop("native entry point is missing"); - __ bind(L); - } -#endif // ASSERT - - // - // setup the frame anchor - // - // The scavenge function only needs to know that the PC of this frame is - // in the interpreter method entry code, it doesn't need to know the exact - // PC and hence we can use O7 which points to the return address from the - // previous call in the code stream (signature handler function) - // - // The other trick is we set last_Java_sp to FP instead of the usual SP because - // we have pushed the extra frame in order to protect the volatile register(s) - // in that frame when we return from the jni call - // - - __ set_last_Java_frame(FP, O7); - __ mov(O7, I7); // make dummy interpreter frame look like one above, - // not meaningless information that'll confuse me. - - // flush the windows now. We don't care about the current (protection) frame - // only the outer frames - - __ flushw(); - - // mark windows as flushed - Address flags(G2_thread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::flags_offset()); - __ set(JavaFrameAnchor::flushed, G3_scratch); - __ st(G3_scratch, flags); - - // Transition from _thread_in_Java to _thread_in_native. We are already safepoint ready. - - Address thread_state(G2_thread, JavaThread::thread_state_offset()); -#ifdef ASSERT - { Label L; - __ ld(thread_state, G3_scratch); - __ cmp_and_br_short(G3_scratch, _thread_in_Java, Assembler::equal, Assembler::pt, L); - __ stop("Wrong thread state in native stub"); - __ bind(L); - } -#endif // ASSERT - __ set(_thread_in_native, G3_scratch); - __ st(G3_scratch, thread_state); - - // Call the jni method, using the delay slot to set the JNIEnv* argument. 
- __ save_thread(L7_thread_cache); // save Gthread - __ callr(O0, 0); - __ delayed()-> - add(L7_thread_cache, in_bytes(JavaThread::jni_environment_offset()), O0); - - // Back from jni method Lmethod in this frame is DEAD, DEAD, DEAD - - __ restore_thread(L7_thread_cache); // restore G2_thread - __ reinit_heapbase(); - - // must we block? - - // Block, if necessary, before resuming in _thread_in_Java state. - // In order for GC to work, don't clear the last_Java_sp until after blocking. - { Label no_block; - AddressLiteral sync_state(SafepointSynchronize::address_of_state()); - - // Switch thread to "native transition" state before reading the synchronization state. - // This additional state is necessary because reading and testing the synchronization - // state is not atomic w.r.t. GC, as this scenario demonstrates: - // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. - // VM thread changes sync state to synchronizing and suspends threads for GC. - // Thread A is resumed to finish this native method, but doesn't block here since it - // didn't see any synchronization is progress, and escapes. - __ set(_thread_in_native_trans, G3_scratch); - __ st(G3_scratch, thread_state); - if(os::is_MP()) { - if (UseMembar) { - // Force this write out before the read below - __ membar(Assembler::StoreLoad); - } else { - // Write serialization page so VM thread can do a pseudo remote membar. - // We use the current thread pointer to calculate a thread specific - // offset to write to within the page. This minimizes bus traffic - // due to cache line collision. 
- __ serialize_memory(G2_thread, G1_scratch, G3_scratch); - } - } - __ load_contents(sync_state, G3_scratch); - __ cmp(G3_scratch, SafepointSynchronize::_not_synchronized); - - Label L; - __ br(Assembler::notEqual, false, Assembler::pn, L); - __ delayed()->ld(G2_thread, JavaThread::suspend_flags_offset(), G3_scratch); - __ cmp_and_br_short(G3_scratch, 0, Assembler::equal, Assembler::pt, no_block); - __ bind(L); - - // Block. Save any potential method result value before the operation and - // use a leaf call to leave the last_Java_frame setup undisturbed. - save_native_result(); - __ call_VM_leaf(L7_thread_cache, - CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), - G2_thread); - - // Restore any method result value - restore_native_result(); - __ bind(no_block); - } - - // Clear the frame anchor now - - __ reset_last_Java_frame(); - - // Move the result handler address - __ mov(Lscratch, G3_scratch); - // return possible result to the outer frame -#ifndef __LP64 - __ mov(O0, I0); - __ restore(O1, G0, O1); -#else - __ restore(O0, G0, O0); -#endif /* __LP64 */ - - // Move result handler to expected register - __ mov(G3_scratch, Lscratch); - - // Back in normal (native) interpreter frame. State is thread_in_native_trans - // switch to thread_in_Java. 
- - __ set(_thread_in_Java, G3_scratch); - __ st(G3_scratch, thread_state); - - // reset handle block - __ ld_ptr(G2_thread, JavaThread::active_handles_offset(), G3_scratch); - __ st(G0, G3_scratch, JNIHandleBlock::top_offset_in_bytes()); - - // If we have an oop result store it where it will be safe for any further gc - // until we return now that we've released the handle it might be protected by - - { - Label no_oop, store_result; - - __ set((intptr_t)AbstractInterpreter::result_handler(T_OBJECT), G3_scratch); - __ cmp_and_brx_short(G3_scratch, Lscratch, Assembler::notEqual, Assembler::pt, no_oop); - __ addcc(G0, O0, O0); - __ brx(Assembler::notZero, true, Assembler::pt, store_result); // if result is not NULL: - __ delayed()->ld_ptr(O0, 0, O0); // unbox it - __ mov(G0, O0); - - __ bind(store_result); - // Store it where gc will look for it and result handler expects it. - __ st_ptr(O0, FP, (frame::interpreter_frame_oop_temp_offset*wordSize) + STACK_BIAS); - - __ bind(no_oop); - - } - - - // handle exceptions (exception handling will handle unlocking!) - { Label L; - Address exception_addr(G2_thread, Thread::pending_exception_offset()); - __ ld_ptr(exception_addr, Gtemp); - __ br_null_short(Gtemp, Assembler::pt, L); - // Note: This could be handled more efficiently since we know that the native - // method doesn't have an exception handler. We could directly return - // to the exception handler for the caller. 
- __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); - __ should_not_reach_here(); - __ bind(L); - } - - // JVMTI support (preserves thread register) - __ notify_method_exit(true, ilgl, InterpreterMacroAssembler::NotifyJVMTI); - - if (synchronized) { - // save and restore any potential method result value around the unlocking operation - save_native_result(); - - __ add( __ top_most_monitor(), O1); - __ unlock_object(O1); - - restore_native_result(); - } - -#if defined(COMPILER2) && !defined(_LP64) - - // C2 expects long results in G1 we can't tell if we're returning to interpreted - // or compiled so just be safe. - - __ sllx(O0, 32, G1); // Shift bits into high G1 - __ srl (O1, 0, O1); // Zero extend O1 - __ or3 (O1, G1, G1); // OR 64 bits into G1 - -#endif /* COMPILER2 && !_LP64 */ - - // dispose of return address and remove activation -#ifdef ASSERT - { - Label ok; - __ cmp_and_brx_short(I5_savedSP, FP, Assembler::greaterEqualUnsigned, Assembler::pt, ok); - __ stop("bad I5_savedSP value"); - __ should_not_reach_here(); - __ bind(ok); - } -#endif - if (TraceJumps) { - // Move target to register that is recordable - __ mov(Lscratch, G3_scratch); - __ JMP(G3_scratch, 0); - } else { - __ jmp(Lscratch, 0); - } - __ delayed()->nop(); - - - if (inc_counter) { - // handle invocation counter overflow - __ bind(invocation_counter_overflow); - generate_counter_overflow(Lcontinue); - } - - - - return entry; -} - - -// Generic method entry to (asm) interpreter -address InterpreterGenerator::generate_normal_entry(bool synchronized) { - address entry = __ pc(); - - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; - - // the following temporary registers are used during frame creation - const Register Gtmp1 = G3_scratch ; - const Register Gtmp2 = G1_scratch; - - // make sure registers are different! 
- assert_different_registers(G2_thread, G5_method, Gargs, Gtmp1, Gtmp2); - - const Address constMethod (G5_method, Method::const_offset()); - // Seems like G5_method is live at the point this is used. So we could make this look consistent - // and use in the asserts. - const Address access_flags (Lmethod, Method::access_flags_offset()); - - const Register Glocals_size = G3; - assert_different_registers(Glocals_size, G4_scratch, Gframe_size); - - // make sure method is not native & not abstract - // rethink these assertions - they can be simplified and shared (gri 2/25/2000) -#ifdef ASSERT - __ ld(G5_method, Method::access_flags_offset(), Gtmp1); - { - Label L; - __ btst(JVM_ACC_NATIVE, Gtmp1); - __ br(Assembler::zero, false, Assembler::pt, L); - __ delayed()->nop(); - __ stop("tried to execute native method as non-native"); - __ bind(L); - } - { Label L; - __ btst(JVM_ACC_ABSTRACT, Gtmp1); - __ br(Assembler::zero, false, Assembler::pt, L); - __ delayed()->nop(); - __ stop("tried to execute abstract method as non-abstract"); - __ bind(L); - } -#endif // ASSERT - - // generate the code to allocate the interpreter stack frame - - generate_fixed_frame(false); - -#ifdef FAST_DISPATCH - __ set((intptr_t)Interpreter::dispatch_table(), IdispatchTables); - // set bytecode dispatch table base -#endif - - // - // Code to initialize the extra (i.e. non-parm) locals - // - Register init_value = noreg; // will be G0 if we must clear locals - // The way the code was setup before zerolocals was always true for vanilla java entries. - // It could only be false for the specialized entries like accessor or empty which have - // no extra locals so the testing was a waste of time and the extra locals were always - // initialized. We removed this extra complication to already over complicated code. 
- - init_value = G0; - Label clear_loop; - - const Register RconstMethod = O1; - const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); - const Address size_of_locals (RconstMethod, ConstMethod::size_of_locals_offset()); - - // NOTE: If you change the frame layout, this code will need to - // be updated! - __ ld_ptr( constMethod, RconstMethod ); - __ lduh( size_of_locals, O2 ); - __ lduh( size_of_parameters, O1 ); - __ sll( O2, Interpreter::logStackElementSize, O2); - __ sll( O1, Interpreter::logStackElementSize, O1 ); - __ sub( Llocals, O2, O2 ); - __ sub( Llocals, O1, O1 ); - - __ bind( clear_loop ); - __ inc( O2, wordSize ); - - __ cmp( O2, O1 ); - __ brx( Assembler::lessEqualUnsigned, true, Assembler::pt, clear_loop ); - __ delayed()->st_ptr( init_value, O2, 0 ); - - const Address do_not_unlock_if_synchronized(G2_thread, - JavaThread::do_not_unlock_if_synchronized_offset()); - // Since at this point in the method invocation the exception handler - // would try to exit the monitor of synchronized methods which hasn't - // been entered yet, we set the thread local variable - // _do_not_unlock_if_synchronized to true. If any exception was thrown by - // runtime, exception handling i.e. unlock_if_synchronized_method will - // check this thread local flag. 
- __ movbool(true, G3_scratch); - __ stbool(G3_scratch, do_not_unlock_if_synchronized); - - __ profile_parameters_type(G1_scratch, G3_scratch, G4_scratch, Lscratch); - // increment invocation counter and check for overflow - // - // Note: checking for negative value instead of overflow - // so we have a 'sticky' overflow test (may be of - // importance as soon as we have true MT/MP) - Label invocation_counter_overflow; - Label profile_method; - Label profile_method_continue; - Label Lcontinue; - if (inc_counter) { - generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue); - if (ProfileInterpreter) { - __ bind(profile_method_continue); - } - } - __ bind(Lcontinue); - - bang_stack_shadow_pages(false); - - // reset the _do_not_unlock_if_synchronized flag - __ stbool(G0, do_not_unlock_if_synchronized); - - // check for synchronized methods - // Must happen AFTER invocation_counter check and stack overflow check, - // so method is not locked if overflows. 
- - if (synchronized) { - lock_method(); - } else { -#ifdef ASSERT - { Label ok; - __ ld(access_flags, O0); - __ btst(JVM_ACC_SYNCHRONIZED, O0); - __ br( Assembler::zero, false, Assembler::pt, ok); - __ delayed()->nop(); - __ stop("method needs synchronization"); - __ bind(ok); - } -#endif // ASSERT - } - - // start execution - - __ verify_thread(); - - // jvmti support - __ notify_method_entry(); - - // start executing instructions - __ dispatch_next(vtos); - - - if (inc_counter) { - if (ProfileInterpreter) { - // We have decided to profile this method in the interpreter - __ bind(profile_method); - - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); - __ set_method_data_pointer_for_bcp(); - __ ba_short(profile_method_continue); - } - - // handle invocation counter overflow - __ bind(invocation_counter_overflow); - generate_counter_overflow(Lcontinue); - } - - - return entry; +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + // No special entry points that preclude compilation + return true; } static int size_activation_helper(int callee_extra_locals, int max_stack, int monitor_size) { @@ -1747,332 +303,3 @@ void AbstractInterpreter::layout_activation(Method* method, assert(lo <= esp && esp < monitors, "esp in bounds"); #endif // ASSERT } - -//---------------------------------------------------------------------------------------------------- -// Exceptions -void TemplateInterpreterGenerator::generate_throw_exception() { - - // Entry point in previous activation (i.e., if the caller was interpreted) - Interpreter::_rethrow_exception_entry = __ pc(); - // O0: exception - - // entry point for exceptions thrown within interpreter code - Interpreter::_throw_exception_entry = __ pc(); - __ verify_thread(); - // expression stack is undefined here - // O0: exception, i.e. 
Oexception - // Lbcp: exception bcp - __ verify_oop(Oexception); - - - // expression stack must be empty before entering the VM in case of an exception - __ empty_expression_stack(); - // find exception handler address and preserve exception oop - // call C routine to find handler and jump to it - __ call_VM(O1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), Oexception); - __ push_ptr(O1); // push exception for exception handler bytecodes - - __ JMP(O0, 0); // jump to exception handler (may be remove activation entry!) - __ delayed()->nop(); - - - // if the exception is not handled in the current frame - // the frame is removed and the exception is rethrown - // (i.e. exception continuation is _rethrow_exception) - // - // Note: At this point the bci is still the bxi for the instruction which caused - // the exception and the expression stack is empty. Thus, for any VM calls - // at this point, GC will find a legal oop map (with empty expression stack). - - // in current activation - // tos: exception - // Lbcp: exception bcp - - // - // JVMTI PopFrame support - // - - Interpreter::_remove_activation_preserving_args_entry = __ pc(); - Address popframe_condition_addr(G2_thread, JavaThread::popframe_condition_offset()); - // Set the popframe_processing bit in popframe_condition indicating that we are - // currently handling popframe, so that call_VMs that may happen later do not trigger new - // popframe handling cycles. - - __ ld(popframe_condition_addr, G3_scratch); - __ or3(G3_scratch, JavaThread::popframe_processing_bit, G3_scratch); - __ stw(G3_scratch, popframe_condition_addr); - - // Empty the expression stack, as in normal exception handling - __ empty_expression_stack(); - __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false, /* install_monitor_exception */ false); - - { - // Check to see whether we are returning to a deoptimized frame. 
- // (The PopFrame call ensures that the caller of the popped frame is - // either interpreted or compiled and deoptimizes it if compiled.) - // In this case, we can't call dispatch_next() after the frame is - // popped, but instead must save the incoming arguments and restore - // them after deoptimization has occurred. - // - // Note that we don't compare the return PC against the - // deoptimization blob's unpack entry because of the presence of - // adapter frames in C2. - Label caller_not_deoptimized; - __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), I7); - __ br_notnull_short(O0, Assembler::pt, caller_not_deoptimized); - - const Register Gtmp1 = G3_scratch; - const Register Gtmp2 = G1_scratch; - const Register RconstMethod = Gtmp1; - const Address constMethod(Lmethod, Method::const_offset()); - const Address size_of_parameters(RconstMethod, ConstMethod::size_of_parameters_offset()); - - // Compute size of arguments for saving when returning to deoptimized caller - __ ld_ptr(constMethod, RconstMethod); - __ lduh(size_of_parameters, Gtmp1); - __ sll(Gtmp1, Interpreter::logStackElementSize, Gtmp1); - __ sub(Llocals, Gtmp1, Gtmp2); - __ add(Gtmp2, wordSize, Gtmp2); - // Save these arguments - __ call_VM_leaf(L7_thread_cache, CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), G2_thread, Gtmp1, Gtmp2); - // Inform deoptimization that it is responsible for restoring these arguments - __ set(JavaThread::popframe_force_deopt_reexecution_bit, Gtmp1); - Address popframe_condition_addr(G2_thread, JavaThread::popframe_condition_offset()); - __ st(Gtmp1, popframe_condition_addr); - - // Return from the current method - // The caller's SP was adjusted upon method entry to accomodate - // the callee's non-argument locals. Undo that adjustment. 
- __ ret(); - __ delayed()->restore(I5_savedSP, G0, SP); - - __ bind(caller_not_deoptimized); - } - - // Clear the popframe condition flag - __ stw(G0 /* popframe_inactive */, popframe_condition_addr); - - // Get out of the current method (how this is done depends on the particular compiler calling - // convention that the interpreter currently follows) - // The caller's SP was adjusted upon method entry to accomodate - // the callee's non-argument locals. Undo that adjustment. - __ restore(I5_savedSP, G0, SP); - // The method data pointer was incremented already during - // call profiling. We have to restore the mdp for the current bcp. - if (ProfileInterpreter) { - __ set_method_data_pointer_for_bcp(); - } - -#if INCLUDE_JVMTI - { - Label L_done; - - __ ldub(Address(Lbcp, 0), G1_scratch); // Load current bytecode - __ cmp_and_br_short(G1_scratch, Bytecodes::_invokestatic, Assembler::notEqual, Assembler::pn, L_done); - - // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. - // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. - - __ call_VM(G1_scratch, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), I0, Lmethod, Lbcp); - - __ br_null(G1_scratch, false, Assembler::pn, L_done); - __ delayed()->nop(); - - __ st_ptr(G1_scratch, Lesp, wordSize); - __ bind(L_done); - } -#endif // INCLUDE_JVMTI - - // Resume bytecode interpretation at the current bcp - __ dispatch_next(vtos); - // end of JVMTI PopFrame support - - Interpreter::_remove_activation_entry = __ pc(); - - // preserve exception over this code sequence (remove activation calls the vm, but oopmaps are not correct here) - __ pop_ptr(Oexception); // get exception - - // Intel has the following comment: - //// remove the activation (without doing throws on illegalMonitorExceptions) - // They remove the activation without checking for bad monitor state. 
- // %%% We should make sure this is the right semantics before implementing. - - __ set_vm_result(Oexception); - __ unlock_if_synchronized_method(vtos, /* throw_monitor_exception */ false); - - __ notify_method_exit(false, vtos, InterpreterMacroAssembler::SkipNotifyJVMTI); - - __ get_vm_result(Oexception); - __ verify_oop(Oexception); - - const int return_reg_adjustment = frame::pc_return_offset; - Address issuing_pc_addr(I7, return_reg_adjustment); - - // We are done with this activation frame; find out where to go next. - // The continuation point will be an exception handler, which expects - // the following registers set up: - // - // Oexception: exception - // Oissuing_pc: the local call that threw exception - // Other On: garbage - // In/Ln: the contents of the caller's register window - // - // We do the required restore at the last possible moment, because we - // need to preserve some state across a runtime call. - // (Remember that the caller activation is unknown--it might not be - // interpreted, so things like Lscratch are useless in the caller.) - - // Although the Intel version uses call_C, we can use the more - // compact call_VM. (The only real difference on SPARC is a - // harmlessly ignored [re]set_last_Java_frame, compared with - // the Intel code which lacks this.) - __ mov(Oexception, Oexception ->after_save()); // get exception in I0 so it will be on O0 after restore - __ add(issuing_pc_addr, Oissuing_pc->after_save()); // likewise set I1 to a value local to the caller - __ super_call_VM_leaf(L7_thread_cache, - CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), - G2_thread, Oissuing_pc->after_save()); - - // The caller's SP was adjusted upon method entry to accomodate - // the callee's non-argument locals. Undo that adjustment. 
- __ JMP(O0, 0); // return exception handler in caller - __ delayed()->restore(I5_savedSP, G0, SP); - - // (same old exception object is already in Oexception; see above) - // Note that an "issuing PC" is actually the next PC after the call -} - - -// -// JVMTI ForceEarlyReturn support -// - -address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { - address entry = __ pc(); - - __ empty_expression_stack(); - __ load_earlyret_value(state); - - __ ld_ptr(G2_thread, JavaThread::jvmti_thread_state_offset(), G3_scratch); - Address cond_addr(G3_scratch, JvmtiThreadState::earlyret_state_offset()); - - // Clear the earlyret state - __ stw(G0 /* JvmtiThreadState::earlyret_inactive */, cond_addr); - - __ remove_activation(state, - /* throw_monitor_exception */ false, - /* install_monitor_exception */ false); - - // The caller's SP was adjusted upon method entry to accomodate - // the callee's non-argument locals. Undo that adjustment. - __ ret(); // return to caller - __ delayed()->restore(I5_savedSP, G0, SP); - - return entry; -} // end of JVMTI ForceEarlyReturn support - - -//------------------------------------------------------------------------------------------------------------------------ -// Helper for vtos entry point generation - -void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address& bep, address& cep, address& sep, address& aep, address& iep, address& lep, address& fep, address& dep, address& vep) { - assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); - Label L; - aep = __ pc(); __ push_ptr(); __ ba_short(L); - fep = __ pc(); __ push_f(); __ ba_short(L); - dep = __ pc(); __ push_d(); __ ba_short(L); - lep = __ pc(); __ push_l(); __ ba_short(L); - iep = __ pc(); __ push_i(); - bep = cep = sep = iep; // there aren't any - vep = __ pc(); __ bind(L); // fall through - generate_and_dispatch(t); -} - -// -------------------------------------------------------------------------------- - - 
-InterpreterGenerator::InterpreterGenerator(StubQueue* code) - : TemplateInterpreterGenerator(code) { - generate_all(); // down here so it can be "virtual" -} - -// -------------------------------------------------------------------------------- - -// Non-product code -#ifndef PRODUCT -address TemplateInterpreterGenerator::generate_trace_code(TosState state) { - address entry = __ pc(); - - __ push(state); - __ mov(O7, Lscratch); // protect return address within interpreter - - // Pass a 0 (not used in sparc) and the top of stack to the bytecode tracer - __ mov( Otos_l2, G3_scratch ); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), G0, Otos_l1, G3_scratch); - __ mov(Lscratch, O7); // restore return address - __ pop(state); - __ retl(); - __ delayed()->nop(); - - return entry; -} - - -// helpers for generate_and_dispatch - -void TemplateInterpreterGenerator::count_bytecode() { - __ inc_counter(&BytecodeCounter::_counter_value, G3_scratch, G4_scratch); -} - - -void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { - __ inc_counter(&BytecodeHistogram::_counters[t->bytecode()], G3_scratch, G4_scratch); -} - - -void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { - AddressLiteral index (&BytecodePairHistogram::_index); - AddressLiteral counters((address) &BytecodePairHistogram::_counters); - - // get index, shift out old bytecode, bring in new bytecode, and store it - // _index = (_index >> log2_number_of_codes) | - // (bytecode << log2_number_of_codes); - - __ load_contents(index, G4_scratch); - __ srl( G4_scratch, BytecodePairHistogram::log2_number_of_codes, G4_scratch ); - __ set( ((int)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes, G3_scratch ); - __ or3( G3_scratch, G4_scratch, G4_scratch ); - __ store_contents(G4_scratch, index, G3_scratch); - - // bump bucket contents - // _counters[_index] ++; - - __ set(counters, G3_scratch); // loads into G3_scratch - __ sll( G4_scratch, 
LogBytesPerWord, G4_scratch ); // Index is word address - __ add (G3_scratch, G4_scratch, G3_scratch); // Add in index - __ ld (G3_scratch, 0, G4_scratch); - __ inc (G4_scratch); - __ st (G4_scratch, 0, G3_scratch); -} - - -void TemplateInterpreterGenerator::trace_bytecode(Template* t) { - // Call a little run-time stub to avoid blow-up for each bytecode. - // The run-time runtime saves the right registers, depending on - // the tosca in-state for the given template. - address entry = Interpreter::trace_code(t->tos_in()); - guarantee(entry != NULL, "entry must have been generated"); - __ call(entry, relocInfo::none); - __ delayed()->nop(); -} - - -void TemplateInterpreterGenerator::stop_interpreter_at() { - AddressLiteral counter(&BytecodeCounter::_counter_value); - __ load_contents(counter, G3_scratch); - AddressLiteral stop_at(&StopInterpreterAt); - __ load_ptr_contents(stop_at, G4_scratch); - __ cmp(G3_scratch, G4_scratch); - __ breakpoint_trap(Assembler::equal, Assembler::icc); -} -#endif // not PRODUCT -#endif // !CC_INTERP diff --git a/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp b/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp index 08a293f99a2..de467071b1b 100644 --- a/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp +++ b/hotspot/src/cpu/x86/vm/interp_masm_x86.hpp @@ -175,6 +175,7 @@ class InterpreterMacroAssembler: public MacroAssembler { movptr(rsp, Address(rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); // NULL last_sp until next java call movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); + NOT_LP64(empty_FPU_stack()); } // Helpers for swap and dup diff --git a/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp b/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp index 47ae7aeb26c..a0e3f0685b6 100644 --- a/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/interpreter_x86_32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,7 +38,6 @@ #include "prims/jvmtiThreadState.hpp" #include "prims/methodHandles.hpp" #include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -184,20 +183,3 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin return entry_point; } - - -void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { - - // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in - // the days we had adapter frames. When we deoptimize a situation where a - // compiled caller calls a compiled caller will have registers it expects - // to survive the call to the callee. If we deoptimize the callee the only - // way we can restore these registers is to have the oldest interpreter - // frame that we create restore these values. That is what this routine - // will accomplish. - - // At the moment we have modified c2 to not have any callee save registers - // so this problem does not exist and this routine is just a place holder. - - assert(f->is_interpreted_frame(), "must be interpreted"); -} diff --git a/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp b/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp index cf3e45849bc..42d7fecb8b1 100644 --- a/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/interpreter_x86_64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -38,7 +38,6 @@ #include "prims/jvmtiThreadState.hpp" #include "prims/methodHandles.hpp" #include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -298,19 +297,3 @@ address InterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKin return entry_point; } - -void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { - - // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in - // the days we had adapter frames. When we deoptimize a situation where a - // compiled caller calls a compiled caller will have registers it expects - // to survive the call to the callee. If we deoptimize the callee the only - // way we can restore these registers is to have the oldest interpreter - // frame that we create restore these values. That is what this routine - // will accomplish. - - // At the moment we have modified c2 to not have any callee save registers - // so this problem does not exist and this routine is just a place holder. 
- - assert(f->is_interpreted_frame(), "must be interpreted"); -} diff --git a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86.cpp similarity index 79% rename from hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp rename to hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86.cpp index cea18ba83a7..e011552dfcb 100644 --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86.cpp @@ -51,6 +51,10 @@ #ifndef CC_INTERP +// Global Register Names +static const Register rbcp = LP64_ONLY(r13) NOT_LP64(rsi); +static const Register rlocals = LP64_ONLY(r14) NOT_LP64(rdi); + const int method_offset = frame::interpreter_frame_method_offset * wordSize; const int bcp_offset = frame::interpreter_frame_bcp_offset * wordSize; const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; @@ -95,12 +99,13 @@ address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler( __ empty_expression_stack(); // setup parameters // ??? 
convention: expect aberrant index in register ebx - __ lea(c_rarg1, ExternalAddress((address)name)); + Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); + __ lea(rarg, ExternalAddress((address)name)); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime:: throw_ArrayIndexOutOfBoundsException), - c_rarg1, rbx); + rarg, rbx); return entry; } @@ -108,7 +113,8 @@ address TemplateInterpreterGenerator::generate_ClassCastException_handler() { address entry = __ pc(); // object is at TOS - __ pop(c_rarg1); + Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); + __ pop(rarg); // expression stack must be empty before entering the VM if an // exception happened @@ -118,7 +124,7 @@ address TemplateInterpreterGenerator::generate_ClassCastException_handler() { CAST_FROM_FN_PTR(address, InterpreterRuntime:: throw_ClassCastException), - c_rarg1); + rarg); return entry; } @@ -126,31 +132,35 @@ address TemplateInterpreterGenerator::generate_exception_handler_common( const char* name, const char* message, bool pass_oop) { assert(!pass_oop || message == NULL, "either oop or message but not both"); address entry = __ pc(); + + Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); + Register rarg2 = NOT_LP64(rbx) LP64_ONLY(c_rarg2); + if (pass_oop) { // object is at TOS - __ pop(c_rarg2); + __ pop(rarg2); } // expression stack must be empty before entering the VM if an // exception happened __ empty_expression_stack(); // setup parameters - __ lea(c_rarg1, ExternalAddress((address)name)); + __ lea(rarg, ExternalAddress((address)name)); if (pass_oop) { __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime:: create_klass_exception), - c_rarg1, c_rarg2); + rarg, rarg2); } else { // kind of lame ExternalAddress can't take NULL because // external_word_Relocation will assert. 
if (message != NULL) { - __ lea(c_rarg2, ExternalAddress((address)message)); + __ lea(rarg2, ExternalAddress((address)message)); } else { - __ movptr(c_rarg2, NULL_WORD); + __ movptr(rarg2, NULL_WORD); } __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), - c_rarg1, c_rarg2); + rarg, rarg2); } // throw exception __ jump(ExternalAddress(Interpreter::throw_exception_entry())); @@ -170,6 +180,30 @@ address TemplateInterpreterGenerator::generate_continuation_for(TosState state) address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { address entry = __ pc(); +#ifndef _LP64 +#ifdef COMPILER2 + // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases + if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) { + for (int i = 1; i < 8; i++) { + __ ffree(i); + } + } else if (UseSSE < 2) { + __ empty_FPU_stack(); + } +#endif // COMPILER2 + if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) { + __ MacroAssembler::verify_FPU(1, "generate_return_entry_for compiled"); + } else { + __ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled"); + } + + if (state == ftos) { + __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_return_entry_for in interpreter"); + } else if (state == dtos) { + __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_return_entry_for in interpreter"); + } +#endif // _LP64 + // Restore stack bottom in case i2c adjusted stack __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); // and NULL it as marker that esp is now tos until next java call @@ -200,18 +234,29 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { address entry = __ pc(); + +#ifndef _LP64 + if (state == ftos) { + __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 
0 : 1, "generate_deopt_entry_for in interpreter"); + } else if (state == dtos) { + __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_deopt_entry_for in interpreter"); + } +#endif // _LP64 + // NULL last_sp until next java call __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); __ restore_bcp(); __ restore_locals(); + const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); + NOT_LP64(__ get_thread(thread)); #if INCLUDE_JVMCI // Check if we need to take lock at entry of synchronized method. if (UseJVMCICompiler) { Label L; - __ cmpb(Address(r15_thread, JavaThread::pending_monitorenter_offset()), 0); + __ cmpb(Address(thread, JavaThread::pending_monitorenter_offset()), 0); __ jcc(Assembler::zero, L); // Clear flag. - __ movb(Address(r15_thread, JavaThread::pending_monitorenter_offset()), 0); + __ movb(Address(thread, JavaThread::pending_monitorenter_offset()), 0); // Satisfy calling convention for lock_method(). __ get_method(rbx); // Take lock. 
@@ -222,7 +267,7 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, i // handle exceptions { Label L; - __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); + __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); __ jcc(Assembler::zero, L); __ call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -234,41 +279,52 @@ address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, i return entry; } -int AbstractInterpreter::BasicType_as_index(BasicType type) { - int i = 0; - switch (type) { - case T_BOOLEAN: i = 0; break; - case T_CHAR : i = 1; break; - case T_BYTE : i = 2; break; - case T_SHORT : i = 3; break; - case T_INT : i = 4; break; - case T_LONG : i = 5; break; - case T_VOID : i = 6; break; - case T_FLOAT : i = 7; break; - case T_DOUBLE : i = 8; break; - case T_OBJECT : i = 9; break; - case T_ARRAY : i = 9; break; - default : ShouldNotReachHere(); - } - assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, - "index out of bounds"); - return i; -} - - address TemplateInterpreterGenerator::generate_result_handler_for( BasicType type) { address entry = __ pc(); switch (type) { case T_BOOLEAN: __ c2bool(rax); break; +#ifndef _LP64 + case T_CHAR : __ andptr(rax, 0xFFFF); break; +#else case T_CHAR : __ movzwl(rax, rax); break; +#endif // _LP64 case T_BYTE : __ sign_extend_byte(rax); break; case T_SHORT : __ sign_extend_short(rax); break; case T_INT : /* nothing to do */ break; case T_LONG : /* nothing to do */ break; case T_VOID : /* nothing to do */ break; +#ifndef _LP64 + case T_DOUBLE : + case T_FLOAT : + { const Register t = InterpreterRuntime::SignatureHandlerGenerator::temp(); + __ pop(t); // remove return address first + // Must return a result for interpreter or compiler. In SSE + // mode, results are returned in xmm0 and the FPU stack must + // be empty. 
+ if (type == T_FLOAT && UseSSE >= 1) { + // Load ST0 + __ fld_d(Address(rsp, 0)); + // Store as float and empty fpu stack + __ fstp_s(Address(rsp, 0)); + // and reload + __ movflt(xmm0, Address(rsp, 0)); + } else if (type == T_DOUBLE && UseSSE >= 2 ) { + __ movdbl(xmm0, Address(rsp, 0)); + } else { + // restore ST0 + __ fld_d(Address(rsp, 0)); + } + // and pop the temp + __ addptr(rsp, 2 * wordSize); + __ push(t); // restore return address + } + break; +#else case T_FLOAT : /* nothing to do */ break; case T_DOUBLE : /* nothing to do */ break; +#endif // _LP64 + case T_OBJECT : // retrieve result from frame __ movptr(rax, Address(rbp, frame::interpreter_frame_oop_temp_offset*wordSize)); @@ -303,7 +359,7 @@ address TemplateInterpreterGenerator::generate_safept_entry_for( // so we have a 'sticky' overflow test // // rbx: method -// ecx: invocation counter +// rcx: invocation counter // void InterpreterGenerator::generate_counter_incr( Label* overflow, @@ -383,10 +439,10 @@ void InterpreterGenerator::generate_counter_incr( void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { // Asm interpreter on entry - // r14 - locals - // r13 - bcp + // r14/rdi - locals + // r13/rsi - bcp // rbx - method - // edx - cpool --- DOES NOT APPEAR TO BE TRUE + // rdx - cpool --- DOES NOT APPEAR TO BE TRUE // rbp - interpreter frame // On return (i.e. jump to entry_point) [ back to invocation of interpreter ] @@ -400,11 +456,12 @@ void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { // of the verified entry point for the method or NULL if the // compilation did not complete (either went background or bailed // out). 
- __ movl(c_rarg1, 0); + Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); + __ movl(rarg, 0); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), - c_rarg1); + rarg); __ movptr(rbx, Address(rbp, method_offset)); // restore Method* // Preserve invariant that r13/r14 contain bcp/locals of sender frame @@ -450,8 +507,15 @@ void InterpreterGenerator::generate_stack_overflow_check(void) { // compute rsp as if this were going to be the last frame on // the stack before the red zone - const Address stack_base(r15_thread, Thread::stack_base_offset()); - const Address stack_size(r15_thread, Thread::stack_size_offset()); + Label after_frame_check_pop; + const Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread); +#ifndef _LP64 + __ push(thread); + __ get_thread(thread); +#endif + + const Address stack_base(thread, Thread::stack_base_offset()); + const Address stack_size(thread, Thread::stack_size_offset()); // locals + overhead, in bytes __ mov(rax, rdx); @@ -485,20 +549,25 @@ void InterpreterGenerator::generate_stack_overflow_check(void) { // check against the current stack bottom __ cmpptr(rsp, rax); - __ jcc(Assembler::above, after_frame_check); + + __ jcc(Assembler::above, after_frame_check_pop); + NOT_LP64(__ pop(rsi)); // get saved bcp // Restore sender's sp as SP. This is necessary if the sender's // frame is an extended compiled frame (see gen_c2i_adapter()) // and safer anyway in case of JSR292 adaptations. __ pop(rax); // return address must be moved if SP is changed - __ mov(rsp, r13); + __ mov(rsp, rbcp); __ push(rax); // Note: the restored frame is not necessarily interpreted. // Use the shared runtime version of the StackOverflowError. 
assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry())); + // all done with frame size check + __ bind(after_frame_check_pop); + NOT_LP64(__ pop(rsi)); // all done with frame size check __ bind(after_frame_check); @@ -508,7 +577,7 @@ void InterpreterGenerator::generate_stack_overflow_check(void) { // // Args: // rbx: Method* -// r14: locals +// r14/rdi: locals // // Kills: // rax @@ -540,7 +609,7 @@ void TemplateInterpreterGenerator::lock_method() { __ movl(rax, access_flags); __ testl(rax, JVM_ACC_STATIC); // get receiver (assume this is frequent case) - __ movptr(rax, Address(r14, Interpreter::local_offset_in_bytes(0))); + __ movptr(rax, Address(rlocals, Interpreter::local_offset_in_bytes(0))); __ jcc(Assembler::zero, done); __ movptr(rax, Address(rbx, Method::const_offset())); __ movptr(rax, Address(rax, ConstMethod::constants_offset())); @@ -566,8 +635,9 @@ void TemplateInterpreterGenerator::lock_method() { __ movptr(monitor_block_top, rsp); // set new monitor block top // store object __ movptr(Address(rsp, BasicObjectLock::obj_offset_in_bytes()), rax); - __ movptr(c_rarg1, rsp); // object address - __ lock_object(c_rarg1); + const Register lockreg = NOT_LP64(rdx) LP64_ONLY(c_rarg1); + __ movptr(lockreg, rsp); // object address + __ lock_object(lockreg); } // Generate a fixed interpreter frame. 
This is identical setup for @@ -576,17 +646,17 @@ void TemplateInterpreterGenerator::lock_method() { // Args: // rax: return address // rbx: Method* -// r14: pointer to locals -// r13: sender sp +// r14/rdi: pointer to locals +// r13/rsi: sender sp // rdx: cp cache void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { // initialize fixed part of activation frame __ push(rax); // save return address __ enter(); // save old & set new rbp - __ push(r13); // set sender sp + __ push(rbcp); // set sender sp __ push((int)NULL_WORD); // leave last_sp as null - __ movptr(r13, Address(rbx, Method::const_offset())); // get ConstMethod* - __ lea(r13, Address(r13, ConstMethod::codes_offset())); // get codebase + __ movptr(rbcp, Address(rbx, Method::const_offset())); // get ConstMethod* + __ lea(rbcp, Address(rbcp, ConstMethod::codes_offset())); // get codebase __ push(rbx); // save Method* if (ProfileInterpreter) { Label method_data_continue; @@ -604,11 +674,11 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { __ movptr(rdx, Address(rdx, ConstMethod::constants_offset())); __ movptr(rdx, Address(rdx, ConstantPool::cache_offset_in_bytes())); __ push(rdx); // set constant pool cache - __ push(r14); // set locals pointer + __ push(rlocals); // set locals pointer if (native_call) { __ push(0); // no bcp } else { - __ push(r13); // set bcp + __ push(rbcp); // set bcp } __ push(0); // reserve word for pointer to expression stack bottom __ movptr(Address(rsp, 0), rsp); // set expression stack bottom @@ -667,6 +737,10 @@ address InterpreterGenerator::generate_Reference_get_entry(void) { // rdx: scratch // rdi: scratch + // Preserve the sender sp in case the pre-barrier + // calls the runtime + NOT_LP64(__ push(rsi)); + // Generate the G1 pre-barrier code to log the value of // the referent field in an SATB buffer. 
@@ -674,18 +748,23 @@ address InterpreterGenerator::generate_Reference_get_entry(void) { const Address field_address(rax, referent_offset); __ load_heap_oop(rax, field_address); + const Register sender_sp = NOT_LP64(rsi) LP64_ONLY(r13); + const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); + NOT_LP64(__ get_thread(thread)); + // Generate the G1 pre-barrier code to log the value of // the referent field in an SATB buffer. __ g1_write_barrier_pre(noreg /* obj */, rax /* pre_val */, - r15_thread /* thread */, + thread /* thread */, rbx /* tmp */, true /* tosca_live */, true /* expand_call */); // _areturn + NOT_LP64(__ pop(rsi)); // get sender sp __ pop(rdi); // get return address - __ mov(rsp, r13); // set sp to sender sp + __ mov(rsp, sender_sp); // set sp to sender sp __ jmp(rdi); __ ret(0); @@ -701,169 +780,6 @@ address InterpreterGenerator::generate_Reference_get_entry(void) { return NULL; } -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.update(int crc, int b) - */ -address InterpreterGenerator::generate_CRC32_update_entry() { - if (UseCRC32Intrinsics) { - address entry = __ pc(); - - // rbx,: Method* - // r13: senderSP must preserved for slow path, set SP to it on fast path - // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) - // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. - ExternalAddress state(SafepointSynchronize::address_of_state()); - __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), - SafepointSynchronize::_not_synchronized); - __ jcc(Assembler::notEqual, slow_path); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. 
- - // Load parameters - const Register crc = rax; // crc - const Register val = c_rarg0; // source java byte value - const Register tbl = c_rarg1; // scratch - - // Arguments are reversed on java expression stack - __ movl(val, Address(rsp, wordSize)); // byte value - __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC - - __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); - __ notl(crc); // ~crc - __ update_byte_crc32(crc, val, tbl); - __ notl(crc); // ~crc - // result in rax - - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, r13); // set sp to sender sp - __ jmp(rdi); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; - } - return NULL; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - */ -address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - if (UseCRC32Intrinsics) { - address entry = __ pc(); - - // rbx,: Method* - // r13: senderSP must preserved for slow path, set SP to it on fast path - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. - ExternalAddress state(SafepointSynchronize::address_of_state()); - __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), - SafepointSynchronize::_not_synchronized); - __ jcc(Assembler::notEqual, slow_path); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. 
- - // Load parameters - const Register crc = c_rarg0; // crc - const Register buf = c_rarg1; // source java byte array address - const Register len = c_rarg2; // length - const Register off = len; // offset (never overlaps with 'len') - - // Arguments are reversed on java expression stack - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { - __ movptr(buf, Address(rsp, 3*wordSize)); // long buf - __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset - __ addq(buf, off); // + offset - __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC - } else { - __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array - __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset - __ addq(buf, off); // + offset - __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC - } - // Can now load 'len' since we're finished with 'off' - __ movl(len, Address(rsp, wordSize)); // Length - - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); - // result in rax - - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, r13); // set sp to sender sp - __ jmp(rdi); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; - } - return NULL; -} - -/** -* Method entry for static native methods: -* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) -* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) -*/ -address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - if (UseCRC32CIntrinsics) { - address entry = __ pc(); - // Load parameters - const Register crc = c_rarg0; // crc - const Register buf = c_rarg1; // source java byte array address - const Register len = c_rarg2; - const Register off = c_rarg3; // 
offset - const Register end = len; - - // Arguments are reversed on java expression stack - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { - __ movptr(buf, Address(rsp, 3 * wordSize)); // long buf - __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset - __ addq(buf, off); // + offset - __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC - // Note on 5 * wordSize vs. 4 * wordSize: - // * int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) - // 4 2,3 1 0 - // end starts at SP + 8 - // The Java(R) Virtual Machine Specification Java SE 7 Edition - // 4.10.2.3. Values of Types long and double - // "When calculating operand stack length, values of type long and double have length two." - } else { - __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array - __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset - __ addq(buf, off); // + offset - __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC - } - __ movl(end, Address(rsp, wordSize)); // end - __ subl(end, off); // end - off - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); - // result in rax - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, r13); // set sp to sender sp - __ jmp(rdi); - - return entry; - } - - return NULL; -} - // Interpreter stub for calling a native method. (asm interpreter) // This sets up a somewhat different looking stack for calling the // native method than the typical interpreter frame setup. 
@@ -872,7 +788,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; // rbx: Method* - // r13: sender sp + // rbcp: sender sp address entry_point = __ pc(); @@ -892,13 +808,13 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // rbx: Method* // rcx: size of parameters - // r13: sender sp + // rbcp: sender sp __ pop(rax); // get return address // for natives the size of locals is zero - // compute beginning of parameters (r14) - __ lea(r14, Address(rsp, rcx, Address::times_8, -wordSize)); + // compute beginning of parameters + __ lea(rlocals, Address(rsp, rcx, Interpreter::stackElementScale(), -wordSize)); // add 2 zero-initialized slots for native calls // initialize result_handler slot @@ -935,7 +851,9 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // _do_not_unlock_if_synchronized to true. The remove_activation will // check this flag. 
- const Address do_not_unlock_if_synchronized(r15_thread, + const Register thread1 = NOT_LP64(rax) LP64_ONLY(r15_thread); + NOT_LP64(__ get_thread(thread1)); + const Address do_not_unlock_if_synchronized(thread1, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); __ movbool(do_not_unlock_if_synchronized, true); @@ -951,6 +869,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { bang_stack_shadow_pages(true); // reset the _do_not_unlock_if_synchronized flag + NOT_LP64(__ get_thread(thread1)); __ movbool(do_not_unlock_if_synchronized, false); // check for synchronized methods @@ -991,17 +910,26 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // work registers const Register method = rbx; - const Register t = r11; + const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread); + const Register t = NOT_LP64(rcx) LP64_ONLY(r11); // allocate space for parameters __ get_method(method); __ movptr(t, Address(method, Method::const_offset())); __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); + +#ifndef _LP64 + __ shlptr(t, Interpreter::logStackElementSize); + __ addptr(t, 2*wordSize); // allocate two more slots for JNIEnv and possible mirror + __ subptr(rsp, t); + __ andptr(rsp, -(StackAlignmentInBytes)); // gcc needs 16 byte aligned stacks to do XMM intrinsics +#else __ shll(t, Interpreter::logStackElementSize); __ subptr(rsp, t); __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows __ andptr(rsp, -16); // must be 16 byte boundary (see amd64 ABI) +#endif // _LP64 // get signature handler { @@ -1019,12 +947,12 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { } // call signature handler - assert(InterpreterRuntime::SignatureHandlerGenerator::from() == r14, + assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals, "adjust this code"); assert(InterpreterRuntime::SignatureHandlerGenerator::to() == rsp, "adjust this code"); - 
assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1, - "adjust this code"); + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == NOT_LP64(t) LP64_ONLY(rscratch1), + "adjust this code"); // The generated handlers do not touch RBX (the method oop). // However, large signatures cannot be cached and are generated @@ -1056,8 +984,13 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { __ movptr(Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize), t); // pass handle to mirror +#ifndef _LP64 + __ lea(t, Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize)); + __ movptr(Address(rsp, wordSize), t); +#else __ lea(c_rarg1, Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize)); +#endif // _LP64 __ bind(L); } @@ -1066,8 +999,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { Label L; __ movptr(rax, Address(method, Method::native_function_offset())); ExternalAddress unsatisfied(SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); - __ movptr(rscratch2, unsatisfied.addr()); - __ cmpptr(rax, rscratch2); + __ cmpptr(rax, unsatisfied.addr()); __ jcc(Assembler::notEqual, L); __ call_VM(noreg, CAST_FROM_FN_PTR(address, @@ -1079,17 +1011,28 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { } // pass JNIEnv - __ lea(c_rarg0, Address(r15_thread, JavaThread::jni_environment_offset())); +#ifndef _LP64 + __ get_thread(thread); + __ lea(t, Address(thread, JavaThread::jni_environment_offset())); + __ movptr(Address(rsp, 0), t); - // It is enough that the pc() points into the right code - // segment. It does not have to be the correct return pc. - __ set_last_Java_frame(rsp, rbp, (address) __ pc()); + // set_last_Java_frame_before_call + // It is enough that the pc() + // points into the right code segment. It does not have to be the correct return pc. 
+ __ set_last_Java_frame(thread, noreg, rbp, __ pc()); +#else + __ lea(c_rarg0, Address(r15_thread, JavaThread::jni_environment_offset())); + + // It is enough that the pc() points into the right code + // segment. It does not have to be the correct return pc. + __ set_last_Java_frame(rsp, rbp, (address) __ pc()); +#endif // _LP64 // change thread state #ifdef ASSERT { Label L; - __ movl(t, Address(r15_thread, JavaThread::thread_state_offset())); + __ movl(t, Address(thread, JavaThread::thread_state_offset())); __ cmpl(t, _thread_in_Java); __ jcc(Assembler::equal, L); __ stop("Wrong thread state in native stub"); @@ -1099,12 +1042,13 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // Change state to native - __ movl(Address(r15_thread, JavaThread::thread_state_offset()), + __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native); // Call the native method. __ call(rax); - // result potentially in rax or xmm0 + // 32: result potentially in rdx:rax or ST0 + // 64: result potentially in rax or xmm0 // Verify or restore cpu control state after JNI call __ restore_cpu_control_state_after_jni(); @@ -1114,11 +1058,40 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // pushes change or anything else is added to the stack then the code in // interpreter_frame_result must also change. +#ifndef _LP64 + // save potential result in ST(0) & rdx:rax + // (if result handler is the T_FLOAT or T_DOUBLE handler, result must be in ST0 - + // the check is necessary to avoid potential Intel FPU overflow problems by saving/restoring 'empty' FPU registers) + // It is safe to do this push because state is _thread_in_native and return address will be found + // via _last_native_pc and not via _last_jave_sp + + // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. 
+ // If the order changes or anything else is added to the stack the code in + // interpreter_frame_result will have to be changed. + + { Label L; + Label push_double; + ExternalAddress float_handler(AbstractInterpreter::result_handler(T_FLOAT)); + ExternalAddress double_handler(AbstractInterpreter::result_handler(T_DOUBLE)); + __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize), + float_handler.addr()); + __ jcc(Assembler::equal, push_double); + __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize), + double_handler.addr()); + __ jcc(Assembler::notEqual, L); + __ bind(push_double); + __ push_d(); // FP values are returned using the FPU, so push FPU contents (even if UseSSE > 0). + __ bind(L); + } +#else __ push(dtos); +#endif // _LP64 + __ push(ltos); // change thread state - __ movl(Address(r15_thread, JavaThread::thread_state_offset()), + NOT_LP64(__ get_thread(thread)); + __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); if (os::is_MP()) { @@ -1132,10 +1105,17 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // We use the current thread pointer to calculate a thread specific // offset to write to within the page. This minimizes bus traffic // due to cache line collision. - __ serialize_memory(r15_thread, rscratch2); + __ serialize_memory(thread, rcx); } } +#ifndef _LP64 + if (AlwaysRestoreFPU) { + // Make sure the control word is correct. 
+ __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); + } +#endif // _LP64 + // check for safepoint operation in progress and/or pending suspend requests { Label Continue; @@ -1144,7 +1124,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { Label L; __ jcc(Assembler::notEqual, L); - __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0); + __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0); __ jcc(Assembler::equal, Continue); __ bind(L); @@ -1155,6 +1135,13 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // preserved and correspond to the bcp/locals pointers. So we do a // runtime call by hand. // +#ifndef _LP64 + __ push(thread); + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, + JavaThread::check_special_condition_for_native_trans))); + __ increment(rsp, wordSize); + __ get_thread(thread); +#else __ mov(c_rarg0, r15_thread); __ mov(r12, rsp); // remember sp (can only use r12 if not using call_VM) __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows @@ -1162,17 +1149,18 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); __ mov(rsp, r12); // restore sp __ reinit_heapbase(); +#endif // _LP64 __ bind(Continue); } // change thread state - __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_Java); + __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java); // reset_last_Java_frame - __ reset_last_Java_frame(true, true); + __ reset_last_Java_frame(thread, true, true); // reset handle block - __ movptr(t, Address(r15_thread, JavaThread::active_handles_offset())); + __ movptr(t, Address(thread, JavaThread::active_handles_offset())); __ movl(Address(t, JNIHandleBlock::top_offset_in_bytes()), (int32_t)NULL_WORD); // If result is an oop unbox and store it in frame where gc will see it @@ -1190,7 +1178,7 
@@ address InterpreterGenerator::generate_native_entry(bool synchronized) { __ movptr(rax, Address(rax, 0)); __ bind(store_result); __ movptr(Address(rbp, frame::interpreter_frame_oop_temp_offset*wordSize), rax); - // keep stack depth as expected by pushing oop which will eventually be discarde + // keep stack depth as expected by pushing oop which will eventually be discarded __ push(ltos); __ bind(no_oop); } @@ -1198,11 +1186,15 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { { Label no_reguard; - __ cmpl(Address(r15_thread, JavaThread::stack_guard_state_offset()), + __ cmpl(Address(thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled); __ jcc(Assembler::notEqual, no_reguard); __ pusha(); // XXX only save smashed registers +#ifndef _LP64 + __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); + __ popa(); +#else __ mov(r12, rsp); // remember sp (can only use r12 if not using call_VM) __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows __ andptr(rsp, -16); // align stack as required by ABI @@ -1210,6 +1202,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { __ mov(rsp, r12); // restore sp __ popa(); // XXX only restore smashed registers __ reinit_heapbase(); +#endif // _LP64 __ bind(no_reguard); } @@ -1220,14 +1213,14 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // restored. Need bcp for throwing exception below so get it now. 
__ get_method(method); - // restore r13 to have legal interpreter frame, i.e., bci == 0 <=> - // r13 == code_base() - __ movptr(r13, Address(method, Method::const_offset())); // get ConstMethod* - __ lea(r13, Address(r13, ConstMethod::codes_offset())); // get codebase + // restore to have legal interpreter frame, i.e., bci == 0 <=> code_base() + __ movptr(rbcp, Address(method, Method::const_offset())); // get ConstMethod* + __ lea(rbcp, Address(rbcp, ConstMethod::codes_offset())); // get codebase + // handle exceptions (exception handling will handle unlocking!) { Label L; - __ cmpptr(Address(r15_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); + __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); __ jcc(Assembler::zero, L); // Note: At some point we may want to unify this with the code // used in call_VM_base(); i.e., we should use the @@ -1255,12 +1248,14 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // has not been unlocked by an explicit monitorexit bytecode. 
const Address monitor(rbp, (intptr_t)(frame::interpreter_frame_initial_sp_offset * - wordSize - sizeof(BasicObjectLock))); + wordSize - (int)sizeof(BasicObjectLock))); + + const Register regmon = NOT_LP64(rdx) LP64_ONLY(c_rarg1); // monitor expect in c_rarg1 for slow unlock path - __ lea(c_rarg1, monitor); // address of first monitor + __ lea(regmon, monitor); // address of first monitor - __ movptr(t, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes())); + __ movptr(t, Address(regmon, BasicObjectLock::obj_offset_in_bytes())); __ testptr(t, t); __ jcc(Assembler::notZero, unlock); @@ -1271,7 +1266,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { __ should_not_reach_here(); __ bind(unlock); - __ unlock_object(c_rarg1); + __ unlock_object(regmon); } __ bind(L); } @@ -1287,7 +1282,7 @@ address InterpreterGenerator::generate_native_entry(bool synchronized) { // restore potential result in ST0 & handle result __ pop(ltos); - __ pop(dtos); + LP64_ONLY( __ pop(dtos)); __ movptr(t, Address(rbp, (frame::interpreter_frame_result_handler_offset) * wordSize)); @@ -1319,7 +1314,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; // ebx: Method* - // r13: sender sp + // rbcp: sender sp address entry_point = __ pc(); const Address constMethod(rbx, Method::const_offset()); @@ -1335,7 +1330,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { // rbx: Method* // rcx: size of parameters - // r13: sender_sp (could differ from sp+wordSize if we were called via c2i ) + // rbcp: sender_sp (could differ from sp+wordSize if we were called via c2i ) __ load_unsigned_short(rdx, size_of_locals); // get size of locals in words __ subl(rdx, rcx); // rdx = no. 
of additional locals @@ -1350,8 +1345,8 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { // get return address __ pop(rax); - // compute beginning of parameters (r14) - __ lea(r14, Address(rsp, rcx, Address::times_8, -wordSize)); + // compute beginning of parameters + __ lea(rlocals, Address(rsp, rcx, Interpreter::stackElementScale(), -wordSize)); // rdx - # of additional locals // allocate space for locals @@ -1395,7 +1390,9 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { // _do_not_unlock_if_synchronized to true. The remove_activation // will check this flag. - const Address do_not_unlock_if_synchronized(r15_thread, + const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + NOT_LP64(__ get_thread(thread)); + const Address do_not_unlock_if_synchronized(thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); __ movbool(do_not_unlock_if_synchronized, true); @@ -1420,6 +1417,7 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { bang_stack_shadow_pages(false); // reset the _do_not_unlock_if_synchronized flag + NOT_LP64(__ get_thread(thread)); __ movbool(do_not_unlock_if_synchronized, false); // check for synchronized methods @@ -1479,42 +1477,6 @@ address InterpreterGenerator::generate_normal_entry(bool synchronized) { return entry_point; } - -// These should never be compiled since the interpreter will prefer -// the compiled version to the intrinsic version. 
-bool AbstractInterpreter::can_be_compiled(methodHandle m) { - switch (method_kind(m)) { - case Interpreter::java_lang_math_sin : // fall thru - case Interpreter::java_lang_math_cos : // fall thru - case Interpreter::java_lang_math_tan : // fall thru - case Interpreter::java_lang_math_abs : // fall thru - case Interpreter::java_lang_math_log : // fall thru - case Interpreter::java_lang_math_log10 : // fall thru - case Interpreter::java_lang_math_sqrt : // fall thru - case Interpreter::java_lang_math_pow : // fall thru - case Interpreter::java_lang_math_exp : - return false; - default: - return true; - } -} - -// How much stack a method activation needs in words. -int AbstractInterpreter::size_top_interpreter_activation(Method* method) { - const int entry_size = frame::interpreter_frame_monitor_size(); - - // total overhead size: entry_size + (saved rbp thru expr stack - // bottom). be sure to change this if you add/subtract anything - // to/from the overhead area - const int overhead_size = - -(frame::interpreter_frame_initial_sp_offset) + entry_size; - - const int stub_code = frame::entry_frame_after_call_words; - const int method_stack = (method->max_locals() + method->max_stack()) * - Interpreter::stackElementWords; - return (overhead_size + method_stack + stub_code); -} - //----------------------------------------------------------------------------- // Exceptions @@ -1527,16 +1489,17 @@ void TemplateInterpreterGenerator::generate_throw_exception() { __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); // rax: exception // rdx: return address/pc that threw exception - __ restore_bcp(); // r13 points to call/send + __ restore_bcp(); // r13/rsi points to call/send __ restore_locals(); - __ reinit_heapbase(); // restore r12 as heapbase. + LP64_ONLY(__ reinit_heapbase()); // restore r12 as heapbase. 
// Entry point for exceptions thrown within interpreter code Interpreter::_throw_exception_entry = __ pc(); // expression stack is undefined here // rax: exception - // r13: exception bcp + // r13/rsi: exception bcp __ verify_oop(rax); - __ mov(c_rarg1, rax); + Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); + LP64_ONLY(__ mov(c_rarg1, rax)); // expression stack must be empty before entering the VM in case of // an exception @@ -1545,10 +1508,10 @@ void TemplateInterpreterGenerator::generate_throw_exception() { __ call_VM(rdx, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), - c_rarg1); + rarg); // rax: exception handler entry point // rdx: preserved exception oop - // r13: bcp for exception handler + // r13/rsi: bcp for exception handler __ push_ptr(rdx); // push exception which is now the only value on the stack __ jmp(rax); // jump to exception handler (may be _remove_activation_entry!) @@ -1575,9 +1538,11 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // indicating that we are currently handling popframe, so that // call_VMs that may happen later do not trigger new popframe // handling cycles. - __ movl(rdx, Address(r15_thread, JavaThread::popframe_condition_offset())); + const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); + NOT_LP64(__ get_thread(thread)); + __ movl(rdx, Address(thread, JavaThread::popframe_condition_offset())); __ orl(rdx, JavaThread::popframe_processing_bit); - __ movl(Address(r15_thread, JavaThread::popframe_condition_offset()), rdx); + __ movl(Address(thread, JavaThread::popframe_condition_offset()), rdx); { // Check to see whether we are returning to a deoptimized frame. @@ -1591,9 +1556,10 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // deoptimization blob's unpack entry because of the presence of // adapter frames in C2. 
Label caller_not_deoptimized; - __ movptr(c_rarg1, Address(rbp, frame::return_addr_offset * wordSize)); + Register rarg = NOT_LP64(rdx) LP64_ONLY(c_rarg1); + __ movptr(rarg, Address(rbp, frame::return_addr_offset * wordSize)); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, - InterpreterRuntime::interpreter_contains), c_rarg1); + InterpreterRuntime::interpreter_contains), rarg); __ testl(rax, rax); __ jcc(Assembler::notZero, caller_not_deoptimized); @@ -1604,14 +1570,15 @@ void TemplateInterpreterGenerator::generate_throw_exception() { __ load_unsigned_short(rax, Address(rax, in_bytes(ConstMethod:: size_of_parameters_offset()))); __ shll(rax, Interpreter::logStackElementSize); - __ restore_locals(); // XXX do we need this? - __ subptr(r14, rax); - __ addptr(r14, wordSize); + __ restore_locals(); + __ subptr(rlocals, rax); + __ addptr(rlocals, wordSize); // Save these arguments + NOT_LP64(__ get_thread(thread)); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization:: popframe_preserve_args), - r15_thread, rax, r14); + thread, rax, rlocals); __ remove_activation(vtos, rdx, /* throw_monitor_exception */ false, @@ -1620,7 +1587,8 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // Inform deoptimization that it is responsible for restoring // these arguments - __ movl(Address(r15_thread, JavaThread::popframe_condition_offset()), + NOT_LP64(__ get_thread(thread)); + __ movl(Address(thread, JavaThread::popframe_condition_offset()), JavaThread::popframe_force_deopt_reexecution_bit); // Continue in deoptimization handler @@ -1645,18 +1613,29 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // maintain this kind of invariant all the time we call a small // fixup routine to move the mutated arguments onto the top of our // expression stack if necessary. 
+#ifndef _LP64 + __ mov(rax, rsp); + __ movptr(rbx, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); + __ get_thread(thread); + // PC must point into interpreter here + __ set_last_Java_frame(thread, noreg, rbp, __ pc()); + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, rax, rbx); + __ get_thread(thread); +#else __ mov(c_rarg1, rsp); __ movptr(c_rarg2, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); // PC must point into interpreter here __ set_last_Java_frame(noreg, rbp, __ pc()); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), r15_thread, c_rarg1, c_rarg2); - __ reset_last_Java_frame(true, true); +#endif + __ reset_last_Java_frame(thread, true, true); + // Restore the last_sp and null it out __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); - __ restore_bcp(); // XXX do we need this? - __ restore_locals(); // XXX do we need this? + __ restore_bcp(); + __ restore_locals(); // The method data pointer was incremented already during // call profiling. We have to restore the mdp for the current bcp. if (ProfileInterpreter) { @@ -1664,15 +1643,16 @@ void TemplateInterpreterGenerator::generate_throw_exception() { } // Clear the popframe condition flag - __ movl(Address(r15_thread, JavaThread::popframe_condition_offset()), + NOT_LP64(__ get_thread(thread)); + __ movl(Address(thread, JavaThread::popframe_condition_offset()), JavaThread::popframe_inactive); #if INCLUDE_JVMTI { Label L_done; - const Register local0 = r14; + const Register local0 = rlocals; - __ cmpb(Address(r13, 0), Bytecodes::_invokestatic); + __ cmpb(Address(rbcp, 0), Bytecodes::_invokestatic); __ jcc(Assembler::notEqual, L_done); // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. 
@@ -1680,7 +1660,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { __ get_method(rdx); __ movptr(rax, Address(local0, 0)); - __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), rax, rdx, r13); + __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), rax, rdx, rbcp); __ testptr(rax, rax); __ jcc(Assembler::zero, L_done); @@ -1697,11 +1677,13 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // preserve exception over this code sequence __ pop_ptr(rax); - __ movptr(Address(r15_thread, JavaThread::vm_result_offset()), rax); + NOT_LP64(__ get_thread(thread)); + __ movptr(Address(thread, JavaThread::vm_result_offset()), rax); // remove the activation (without doing throws on illegalMonitorExceptions) __ remove_activation(vtos, rdx, false, true, false); // restore exception - __ get_vm_result(rax, r15_thread); + NOT_LP64(__ get_thread(thread)); + __ get_vm_result(rax, thread); // In between activations - previous activation type unknown yet // compute continuation point - the continuation point expects the @@ -1715,7 +1697,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { __ push(rdx); // save return address __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), - r15_thread, rdx); + thread, rdx); __ mov(rbx, rax); // save exception handler __ pop(rdx); // restore return address __ pop(rax); // restore exception @@ -1734,10 +1716,12 @@ address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state __ restore_bcp(); __ restore_locals(); __ empty_expression_stack(); - __ load_earlyret_value(state); + __ load_earlyret_value(state); // 32 bits returns value in rdx, so don't reuse - __ movptr(rdx, Address(r15_thread, JavaThread::jvmti_thread_state_offset())); - Address cond_addr(rdx, JvmtiThreadState::earlyret_state_offset()); + const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); + 
NOT_LP64(__ get_thread(thread)); + __ movptr(rcx, Address(thread, JavaThread::jvmti_thread_state_offset())); + Address cond_addr(rcx, JvmtiThreadState::earlyret_state_offset()); // Clear the earlyret state __ movl(cond_addr, JvmtiThreadState::earlyret_inactive); @@ -1768,8 +1752,13 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); Label L; aep = __ pc(); __ push_ptr(); __ jmp(L); +#ifndef _LP64 + fep = __ pc(); __ push(ftos); __ jmp(L); + dep = __ pc(); __ push(dtos); __ jmp(L); +#else fep = __ pc(); __ push_f(xmm0); __ jmp(L); dep = __ pc(); __ push_d(xmm0); __ jmp(L); +#endif // _LP64 lep = __ pc(); __ push_l(); __ jmp(L); bep = cep = sep = iep = __ pc(); __ push_i(); @@ -1794,9 +1783,23 @@ InterpreterGenerator::InterpreterGenerator(StubQueue* code) // Non-product code #ifndef PRODUCT + address TemplateInterpreterGenerator::generate_trace_code(TosState state) { address entry = __ pc(); +#ifndef _LP64 + // prepare expression stack + __ pop(rcx); // pop return address so expression stack is 'pure' + __ push(state); // save tosca + + // pass tosca registers as arguments & call tracer + __ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), rcx, rax, rdx); + __ mov(rcx, rax); // make sure return address is not destroyed by pop(state) + __ pop(state); // restore tosca + + // return + __ jmp(rcx); +#else __ push(state); __ push(c_rarg0); __ push(c_rarg1); @@ -1815,6 +1818,7 @@ address TemplateInterpreterGenerator::generate_trace_code(TosState state) { __ pop(c_rarg0); __ pop(state); __ ret(0); // return from result handler +#endif // _LP64 return entry; } @@ -1846,11 +1850,15 @@ void TemplateInterpreterGenerator::trace_bytecode(Template* t) { assert(Interpreter::trace_code(t->tos_in()) != NULL, "entry must have been generated"); +#ifndef _LP64 + __ call(RuntimeAddress(Interpreter::trace_code(t->tos_in()))); +#else __ mov(r12, rsp); // remember sp (can only use 
r12 if not using call_VM) __ andptr(rsp, -16); // align stack as required by ABI __ call(RuntimeAddress(Interpreter::trace_code(t->tos_in()))); __ mov(rsp, r12); // restore sp __ reinit_heapbase(); +#endif // _LP64 } diff --git a/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_32.cpp new file mode 100644 index 00000000000..d43d2606829 --- /dev/null +++ b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_32.cpp @@ -0,0 +1,305 @@ +/* + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "runtime/arguments.hpp" + +#define __ _masm-> + + +#ifndef CC_INTERP + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rbx: Method* + // rsi: senderSP must be preserved for slow path, set SP to it on fast path + // rdx: scratch + // rdi: scratch + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), + SafepointSynchronize::_not_synchronized); + __ jcc(Assembler::notEqual, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. 
+ + // Load parameters + const Register crc = rax; // crc + const Register val = rdx; // source java byte value + const Register tbl = rdi; // scratch: holds the CRC table address + + // Arguments are reversed on java expression stack + __ movl(val, Address(rsp, wordSize)); // byte value + __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC + + __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); + __ notl(crc); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ notl(crc); // ~crc + // result in rax + + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rbx: Method* + // rsi: senderSP must be preserved for slow path, set SP to it on fast path + // rdx: scratch + // rdi: scratch + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), + SafepointSynchronize::_not_synchronized); + __ jcc(Assembler::notEqual, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. + + // Load parameters + const Register crc = rax; // crc + const Register buf = rdx; // source java byte array address + const Register len = rdi; // length + + // value x86_32 + // interp. 
arg ptr ESP + 4 + // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + // 3 2 1 0 + // int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + // 4 2,3 1 0 + + // Arguments are reversed on java expression stack + __ movl(len, Address(rsp, 4 + 0)); // Length + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC + } else { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array + __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC + } + + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); + // result in rax + + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** +* Method entry for static native methods: +* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) +* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) +*/ +address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32CIntrinsics) { + address entry = __ pc(); + // Load parameters + const Register crc = rax; // crc + const Register buf = rcx; // source java byte array address + const Register len = rdx; // length + const Register end = len; // same register as 'len': loaded with 'end', then the offset is subtracted + + // value x86_32 + // interp. 
arg ptr ESP + 4 + // int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) + // 3 2 1 0 + // int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) + // 4 2,3 1 0 + + // Arguments are reversed on java expression stack + __ movl(end, Address(rsp, 4 + 0)); // end + __ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC + } else { + __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array + __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset + __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC + } + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); + // result in rax + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set sp to sender sp + __ jmp(rdi); + + return entry; + } + return NULL; +} + +/** + * Method entry for static native method: + * java.lang.Float.intBitsToFloat(int bits) + */ +address InterpreterGenerator::generate_Float_intBitsToFloat_entry() { + if (UseSSE >= 1) { + address entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). 
+ + // Load 'bits' into xmm0 (interpreter returns results in xmm0) + __ movflt(xmm0, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; + } + + return NULL; +} + +/** + * Method entry for static native method: + * java.lang.Float.floatToRawIntBits(float value) + */ +address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() { + if (UseSSE >= 1) { + address entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load the parameter (a floating-point value) into rax. + __ movl(rax, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; + } + + return NULL; +} + + +/** + * Method entry for static native method: + * java.lang.Double.longBitsToDouble(long bits) + */ +address InterpreterGenerator::generate_Double_longBitsToDouble_entry() { + if (UseSSE >= 2) { + address entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). + + // Load 'bits' into xmm0 (interpreter returns results in xmm0) + __ movdbl(xmm0, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; + } + + return NULL; +} + +/** + * Method entry for static native method: + * java.lang.Double.doubleToRawLongBits(double value) + */ +address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { + if (UseSSE >= 2) { + address entry = __ pc(); + + // rsi: the sender's SP + + // Skip safepoint check (compiler intrinsic versions of this method + // do not perform safepoint checks either). 
+ + // Load the parameter (a floating-point value) into the rdx:rax register pair, one 32-bit word each. 
+ __ movl(rdx, Address(rsp, 2*wordSize)); + __ movl(rax, Address(rsp, wordSize)); + + // Return + __ pop(rdi); // get return address + __ mov(rsp, rsi); // set rsp to the sender's SP + __ jmp(rdi); + return entry; + } + + return NULL; +} +#endif // CC_INTERP diff --git a/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp new file mode 100644 index 00000000000..b77270b02ca --- /dev/null +++ b/hotspot/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "runtime/arguments.hpp" + +#define __ _masm-> + +#ifndef CC_INTERP + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.update(int crc, int b) + */ +address InterpreterGenerator::generate_CRC32_update_entry() { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rbx: Method* + // r13: senderSP must be preserved for slow path, set SP to it on fast path + // c_rarg0: scratch (rdi on non-Win64, rcx on Win64) + // c_rarg1: scratch (rsi on non-Win64, rdx on Win64) + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), + SafepointSynchronize::_not_synchronized); + __ jcc(Assembler::notEqual, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. 
+ + // Load parameters + const Register crc = rax; // crc + const Register val = c_rarg0; // source java byte value + const Register tbl = c_rarg1; // scratch: holds the CRC table address + + // Arguments are reversed on java expression stack + __ movl(val, Address(rsp, wordSize)); // byte value + __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC + + __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); + __ notl(crc); // ~crc + __ update_byte_crc32(crc, val, tbl); + __ notl(crc); // ~crc + // result in rax + + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, r13); // set sp to sender sp + __ jmp(rdi); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** + * Method entry for static native methods: + * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) + */ +address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32Intrinsics) { + address entry = __ pc(); + + // rbx: Method* + // r13: senderSP must be preserved for slow path, set SP to it on fast path + + Label slow_path; + // If we need a safepoint check, generate full interpreter entry. + ExternalAddress state(SafepointSynchronize::address_of_state()); + __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), + SafepointSynchronize::_not_synchronized); + __ jcc(Assembler::notEqual, slow_path); + + // We don't generate local frame and don't align stack because + // we call stub code and there is no safepoint on this path. 
+ + // Load parameters + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; // length + const Register off = len; // offset (shares the register of 'len'; 'off' is consumed before 'len' is loaded) + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { + __ movptr(buf, Address(rsp, 3*wordSize)); // long buf + __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset + __ addq(buf, off); // + offset + __ movl(crc, Address(rsp, 5*wordSize)); // Initial CRC + } else { + __ movptr(buf, Address(rsp, 3*wordSize)); // byte[] array + __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ movl2ptr(off, Address(rsp, 2*wordSize)); // offset + __ addq(buf, off); // + offset + __ movl(crc, Address(rsp, 4*wordSize)); // Initial CRC + } + // Can now load 'len' since we're finished with 'off' + __ movl(len, Address(rsp, wordSize)); // Length + + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); + // result in rax + + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, r13); // set sp to sender sp + __ jmp(rdi); + + // generate a vanilla native entry as the slow path + __ bind(slow_path); + __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); + return entry; + } + return NULL; +} + +/** +* Method entry for static native methods: +* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) +* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) +*/ +address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { + if (UseCRC32CIntrinsics) { + address entry = __ pc(); + // Load parameters + const Register crc = c_rarg0; // crc + const Register buf = c_rarg1; // source java byte array address + const Register len = c_rarg2; 
+ const Register off = c_rarg3; // 
offset + const Register end = len; // same register as 'len': loaded with 'end', then 'off' is subtracted + + // Arguments are reversed on java expression stack + // Calculate address of start element + if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { + __ movptr(buf, Address(rsp, 3 * wordSize)); // long buf + __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset + __ addq(buf, off); // + offset + __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC + // Note on 5 * wordSize vs. 4 * wordSize: + // * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) + // 4 2,3 1 0 + // end starts at SP + 8 + // The Java(R) Virtual Machine Specification Java SE 7 Edition + // 4.10.2.3. Values of Types long and double + // "When calculating operand stack length, values of type long and double have length two." + } else { + __ movptr(buf, Address(rsp, 3 * wordSize)); // byte[] array + __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size + __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset + __ addq(buf, off); // + offset + __ movl(crc, Address(rsp, 4 * wordSize)); // Initial CRC + } + __ movl(end, Address(rsp, wordSize)); // end + __ subl(end, off); // end - off == length (left in the register passed as 'len') + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); + // result in rax + // _areturn + __ pop(rdi); // get return address + __ mov(rsp, r13); // set sp to sender sp + __ jmp(rdi); + + return entry; + } + + return NULL; +} +#endif // ! CC_INTERP diff --git a/hotspot/src/cpu/x86/vm/templateInterpreter_x86.cpp b/hotspot/src/cpu/x86/vm/templateInterpreter_x86.cpp index 7a12d82e9af..9b84f71bc3c 100644 --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86.cpp +++ b/hotspot/src/cpu/x86/vm/templateInterpreter_x86.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. 
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -116,4 +116,87 @@ void AbstractInterpreter::layout_activation(Method* method, method->constants()->cache(); } +#ifndef _LP64 +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : // fall through + case T_LONG : // fall through + case T_VOID : i = 4; break; // int, long and void share one index + case T_FLOAT : i = 5; break; // have to treat float and double separately for SSE + case T_DOUBLE : i = 6; break; + case T_OBJECT : // fall through + case T_ARRAY : i = 7; break; // objects and arrays share one index + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); + return i; +} +#else // _LP64 +int AbstractInterpreter::BasicType_as_index(BasicType type) { + int i = 0; + switch (type) { + case T_BOOLEAN: i = 0; break; + case T_CHAR : i = 1; break; + case T_BYTE : i = 2; break; + case T_SHORT : i = 3; break; + case T_INT : i = 4; break; + case T_LONG : i = 5; break; + case T_VOID : i = 6; break; + case T_FLOAT : i = 7; break; + case T_DOUBLE : i = 8; break; + case T_OBJECT : i = 9; break; + case T_ARRAY : i = 9; break; // same index as T_OBJECT + default : ShouldNotReachHere(); + } + assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, + "index out of bounds"); + return i; +} +#endif // _LP64 + +// These should never be compiled since the interpreter will prefer +// the compiled version to the intrinsic version. 
+bool AbstractInterpreter::can_be_compiled(methodHandle m) { + switch (method_kind(m)) { + case Interpreter::java_lang_math_sin : // fall thru + case Interpreter::java_lang_math_cos : // fall thru + case Interpreter::java_lang_math_tan : // fall thru + case Interpreter::java_lang_math_abs : // fall thru + case Interpreter::java_lang_math_log : // fall thru + case Interpreter::java_lang_math_log10 : // fall thru + case Interpreter::java_lang_math_sqrt : // fall thru + case Interpreter::java_lang_math_pow : // fall thru + case Interpreter::java_lang_math_exp : + return false; // every math method kind listed above is reported as not compilable + default: + return true; + } +} + +// How much stack a method activation needs, in words: frame overhead + locals and expression stack + call stub words. +int AbstractInterpreter::size_top_interpreter_activation(Method* method) { + const int entry_size = frame::interpreter_frame_monitor_size(); + + // total overhead size: entry_size + (saved rbp thru expr stack + // bottom). Be sure to change this if you add/subtract anything + // to/from the overhead area + const int overhead_size = + -(frame::interpreter_frame_initial_sp_offset) + entry_size; + +#ifndef _LP64 + const int stub_code = 4; // see generate_call_stub +#else // _LP64 + const int stub_code = frame::entry_frame_after_call_words; +#endif // _LP64 + + const int method_stack = (method->max_locals() + method->max_stack()) * + Interpreter::stackElementWords; + return (overhead_size + method_stack + stub_code); +} + #endif // CC_INTERP diff --git a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp deleted file mode 100644 index 6e27d776142..00000000000 --- a/hotspot/src/cpu/x86/vm/templateInterpreter_x86_32.cpp +++ /dev/null @@ -1,1916 +0,0 @@ -/* - * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "interpreter/bytecodeHistogram.hpp" -#include "interpreter/interpreter.hpp" -#include "interpreter/interpreterGenerator.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "interpreter/interp_masm.hpp" -#include "interpreter/templateTable.hpp" -#include "oops/arrayOop.hpp" -#include "oops/methodData.hpp" -#include "oops/method.hpp" -#include "oops/oop.inline.hpp" -#include "prims/jvmtiExport.hpp" -#include "prims/jvmtiThreadState.hpp" -#include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" -#include "runtime/frame.inline.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" -#include "runtime/synchronizer.hpp" -#include "runtime/timer.hpp" -#include "runtime/vframeArray.hpp" -#include "utilities/debug.hpp" -#include "utilities/macros.hpp" - -#define __ _masm-> - - -#ifndef CC_INTERP -const int method_offset = frame::interpreter_frame_method_offset * wordSize; -const int bcp_offset = frame::interpreter_frame_bcp_offset * 
wordSize; -const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; - -//------------------------------------------------------------------------------------------------------------------------ - -address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { - address entry = __ pc(); - - // Note: There should be a minimal interpreter frame set up when stack - // overflow occurs since we check explicitly for it now. - // -#ifdef ASSERT - { Label L; - __ lea(rax, Address(rbp, - frame::interpreter_frame_monitor_block_top_offset * wordSize)); - __ cmpptr(rax, rsp); // rax, = maximal rsp for current rbp, - // (stack grows negative) - __ jcc(Assembler::aboveEqual, L); // check if frame is complete - __ stop ("interpreter frame not set up"); - __ bind(L); - } -#endif // ASSERT - // Restore bcp under the assumption that the current frame is still - // interpreted - __ restore_bcp(); - - // expression stack must be empty before entering the VM if an exception - // happened - __ empty_expression_stack(); - __ empty_FPU_stack(); - // throw exception - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); - return entry; -} - -address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler(const char* name) { - address entry = __ pc(); - // expression stack must be empty before entering the VM if an exception happened - __ empty_expression_stack(); - __ empty_FPU_stack(); - // setup parameters - // ??? 
convention: expect aberrant index in register rbx, - __ lea(rax, ExternalAddress((address)name)); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), rax, rbx); - return entry; -} - -address TemplateInterpreterGenerator::generate_ClassCastException_handler() { - address entry = __ pc(); - // object is at TOS - __ pop(rax); - // expression stack must be empty before entering the VM if an exception - // happened - __ empty_expression_stack(); - __ empty_FPU_stack(); - __ call_VM(noreg, - CAST_FROM_FN_PTR(address, - InterpreterRuntime::throw_ClassCastException), - rax); - return entry; -} - -address TemplateInterpreterGenerator::generate_exception_handler_common(const char* name, const char* message, bool pass_oop) { - assert(!pass_oop || message == NULL, "either oop or message but not both"); - address entry = __ pc(); - if (pass_oop) { - // object is at TOS - __ pop(rbx); - } - // expression stack must be empty before entering the VM if an exception happened - __ empty_expression_stack(); - __ empty_FPU_stack(); - // setup parameters - __ lea(rax, ExternalAddress((address)name)); - if (pass_oop) { - __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), rax, rbx); - } else { - if (message != NULL) { - __ lea(rbx, ExternalAddress((address)message)); - } else { - __ movptr(rbx, NULL_WORD); - } - __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), rax, rbx); - } - // throw exception - __ jump(ExternalAddress(Interpreter::throw_exception_entry())); - return entry; -} - - -address TemplateInterpreterGenerator::generate_continuation_for(TosState state) { - address entry = __ pc(); - // NULL last_sp until next java call - __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); - __ dispatch_next(state); - return entry; -} - - -address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t 
index_size) { - address entry = __ pc(); - -#ifdef COMPILER2 - // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases - if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) { - for (int i = 1; i < 8; i++) { - __ ffree(i); - } - } else if (UseSSE < 2) { - __ empty_FPU_stack(); - } -#endif - if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) { - __ MacroAssembler::verify_FPU(1, "generate_return_entry_for compiled"); - } else { - __ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled"); - } - - if (state == ftos) { - __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_return_entry_for in interpreter"); - } else if (state == dtos) { - __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_return_entry_for in interpreter"); - } - - // Restore stack bottom in case i2c adjusted stack - __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); - // and NULL it as marker that rsp is now tos until next java call - __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); - - __ restore_bcp(); - __ restore_locals(); - - if (state == atos) { - Register mdp = rbx; - Register tmp = rcx; - __ profile_return_type(mdp, rax, tmp); - } - - const Register cache = rbx; - const Register index = rcx; - __ get_cache_and_index_at_bcp(cache, index, 1, index_size); - - const Register flags = cache; - __ movl(flags, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); - __ andl(flags, ConstantPoolCacheEntry::parameter_size_mask); - __ lea(rsp, Address(rsp, flags, Interpreter::stackElementScale())); - __ dispatch_next(state, step); - - return entry; -} - - -address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) { - address entry = __ pc(); - - if (state == ftos) { - __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 
0 : 1, "generate_deopt_entry_for in interpreter"); - } else if (state == dtos) { - __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_deopt_entry_for in interpreter"); - } - - // The stack is not extended by deopt but we must NULL last_sp as this - // entry is like a "return". - __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); - __ restore_bcp(); - __ restore_locals(); - // handle exceptions - { Label L; - const Register thread = rcx; - __ get_thread(thread); - __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); - __ jcc(Assembler::zero, L); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); - __ should_not_reach_here(); - __ bind(L); - } - __ dispatch_next(state, step); - return entry; -} - - -int AbstractInterpreter::BasicType_as_index(BasicType type) { - int i = 0; - switch (type) { - case T_BOOLEAN: i = 0; break; - case T_CHAR : i = 1; break; - case T_BYTE : i = 2; break; - case T_SHORT : i = 3; break; - case T_INT : // fall through - case T_LONG : // fall through - case T_VOID : i = 4; break; - case T_FLOAT : i = 5; break; // have to treat float and double separately for SSE - case T_DOUBLE : i = 6; break; - case T_OBJECT : // fall through - case T_ARRAY : i = 7; break; - default : ShouldNotReachHere(); - } - assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); - return i; -} - - -address TemplateInterpreterGenerator::generate_result_handler_for(BasicType type) { - address entry = __ pc(); - switch (type) { - case T_BOOLEAN: __ c2bool(rax); break; - case T_CHAR : __ andptr(rax, 0xFFFF); break; - case T_BYTE : __ sign_extend_byte (rax); break; - case T_SHORT : __ sign_extend_short(rax); break; - case T_INT : /* nothing to do */ break; - case T_LONG : /* nothing to do */ break; - case T_VOID : /* nothing to do */ break; - case T_DOUBLE : - case T_FLOAT : - { const Register t = 
InterpreterRuntime::SignatureHandlerGenerator::temp(); - __ pop(t); // remove return address first - // Must return a result for interpreter or compiler. In SSE - // mode, results are returned in xmm0 and the FPU stack must - // be empty. - if (type == T_FLOAT && UseSSE >= 1) { - // Load ST0 - __ fld_d(Address(rsp, 0)); - // Store as float and empty fpu stack - __ fstp_s(Address(rsp, 0)); - // and reload - __ movflt(xmm0, Address(rsp, 0)); - } else if (type == T_DOUBLE && UseSSE >= 2 ) { - __ movdbl(xmm0, Address(rsp, 0)); - } else { - // restore ST0 - __ fld_d(Address(rsp, 0)); - } - // and pop the temp - __ addptr(rsp, 2 * wordSize); - __ push(t); // restore return address - } - break; - case T_OBJECT : - // retrieve result from frame - __ movptr(rax, Address(rbp, frame::interpreter_frame_oop_temp_offset*wordSize)); - // and verify it - __ verify_oop(rax); - break; - default : ShouldNotReachHere(); - } - __ ret(0); // return from result handler - return entry; -} - -address TemplateInterpreterGenerator::generate_safept_entry_for(TosState state, address runtime_entry) { - address entry = __ pc(); - __ push(state); - __ call_VM(noreg, runtime_entry); - __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); - return entry; -} - - -// Helpers for commoning out cases in the various type of method entries. -// - -// increment invocation count & check for overflow -// -// Note: checking for negative value instead of overflow -// so we have a 'sticky' overflow test -// -// rbx,: method -// rcx: invocation counter -// -void InterpreterGenerator::generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue) { - Label done; - // Note: In tiered we increment either counters in MethodCounters* or in MDO - // depending if we're profiling or not. - if (TieredCompilation) { - int increment = InvocationCounter::count_increment; - Label no_mdo; - if (ProfileInterpreter) { - // Are we profiling? 
- __ movptr(rax, Address(rbx, Method::method_data_offset())); - __ testptr(rax, rax); - __ jccb(Assembler::zero, no_mdo); - // Increment counter in the MDO - const Address mdo_invocation_counter(rax, in_bytes(MethodData::invocation_counter_offset()) + - in_bytes(InvocationCounter::counter_offset())); - const Address mask(rax, in_bytes(MethodData::invoke_mask_offset())); - __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rcx, false, Assembler::zero, overflow); - __ jmp(done); - } - __ bind(no_mdo); - // Increment counter in MethodCounters - const Address invocation_counter(rax, - MethodCounters::invocation_counter_offset() + - InvocationCounter::counter_offset()); - - __ get_method_counters(rbx, rax, done); - const Address mask(rax, in_bytes(MethodCounters::invoke_mask_offset())); - __ increment_mask_and_jump(invocation_counter, increment, mask, - rcx, false, Assembler::zero, overflow); - __ bind(done); - } else { // not TieredCompilation - const Address backedge_counter(rax, - MethodCounters::backedge_counter_offset() + - InvocationCounter::counter_offset()); - const Address invocation_counter(rax, - MethodCounters::invocation_counter_offset() + - InvocationCounter::counter_offset()); - - __ get_method_counters(rbx, rax, done); - - if (ProfileInterpreter) { - __ incrementl(Address(rax, - MethodCounters::interpreter_invocation_counter_offset())); - } - - // Update standard invocation counters - __ movl(rcx, invocation_counter); - __ incrementl(rcx, InvocationCounter::count_increment); - __ movl(invocation_counter, rcx); // save invocation count - - __ movl(rax, backedge_counter); // load backedge counter - __ andl(rax, InvocationCounter::count_mask_value); // mask out the status bits - - __ addl(rcx, rax); // add both counters - - // profile_method is non-null only for interpreted method so - // profile_method != NULL == !native_call - // BytecodeInterpreter only calls for native so code is elided. 
- - if (ProfileInterpreter && profile_method != NULL) { - // Test to see if we should create a method data oop - __ movptr(rax, Address(rbx, Method::method_counters_offset())); - __ cmp32(rcx, Address(rax, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); - __ jcc(Assembler::less, *profile_method_continue); - - // if no method data exists, go to profile_method - __ test_method_data_pointer(rax, *profile_method); - } - - __ movptr(rax, Address(rbx, Method::method_counters_offset())); - __ cmp32(rcx, Address(rax, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); - __ jcc(Assembler::aboveEqual, *overflow); - __ bind(done); - } -} - -void InterpreterGenerator::generate_counter_overflow(Label* do_continue) { - - // Asm interpreter on entry - // rdi - locals - // rsi - bcp - // rbx, - method - // rdx - cpool - // rbp, - interpreter frame - - // C++ interpreter on entry - // rsi - new interpreter state pointer - // rbp - interpreter frame pointer - // rbx - method - - // On return (i.e. jump to entry_point) [ back to invocation of interpreter ] - // rbx, - method - // rcx - rcvr (assuming there is one) - // top of stack return address of interpreter caller - // rsp - sender_sp - - // C++ interpreter only - // rsi - previous interpreter state pointer - - // InterpreterRuntime::frequency_counter_overflow takes one argument - // indicating if the counter overflow occurs at a backwards branch (non-NULL bcp). - // The call returns the address of the verified entry point for the method or NULL - // if the compilation did not complete (either went background or bailed out). - __ movptr(rax, (intptr_t)false); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), rax); - - __ movptr(rbx, Address(rbp, method_offset)); // restore Method* - - // Preserve invariant that rsi/rdi contain bcp/locals of sender frame - // and jump to the interpreted entry. 
- __ jmp(*do_continue, relocInfo::none); - -} - -void InterpreterGenerator::generate_stack_overflow_check(void) { - // see if we've got enough room on the stack for locals plus overhead. - // the expression stack grows down incrementally, so the normal guard - // page mechanism will work for that. - // - // Registers live on entry: - // - // Asm interpreter - // rdx: number of additional locals this frame needs (what we must check) - // rbx,: Method* - - // destroyed on exit - // rax, - - // NOTE: since the additional locals are also always pushed (wasn't obvious in - // generate_fixed_frame) so the guard should work for them too. - // - - // monitor entry size: see picture of stack in frame_x86.hpp - const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; - - // total overhead size: entry_size + (saved rbp, thru expr stack bottom). - // be sure to change this if you add/subtract anything to/from the overhead area - const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) + entry_size; - - const int page_size = os::vm_page_size(); - - Label after_frame_check; - - // see if the frame is greater than one page in size. If so, - // then we need to verify there is enough stack space remaining - // for the additional locals. 
- __ cmpl(rdx, (page_size - overhead_size)/Interpreter::stackElementSize); - __ jcc(Assembler::belowEqual, after_frame_check); - - // compute rsp as if this were going to be the last frame on - // the stack before the red zone - - Label after_frame_check_pop; - - __ push(rsi); - - const Register thread = rsi; - - __ get_thread(thread); - - const Address stack_base(thread, Thread::stack_base_offset()); - const Address stack_size(thread, Thread::stack_size_offset()); - - // locals + overhead, in bytes - __ lea(rax, Address(noreg, rdx, Interpreter::stackElementScale(), overhead_size)); - -#ifdef ASSERT - Label stack_base_okay, stack_size_okay; - // verify that thread stack base is non-zero - __ cmpptr(stack_base, (int32_t)NULL_WORD); - __ jcc(Assembler::notEqual, stack_base_okay); - __ stop("stack base is zero"); - __ bind(stack_base_okay); - // verify that thread stack size is non-zero - __ cmpptr(stack_size, 0); - __ jcc(Assembler::notEqual, stack_size_okay); - __ stop("stack size is zero"); - __ bind(stack_size_okay); -#endif - - // Add stack base to locals and subtract stack size - __ addptr(rax, stack_base); - __ subptr(rax, stack_size); - - // Use the maximum number of pages we might bang. - const int max_pages = StackShadowPages > (StackRedPages+StackYellowPages) ? StackShadowPages : - (StackRedPages+StackYellowPages); - __ addptr(rax, max_pages * page_size); - - // check against the current stack bottom - __ cmpptr(rsp, rax); - __ jcc(Assembler::above, after_frame_check_pop); - - __ pop(rsi); // get saved bcp / (c++ prev state ). - - // Restore sender's sp as SP. This is necessary if the sender's - // frame is an extended compiled frame (see gen_c2i_adapter()) - // and safer anyway in case of JSR292 adaptations. - - __ pop(rax); // return address must be moved if SP is changed - __ mov(rsp, rsi); - __ push(rax); - - // Note: the restored frame is not necessarily interpreted. - // Use the shared runtime version of the StackOverflowError. 
- assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); - __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry())); - // all done with frame size check - __ bind(after_frame_check_pop); - __ pop(rsi); - - __ bind(after_frame_check); -} - -// Allocate monitor and lock method (asm interpreter) -// rbx, - Method* -// -void TemplateInterpreterGenerator::lock_method() { - // synchronize method - const Address access_flags (rbx, Method::access_flags_offset()); - const Address monitor_block_top (rbp, frame::interpreter_frame_monitor_block_top_offset * wordSize); - const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; - - #ifdef ASSERT - { Label L; - __ movl(rax, access_flags); - __ testl(rax, JVM_ACC_SYNCHRONIZED); - __ jcc(Assembler::notZero, L); - __ stop("method doesn't need synchronization"); - __ bind(L); - } - #endif // ASSERT - // get synchronization object - { Label done; - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - __ movl(rax, access_flags); - __ testl(rax, JVM_ACC_STATIC); - __ movptr(rax, Address(rdi, Interpreter::local_offset_in_bytes(0))); // get receiver (assume this is frequent case) - __ jcc(Assembler::zero, done); - __ movptr(rax, Address(rbx, Method::const_offset())); - __ movptr(rax, Address(rax, ConstMethod::constants_offset())); - __ movptr(rax, Address(rax, ConstantPool::pool_holder_offset_in_bytes())); - __ movptr(rax, Address(rax, mirror_offset)); - __ bind(done); - } - // add space for monitor & lock - __ subptr(rsp, entry_size); // add space for a monitor entry - __ movptr(monitor_block_top, rsp); // set new monitor block top - __ movptr(Address(rsp, BasicObjectLock::obj_offset_in_bytes()), rax); // store object - __ mov(rdx, rsp); // object address - __ lock_object(rdx); -} - -// -// Generate a fixed interpreter frame. This is identical setup for interpreted methods -// and for native methods hence the shared code. 
- -void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { - // initialize fixed part of activation frame - __ push(rax); // save return address - __ enter(); // save old & set new rbp, - - - __ push(rsi); // set sender sp - __ push((int32_t)NULL_WORD); // leave last_sp as null - __ movptr(rsi, Address(rbx,Method::const_offset())); // get ConstMethod* - __ lea(rsi, Address(rsi,ConstMethod::codes_offset())); // get codebase - __ push(rbx); // save Method* - if (ProfileInterpreter) { - Label method_data_continue; - __ movptr(rdx, Address(rbx, in_bytes(Method::method_data_offset()))); - __ testptr(rdx, rdx); - __ jcc(Assembler::zero, method_data_continue); - __ addptr(rdx, in_bytes(MethodData::data_offset())); - __ bind(method_data_continue); - __ push(rdx); // set the mdp (method data pointer) - } else { - __ push(0); - } - - __ movptr(rdx, Address(rbx, Method::const_offset())); - __ movptr(rdx, Address(rdx, ConstMethod::constants_offset())); - __ movptr(rdx, Address(rdx, ConstantPool::cache_offset_in_bytes())); - __ push(rdx); // set constant pool cache - __ push(rdi); // set locals pointer - if (native_call) { - __ push(0); // no bcp - } else { - __ push(rsi); // set bcp - } - __ push(0); // reserve word for pointer to expression stack bottom - __ movptr(Address(rsp, 0), rsp); // set expression stack bottom -} - - -// Method entry for java.lang.ref.Reference.get. -address InterpreterGenerator::generate_Reference_get_entry(void) { -#if INCLUDE_ALL_GCS - // Code: _aload_0, _getfield, _areturn - // parameter size = 1 - // - // The code that gets generated by this routine is split into 2 parts: - // 1. The "intrinsified" code for G1 (or any SATB based GC), - // 2. The slow path - which is an expansion of the regular method entry. - // - // Notes:- - // * In the G1 code we do not check whether we need to block for - // a safepoint. 
If G1 is enabled then we must execute the specialized - // code for Reference.get (except when the Reference object is null) - // so that we can log the value in the referent field with an SATB - // update buffer. - // If the code for the getfield template is modified so that the - // G1 pre-barrier code is executed when the current method is - // Reference.get() then going through the normal method entry - // will be fine. - // * The G1 code below can, however, check the receiver object (the instance - // of java.lang.Reference) and jump to the slow path if null. If the - // Reference object is null then we obviously cannot fetch the referent - // and so we don't need to call the G1 pre-barrier. Thus we can use the - // regular method entry code to generate the NPE. - // - // This code is based on generate_accessor_enty. - - // rbx,: Method* - // rcx: receiver (preserve for slow entry into asm interpreter) - - // rsi: senderSP must preserved for slow path, set SP to it on fast path - - address entry = __ pc(); - - const int referent_offset = java_lang_ref_Reference::referent_offset; - guarantee(referent_offset > 0, "referent offset not initialized"); - - if (UseG1GC) { - Label slow_path; - - // Check if local 0 != NULL - // If the receiver is null then it is OK to jump to the slow path. - __ movptr(rax, Address(rsp, wordSize)); - __ testptr(rax, rax); - __ jcc(Assembler::zero, slow_path); - - // rax: local 0 (must be preserved across the G1 barrier call) - // - // rbx: method (at this point it's scratch) - // rcx: receiver (at this point it's scratch) - // rdx: scratch - // rdi: scratch - // - // rsi: sender sp - - // Preserve the sender sp in case the pre-barrier - // calls the runtime - __ push(rsi); - - // Load the value of the referent field. - const Address field_address(rax, referent_offset); - __ movptr(rax, field_address); - - // Generate the G1 pre-barrier code to log the value of - // the referent field in an SATB buffer. 
- __ get_thread(rcx); - __ g1_write_barrier_pre(noreg /* obj */, - rax /* pre_val */, - rcx /* thread */, - rbx /* tmp */, - true /* tosca_save */, - true /* expand_call */); - - // _areturn - __ pop(rsi); // get sender sp - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); - return entry; - } -#endif // INCLUDE_ALL_GCS - - // If G1 is not enabled then attempt to go through the accessor entry point - // Reference.get is an accessor - return NULL; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.update(int crc, int b) - */ -address InterpreterGenerator::generate_CRC32_update_entry() { - if (UseCRC32Intrinsics) { - address entry = __ pc(); - - // rbx: Method* - // rsi: senderSP must preserved for slow path, set SP to it on fast path - // rdx: scratch - // rdi: scratch - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. - ExternalAddress state(SafepointSynchronize::address_of_state()); - __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), - SafepointSynchronize::_not_synchronized); - __ jcc(Assembler::notEqual, slow_path); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. 
- - // Load parameters - const Register crc = rax; // crc - const Register val = rdx; // source java byte value - const Register tbl = rdi; // scratch - - // Arguments are reversed on java expression stack - __ movl(val, Address(rsp, wordSize)); // byte value - __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC - - __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); - __ notl(crc); // ~crc - __ update_byte_crc32(crc, val, tbl); - __ notl(crc); // ~crc - // result in rax - - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; - } - return NULL; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - */ -address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - if (UseCRC32Intrinsics) { - address entry = __ pc(); - - // rbx,: Method* - // rsi: senderSP must preserved for slow path, set SP to it on fast path - // rdx: scratch - // rdi: scratch - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. - ExternalAddress state(SafepointSynchronize::address_of_state()); - __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), - SafepointSynchronize::_not_synchronized); - __ jcc(Assembler::notEqual, slow_path); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. - - // Load parameters - const Register crc = rax; // crc - const Register buf = rdx; // source java byte array address - const Register len = rdi; // length - - // value x86_32 - // interp. 
arg ptr ESP + 4 - // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - // 3 2 1 0 - // int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - // 4 2,3 1 0 - - // Arguments are reversed on java expression stack - __ movl(len, Address(rsp, 4 + 0)); // Length - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC - } else { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array - __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC - } - - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); - // result in rax - - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; - } - return NULL; -} - -/** -* Method entry for static native methods: -* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) -* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) -*/ -address InterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - if (UseCRC32CIntrinsics) { - address entry = __ pc(); - // Load parameters - const Register crc = rax; // crc - const Register buf = rcx; // source java byte array address - const Register len = rdx; // length - const Register end = len; - - // value x86_32 - // interp. 
arg ptr ESP + 4 - // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int end) - // 3 2 1 0 - // int java.util.zip.CRC32.updateByteBuffer(int crc, long address, int off, int end) - // 4 2,3 1 0 - - // Arguments are reversed on java expression stack - __ movl(end, Address(rsp, 4 + 0)); // end - __ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC - } else { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array - __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC - } - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); - // result in rax - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry; - } - return NULL; -} - -/** - * Method entry for static native method: - * java.lang.Float.intBitsToFloat(int bits) - */ -address InterpreterGenerator::generate_Float_intBitsToFloat_entry() { - if (UseSSE >= 1) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). 
- - // Load 'bits' into xmm0 (interpreter returns results in xmm0) - __ movflt(xmm0, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return NULL; -} - -/** - * Method entry for static native method: - * java.lang.Float.floatToRawIntBits(float value) - */ -address InterpreterGenerator::generate_Float_floatToRawIntBits_entry() { - if (UseSSE >= 1) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load the parameter (a floating-point value) into rax. - __ movl(rax, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return NULL; -} - - -/** - * Method entry for static native method: - * java.lang.Double.longBitsToDouble(long bits) - */ -address InterpreterGenerator::generate_Double_longBitsToDouble_entry() { - if (UseSSE >= 2) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load 'bits' into xmm0 (interpreter returns results in xmm0) - __ movdbl(xmm0, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return NULL; -} - -/** - * Method entry for static native method: - * java.lang.Double.doubleToRawLongBits(double value) - */ -address InterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { - if (UseSSE >= 2) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load the parameter (a floating-point value) into rax. 
- __ movl(rdx, Address(rsp, 2*wordSize)); - __ movl(rax, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return NULL; -} - -// -// Interpreter stub for calling a native method. (asm interpreter) -// This sets up a somewhat different looking stack for calling the native method -// than the typical interpreter frame setup. -// - -address InterpreterGenerator::generate_native_entry(bool synchronized) { - // determine code generation flags - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; - - // rbx,: Method* - // rsi: sender sp - // rsi: previous interpreter state (C++ interpreter) must preserve - address entry_point = __ pc(); - - const Address constMethod (rbx, Method::const_offset()); - const Address access_flags (rbx, Method::access_flags_offset()); - const Address size_of_parameters(rcx, ConstMethod::size_of_parameters_offset()); - - // get parameter size (always needed) - __ movptr(rcx, constMethod); - __ load_unsigned_short(rcx, size_of_parameters); - - // native calls don't need the stack size check since they have no expression stack - // and the arguments are already on the stack and we only add a handful of words - // to the stack - - // rbx,: Method* - // rcx: size of parameters - // rsi: sender sp - - __ pop(rax); // get return address - // for natives the size of locals is zero - - // compute beginning of parameters (rdi) - __ lea(rdi, Address(rsp, rcx, Interpreter::stackElementScale(), -wordSize)); - - - // add 2 zero-initialized slots for native calls - // NULL result handler - __ push((int32_t)NULL_WORD); - // NULL oop temp (mirror or jni oop result) - __ push((int32_t)NULL_WORD); - - // initialize fixed part of activation frame - generate_fixed_frame(true); - - // make sure method is native & not abstract -#ifdef ASSERT - __ movl(rax, access_flags); - { - Label L; - __ testl(rax, JVM_ACC_NATIVE); - __ 
jcc(Assembler::notZero, L); - __ stop("tried to execute non-native method as native"); - __ bind(L); - } - { Label L; - __ testl(rax, JVM_ACC_ABSTRACT); - __ jcc(Assembler::zero, L); - __ stop("tried to execute abstract method in interpreter"); - __ bind(L); - } -#endif - - // Since at this point in the method invocation the exception handler - // would try to exit the monitor of synchronized methods which hasn't - // been entered yet, we set the thread local variable - // _do_not_unlock_if_synchronized to true. The remove_activation will - // check this flag. - - __ get_thread(rax); - const Address do_not_unlock_if_synchronized(rax, - in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); - __ movbool(do_not_unlock_if_synchronized, true); - - // increment invocation count & check for overflow - Label invocation_counter_overflow; - if (inc_counter) { - generate_counter_incr(&invocation_counter_overflow, NULL, NULL); - } - - Label continue_after_compile; - __ bind(continue_after_compile); - - bang_stack_shadow_pages(true); - - // reset the _do_not_unlock_if_synchronized flag - __ get_thread(rax); - __ movbool(do_not_unlock_if_synchronized, false); - - // check for synchronized methods - // Must happen AFTER invocation_counter check and stack overflow check, - // so method is not locked if overflows. 
- // - if (synchronized) { - lock_method(); - } else { - // no synchronization necessary -#ifdef ASSERT - { Label L; - __ movl(rax, access_flags); - __ testl(rax, JVM_ACC_SYNCHRONIZED); - __ jcc(Assembler::zero, L); - __ stop("method needs synchronization"); - __ bind(L); - } -#endif - } - - // start execution -#ifdef ASSERT - { Label L; - const Address monitor_block_top (rbp, - frame::interpreter_frame_monitor_block_top_offset * wordSize); - __ movptr(rax, monitor_block_top); - __ cmpptr(rax, rsp); - __ jcc(Assembler::equal, L); - __ stop("broken stack frame setup in interpreter"); - __ bind(L); - } -#endif - - // jvmti/dtrace support - __ notify_method_entry(); - - // work registers - const Register method = rbx; - const Register thread = rdi; - const Register t = rcx; - - // allocate space for parameters - __ get_method(method); - __ movptr(t, Address(method, Method::const_offset())); - __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); - - __ shlptr(t, Interpreter::logStackElementSize); - __ addptr(t, 2*wordSize); // allocate two more slots for JNIEnv and possible mirror - __ subptr(rsp, t); - __ andptr(rsp, -(StackAlignmentInBytes)); // gcc needs 16 byte aligned stacks to do XMM intrinsics - - // get signature handler - { Label L; - __ movptr(t, Address(method, Method::signature_handler_offset())); - __ testptr(t, t); - __ jcc(Assembler::notZero, L); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); - __ get_method(method); - __ movptr(t, Address(method, Method::signature_handler_offset())); - __ bind(L); - } - - // call signature handler - assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rdi, "adjust this code"); - assert(InterpreterRuntime::SignatureHandlerGenerator::to () == rsp, "adjust this code"); - assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); - // The generated handlers do not touch RBX (the method oop). 
- // However, large signatures cannot be cached and are generated - // each time here. The slow-path generator will blow RBX - // sometime, so we must reload it after the call. - __ call(t); - __ get_method(method); // slow path call blows RBX on DevStudio 5.0 - - // result handler is in rax, - // set result handler - __ movptr(Address(rbp, frame::interpreter_frame_result_handler_offset*wordSize), rax); - - // pass mirror handle if static call - { Label L; - const int mirror_offset = in_bytes(Klass::java_mirror_offset()); - __ movl(t, Address(method, Method::access_flags_offset())); - __ testl(t, JVM_ACC_STATIC); - __ jcc(Assembler::zero, L); - // get mirror - __ movptr(t, Address(method, Method:: const_offset())); - __ movptr(t, Address(t, ConstMethod::constants_offset())); - __ movptr(t, Address(t, ConstantPool::pool_holder_offset_in_bytes())); - __ movptr(t, Address(t, mirror_offset)); - // copy mirror into activation frame - __ movptr(Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize), t); - // pass handle to mirror - __ lea(t, Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize)); - __ movptr(Address(rsp, wordSize), t); - __ bind(L); - } - - // get native function entry point - { Label L; - __ movptr(rax, Address(method, Method::native_function_offset())); - ExternalAddress unsatisfied(SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); - __ cmpptr(rax, unsatisfied.addr()); - __ jcc(Assembler::notEqual, L); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method); - __ get_method(method); - __ movptr(rax, Address(method, Method::native_function_offset())); - __ bind(L); - } - - // pass JNIEnv - __ get_thread(thread); - __ lea(t, Address(thread, JavaThread::jni_environment_offset())); - __ movptr(Address(rsp, 0), t); - - // set_last_Java_frame_before_call - // It is enough that the pc() - // points into the right code segment. It does not have to be the correct return pc. 
- __ set_last_Java_frame(thread, noreg, rbp, __ pc()); - - // change thread state -#ifdef ASSERT - { Label L; - __ movl(t, Address(thread, JavaThread::thread_state_offset())); - __ cmpl(t, _thread_in_Java); - __ jcc(Assembler::equal, L); - __ stop("Wrong thread state in native stub"); - __ bind(L); - } -#endif - - // Change state to native - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native); - __ call(rax); - - // result potentially in rdx:rax or ST0 - - // Verify or restore cpu control state after JNI call - __ restore_cpu_control_state_after_jni(); - - // save potential result in ST(0) & rdx:rax - // (if result handler is the T_FLOAT or T_DOUBLE handler, result must be in ST0 - - // the check is necessary to avoid potential Intel FPU overflow problems by saving/restoring 'empty' FPU registers) - // It is safe to do this push because state is _thread_in_native and return address will be found - // via _last_native_pc and not via _last_jave_sp - - // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. - // If the order changes or anything else is added to the stack the code in - // interpreter_frame_result will have to be changed. - - { Label L; - Label push_double; - ExternalAddress float_handler(AbstractInterpreter::result_handler(T_FLOAT)); - ExternalAddress double_handler(AbstractInterpreter::result_handler(T_DOUBLE)); - __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize), - float_handler.addr()); - __ jcc(Assembler::equal, push_double); - __ cmpptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset + 1)*wordSize), - double_handler.addr()); - __ jcc(Assembler::notEqual, L); - __ bind(push_double); - __ push_d(); // FP values are returned using the FPU, so push FPU contents (even if UseSSE > 0). 
- __ bind(L); - } - __ push(ltos); - - // change thread state - __ get_thread(thread); - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); - if(os::is_MP()) { - if (UseMembar) { - // Force this write out before the read below - __ membar(Assembler::Membar_mask_bits( - Assembler::LoadLoad | Assembler::LoadStore | - Assembler::StoreLoad | Assembler::StoreStore)); - } else { - // Write serialization page so VM thread can do a pseudo remote membar. - // We use the current thread pointer to calculate a thread specific - // offset to write to within the page. This minimizes bus traffic - // due to cache line collision. - __ serialize_memory(thread, rcx); - } - } - - if (AlwaysRestoreFPU) { - // Make sure the control word is correct. - __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std())); - } - - // check for safepoint operation in progress and/or pending suspend requests - { Label Continue; - - __ cmp32(ExternalAddress(SafepointSynchronize::address_of_state()), - SafepointSynchronize::_not_synchronized); - - Label L; - __ jcc(Assembler::notEqual, L); - __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0); - __ jcc(Assembler::equal, Continue); - __ bind(L); - - // Don't use call_VM as it will see a possible pending exception and forward it - // and never return here preventing us from clearing _last_native_pc down below. - // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are - // preserved and correspond to the bcp/locals pointers. So we do a runtime call - // by hand. 
- // - __ push(thread); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, - JavaThread::check_special_condition_for_native_trans))); - __ increment(rsp, wordSize); - __ get_thread(thread); - - __ bind(Continue); - } - - // change thread state - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java); - - __ reset_last_Java_frame(thread, true, true); - - // reset handle block - __ movptr(t, Address(thread, JavaThread::active_handles_offset())); - __ movl(Address(t, JNIHandleBlock::top_offset_in_bytes()), NULL_WORD); - - // If result was an oop then unbox and save it in the frame - { Label L; - Label no_oop, store_result; - ExternalAddress handler(AbstractInterpreter::result_handler(T_OBJECT)); - __ cmpptr(Address(rbp, frame::interpreter_frame_result_handler_offset*wordSize), - handler.addr()); - __ jcc(Assembler::notEqual, no_oop); - __ cmpptr(Address(rsp, 0), (int32_t)NULL_WORD); - __ pop(ltos); - __ testptr(rax, rax); - __ jcc(Assembler::zero, store_result); - // unbox - __ movptr(rax, Address(rax, 0)); - __ bind(store_result); - __ movptr(Address(rbp, (frame::interpreter_frame_oop_temp_offset)*wordSize), rax); - // keep stack depth as expected by pushing oop which will eventually be discarded - __ push(ltos); - __ bind(no_oop); - } - - { - Label no_reguard; - __ cmpl(Address(thread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_disabled); - __ jcc(Assembler::notEqual, no_reguard); - - __ pusha(); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); - __ popa(); - - __ bind(no_reguard); - } - - // restore rsi to have legal interpreter frame, - // i.e., bci == 0 <=> rsi == code_base() - // Can't call_VM until bcp is within reasonable. - __ get_method(method); // method is junk from thread_in_native to now. 
- __ movptr(rsi, Address(method,Method::const_offset())); // get ConstMethod* - __ lea(rsi, Address(rsi,ConstMethod::codes_offset())); // get codebase - - // handle exceptions (exception handling will handle unlocking!) - { Label L; - __ cmpptr(Address(thread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); - __ jcc(Assembler::zero, L); - // Note: At some point we may want to unify this with the code used in call_VM_base(); - // i.e., we should use the StubRoutines::forward_exception code. For now this - // doesn't work here because the rsp is not correctly set at this point. - __ MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); - __ should_not_reach_here(); - __ bind(L); - } - - // do unlocking if necessary - { Label L; - __ movl(t, Address(method, Method::access_flags_offset())); - __ testl(t, JVM_ACC_SYNCHRONIZED); - __ jcc(Assembler::zero, L); - // the code below should be shared with interpreter macro assembler implementation - { Label unlock; - // BasicObjectLock will be first in list, since this is a synchronized method. However, need - // to check that the object has not been unlocked by an explicit monitorexit bytecode. - const Address monitor(rbp, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); - - __ lea(rdx, monitor); // address of first monitor - - __ movptr(t, Address(rdx, BasicObjectLock::obj_offset_in_bytes())); - __ testptr(t, t); - __ jcc(Assembler::notZero, unlock); - - // Entry already unlocked, need to throw exception - __ MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); - __ should_not_reach_here(); - - __ bind(unlock); - __ unlock_object(rdx); - } - __ bind(L); - } - - // jvmti/dtrace support - // Note: This must happen _after_ handling/throwing any exceptions since - // the exception handler code notifies the runtime of method exits - // too. 
If this happens before, method entry/exit notifications are - // not properly paired (was bug - gri 11/22/99). - __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); - - // restore potential result in rdx:rax, call result handler to restore potential result in ST0 & handle result - __ pop(ltos); - __ movptr(t, Address(rbp, frame::interpreter_frame_result_handler_offset*wordSize)); - __ call(t); - - // remove activation - __ movptr(t, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp - __ leave(); // remove frame anchor - __ pop(rdi); // get return address - __ mov(rsp, t); // set sp to sender sp - __ jmp(rdi); - - if (inc_counter) { - // Handle overflow of counter and compile method - __ bind(invocation_counter_overflow); - generate_counter_overflow(&continue_after_compile); - } - - return entry_point; -} - -// -// Generic interpreted method entry to (asm) interpreter -// -address InterpreterGenerator::generate_normal_entry(bool synchronized) { - // determine code generation flags - bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; - - // rbx,: Method* - // rsi: sender sp - address entry_point = __ pc(); - - const Address constMethod (rbx, Method::const_offset()); - const Address access_flags (rbx, Method::access_flags_offset()); - const Address size_of_parameters(rdx, ConstMethod::size_of_parameters_offset()); - const Address size_of_locals (rdx, ConstMethod::size_of_locals_offset()); - - // get parameter size (always needed) - __ movptr(rdx, constMethod); - __ load_unsigned_short(rcx, size_of_parameters); - - // rbx,: Method* - // rcx: size of parameters - - // rsi: sender_sp (could differ from sp+wordSize if we were called via c2i ) - - __ load_unsigned_short(rdx, size_of_locals); // get size of locals in words - __ subl(rdx, rcx); // rdx = no. of additional locals - - // see if we've got enough room on the stack for locals plus overhead. 
- generate_stack_overflow_check(); - - // get return address - __ pop(rax); - - // compute beginning of parameters (rdi) - __ lea(rdi, Address(rsp, rcx, Interpreter::stackElementScale(), -wordSize)); - - // rdx - # of additional locals - // allocate space for locals - // explicitly initialize locals - { - Label exit, loop; - __ testl(rdx, rdx); - __ jcc(Assembler::lessEqual, exit); // do nothing if rdx <= 0 - __ bind(loop); - __ push((int32_t)NULL_WORD); // initialize local variables - __ decrement(rdx); // until everything initialized - __ jcc(Assembler::greater, loop); - __ bind(exit); - } - - // initialize fixed part of activation frame - generate_fixed_frame(false); - - // make sure method is not native & not abstract -#ifdef ASSERT - __ movl(rax, access_flags); - { - Label L; - __ testl(rax, JVM_ACC_NATIVE); - __ jcc(Assembler::zero, L); - __ stop("tried to execute native method as non-native"); - __ bind(L); - } - { Label L; - __ testl(rax, JVM_ACC_ABSTRACT); - __ jcc(Assembler::zero, L); - __ stop("tried to execute abstract method in interpreter"); - __ bind(L); - } -#endif - - // Since at this point in the method invocation the exception handler - // would try to exit the monitor of synchronized methods which hasn't - // been entered yet, we set the thread local variable - // _do_not_unlock_if_synchronized to true. The remove_activation will - // check this flag. 
- - __ get_thread(rax); - const Address do_not_unlock_if_synchronized(rax, - in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); - __ movbool(do_not_unlock_if_synchronized, true); - - __ profile_parameters_type(rax, rcx, rdx); - // increment invocation count & check for overflow - Label invocation_counter_overflow; - Label profile_method; - Label profile_method_continue; - if (inc_counter) { - generate_counter_incr(&invocation_counter_overflow, &profile_method, &profile_method_continue); - if (ProfileInterpreter) { - __ bind(profile_method_continue); - } - } - Label continue_after_compile; - __ bind(continue_after_compile); - - bang_stack_shadow_pages(false); - - // reset the _do_not_unlock_if_synchronized flag - __ get_thread(rax); - __ movbool(do_not_unlock_if_synchronized, false); - - // check for synchronized methods - // Must happen AFTER invocation_counter check and stack overflow check, - // so method is not locked if overflows. - // - if (synchronized) { - // Allocate monitor and lock method - lock_method(); - } else { - // no synchronization necessary -#ifdef ASSERT - { Label L; - __ movl(rax, access_flags); - __ testl(rax, JVM_ACC_SYNCHRONIZED); - __ jcc(Assembler::zero, L); - __ stop("method needs synchronization"); - __ bind(L); - } -#endif - } - - // start execution -#ifdef ASSERT - { Label L; - const Address monitor_block_top (rbp, - frame::interpreter_frame_monitor_block_top_offset * wordSize); - __ movptr(rax, monitor_block_top); - __ cmpptr(rax, rsp); - __ jcc(Assembler::equal, L); - __ stop("broken stack frame setup in interpreter"); - __ bind(L); - } -#endif - - // jvmti support - __ notify_method_entry(); - - __ dispatch_next(vtos); - - // invocation counter overflow - if (inc_counter) { - if (ProfileInterpreter) { - // We have decided to profile this method in the interpreter - __ bind(profile_method); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); - __ set_method_data_pointer_for_bcp(); - __ 
get_method(rbx); - __ jmp(profile_method_continue); - } - // Handle overflow of counter and compile method - __ bind(invocation_counter_overflow); - generate_counter_overflow(&continue_after_compile); - } - - return entry_point; -} - - -// These should never be compiled since the interpreter will prefer -// the compiled version to the intrinsic version. -bool AbstractInterpreter::can_be_compiled(methodHandle m) { - switch (method_kind(m)) { - case Interpreter::java_lang_math_sin : // fall thru - case Interpreter::java_lang_math_cos : // fall thru - case Interpreter::java_lang_math_tan : // fall thru - case Interpreter::java_lang_math_abs : // fall thru - case Interpreter::java_lang_math_log : // fall thru - case Interpreter::java_lang_math_log10 : // fall thru - case Interpreter::java_lang_math_sqrt : // fall thru - case Interpreter::java_lang_math_pow : // fall thru - case Interpreter::java_lang_math_exp : - return false; - default: - return true; - } -} - -// How much stack a method activation needs in words. -int AbstractInterpreter::size_top_interpreter_activation(Method* method) { - - const int stub_code = 4; // see generate_call_stub - // Save space for one monitor to get into the interpreted method in case - // the method is synchronized - int monitor_size = method->is_synchronized() ? - 1*frame::interpreter_frame_monitor_size() : 0; - - // total overhead size: entry_size + (saved rbp, thru expr stack bottom). 
- // be sure to change this if you add/subtract anything to/from the overhead area - const int overhead_size = -frame::interpreter_frame_initial_sp_offset; - - const int method_stack = (method->max_locals() + method->max_stack()) * - Interpreter::stackElementWords; - return overhead_size + method_stack + stub_code; -} - -//------------------------------------------------------------------------------------------------------------------------ -// Exceptions - -void TemplateInterpreterGenerator::generate_throw_exception() { - // Entry point in previous activation (i.e., if the caller was interpreted) - Interpreter::_rethrow_exception_entry = __ pc(); - const Register thread = rcx; - - // Restore sp to interpreter_frame_last_sp even though we are going - // to empty the expression stack for the exception processing. - __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); - // rax,: exception - // rdx: return address/pc that threw exception - __ restore_bcp(); // rsi points to call/send - __ restore_locals(); - - // Entry point for exceptions thrown within interpreter code - Interpreter::_throw_exception_entry = __ pc(); - // expression stack is undefined here - // rax,: exception - // rsi: exception bcp - __ verify_oop(rax); - - // expression stack must be empty before entering the VM in case of an exception - __ empty_expression_stack(); - __ empty_FPU_stack(); - // find exception handler address and preserve exception oop - __ call_VM(rdx, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), rax); - // rax,: exception handler entry point - // rdx: preserved exception oop - // rsi: bcp for exception handler - __ push_ptr(rdx); // push exception which is now the only value on the stack - __ jmp(rax); // jump to exception handler (may be _remove_activation_entry!) - - // If the exception is not handled in the current frame the frame is removed and - // the exception is rethrown (i.e. 
exception continuation is _rethrow_exception). - // - // Note: At this point the bci is still the bxi for the instruction which caused - // the exception and the expression stack is empty. Thus, for any VM calls - // at this point, GC will find a legal oop map (with empty expression stack). - - // In current activation - // tos: exception - // rsi: exception bcp - - // - // JVMTI PopFrame support - // - - Interpreter::_remove_activation_preserving_args_entry = __ pc(); - __ empty_expression_stack(); - __ empty_FPU_stack(); - // Set the popframe_processing bit in pending_popframe_condition indicating that we are - // currently handling popframe, so that call_VMs that may happen later do not trigger new - // popframe handling cycles. - __ get_thread(thread); - __ movl(rdx, Address(thread, JavaThread::popframe_condition_offset())); - __ orl(rdx, JavaThread::popframe_processing_bit); - __ movl(Address(thread, JavaThread::popframe_condition_offset()), rdx); - - { - // Check to see whether we are returning to a deoptimized frame. - // (The PopFrame call ensures that the caller of the popped frame is - // either interpreted or compiled and deoptimizes it if compiled.) - // In this case, we can't call dispatch_next() after the frame is - // popped, but instead must save the incoming arguments and restore - // them after deoptimization has occurred. - // - // Note that we don't compare the return PC against the - // deoptimization blob's unpack entry because of the presence of - // adapter frames in C2. 
- Label caller_not_deoptimized; - __ movptr(rdx, Address(rbp, frame::return_addr_offset * wordSize)); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), rdx); - __ testl(rax, rax); - __ jcc(Assembler::notZero, caller_not_deoptimized); - - // Compute size of arguments for saving when returning to deoptimized caller - __ get_method(rax); - __ movptr(rax, Address(rax, Method::const_offset())); - __ load_unsigned_short(rax, Address(rax, ConstMethod::size_of_parameters_offset())); - __ shlptr(rax, Interpreter::logStackElementSize); - __ restore_locals(); - __ subptr(rdi, rax); - __ addptr(rdi, wordSize); - // Save these arguments - __ get_thread(thread); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), thread, rax, rdi); - - __ remove_activation(vtos, rdx, - /* throw_monitor_exception */ false, - /* install_monitor_exception */ false, - /* notify_jvmdi */ false); - - // Inform deoptimization that it is responsible for restoring these arguments - __ get_thread(thread); - __ movl(Address(thread, JavaThread::popframe_condition_offset()), JavaThread::popframe_force_deopt_reexecution_bit); - - // Continue in deoptimization handler - __ jmp(rdx); - - __ bind(caller_not_deoptimized); - } - - __ remove_activation(vtos, rdx, - /* throw_monitor_exception */ false, - /* install_monitor_exception */ false, - /* notify_jvmdi */ false); - - // Finish with popframe handling - // A previous I2C followed by a deoptimization might have moved the - // outgoing arguments further up the stack. PopFrame expects the - // mutations to those outgoing arguments to be preserved and other - // constraints basically require this frame to look exactly as - // though it had previously invoked an interpreted activation with - // no space between the top of the expression stack (current - // last_sp) and the top of stack. 
Rather than force deopt to - // maintain this kind of invariant all the time we call a small - // fixup routine to move the mutated arguments onto the top of our - // expression stack if necessary. - __ mov(rax, rsp); - __ movptr(rbx, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); - __ get_thread(thread); - // PC must point into interpreter here - __ set_last_Java_frame(thread, noreg, rbp, __ pc()); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, rax, rbx); - __ get_thread(thread); - __ reset_last_Java_frame(thread, true, true); - // Restore the last_sp and null it out - __ movptr(rsp, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); - __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); - - __ restore_bcp(); - __ restore_locals(); - // The method data pointer was incremented already during - // call profiling. We have to restore the mdp for the current bcp. - if (ProfileInterpreter) { - __ set_method_data_pointer_for_bcp(); - } - - // Clear the popframe condition flag - __ get_thread(thread); - __ movl(Address(thread, JavaThread::popframe_condition_offset()), JavaThread::popframe_inactive); - -#if INCLUDE_JVMTI - { - Label L_done; - const Register local0 = rdi; - - __ cmpb(Address(rsi, 0), Bytecodes::_invokestatic); - __ jcc(Assembler::notEqual, L_done); - - // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. - // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. 
- - __ get_method(rdx); - __ movptr(rax, Address(local0, 0)); - __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), rax, rdx, rsi); - - __ testptr(rax, rax); - __ jcc(Assembler::zero, L_done); - - __ movptr(Address(rbx, 0), rax); - __ bind(L_done); - } -#endif // INCLUDE_JVMTI - - __ dispatch_next(vtos); - // end of PopFrame support - - Interpreter::_remove_activation_entry = __ pc(); - - // preserve exception over this code sequence - __ pop_ptr(rax); - __ get_thread(thread); - __ movptr(Address(thread, JavaThread::vm_result_offset()), rax); - // remove the activation (without doing throws on illegalMonitorExceptions) - __ remove_activation(vtos, rdx, false, true, false); - // restore exception - __ get_thread(thread); - __ get_vm_result(rax, thread); - - // Inbetween activations - previous activation type unknown yet - // compute continuation point - the continuation point expects - // the following registers set up: - // - // rax: exception - // rdx: return address/pc that threw exception - // rsp: expression stack of caller - // rbp: rbp, of caller - __ push(rax); // save exception - __ push(rdx); // save return address - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, rdx); - __ mov(rbx, rax); // save exception handler - __ pop(rdx); // restore return address - __ pop(rax); // restore exception - // Note that an "issuing PC" is actually the next PC after the call - __ jmp(rbx); // jump to exception handler of caller -} - - -// -// JVMTI ForceEarlyReturn support -// -address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { - address entry = __ pc(); - const Register thread = rcx; - - __ restore_bcp(); - __ restore_locals(); - __ empty_expression_stack(); - __ empty_FPU_stack(); - __ load_earlyret_value(state); - - __ get_thread(thread); - __ movptr(rcx, Address(thread, JavaThread::jvmti_thread_state_offset())); - const Address 
cond_addr(rcx, JvmtiThreadState::earlyret_state_offset()); - - // Clear the earlyret state - __ movl(cond_addr, JvmtiThreadState::earlyret_inactive); - - __ remove_activation(state, rsi, - false, /* throw_monitor_exception */ - false, /* install_monitor_exception */ - true); /* notify_jvmdi */ - __ jmp(rsi); - return entry; -} // end of ForceEarlyReturn support - - -//------------------------------------------------------------------------------------------------------------------------ -// Helper for vtos entry point generation - -void TemplateInterpreterGenerator::set_vtos_entry_points (Template* t, address& bep, address& cep, address& sep, address& aep, address& iep, address& lep, address& fep, address& dep, address& vep) { - assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); - Label L; - fep = __ pc(); __ push(ftos); __ jmp(L); - dep = __ pc(); __ push(dtos); __ jmp(L); - lep = __ pc(); __ push(ltos); __ jmp(L); - aep = __ pc(); __ push(atos); __ jmp(L); - bep = cep = sep = // fall through - iep = __ pc(); __ push(itos); // fall through - vep = __ pc(); __ bind(L); // fall through - generate_and_dispatch(t); -} - -//------------------------------------------------------------------------------------------------------------------------ -// Generation of individual instructions - -// helpers for generate_and_dispatch - - - -InterpreterGenerator::InterpreterGenerator(StubQueue* code) - : TemplateInterpreterGenerator(code) { - generate_all(); // down here so it can be "virtual" -} - -//------------------------------------------------------------------------------------------------------------------------ - -// Non-product code -#ifndef PRODUCT -address TemplateInterpreterGenerator::generate_trace_code(TosState state) { - address entry = __ pc(); - - // prepare expression stack - __ pop(rcx); // pop return address so expression stack is 'pure' - __ push(state); // save tosca - - // pass tosca registers as arguments & call tracer - __ call_VM(noreg, 
CAST_FROM_FN_PTR(address, SharedRuntime::trace_bytecode), rcx, rax, rdx); - __ mov(rcx, rax); // make sure return address is not destroyed by pop(state) - __ pop(state); // restore tosca - - // return - __ jmp(rcx); - - return entry; -} - - -void TemplateInterpreterGenerator::count_bytecode() { - __ incrementl(ExternalAddress((address) &BytecodeCounter::_counter_value)); -} - - -void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { - __ incrementl(ExternalAddress((address) &BytecodeHistogram::_counters[t->bytecode()])); -} - - -void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { - __ mov32(ExternalAddress((address) &BytecodePairHistogram::_index), rbx); - __ shrl(rbx, BytecodePairHistogram::log2_number_of_codes); - __ orl(rbx, ((int)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); - ExternalAddress table((address) BytecodePairHistogram::_counters); - Address index(noreg, rbx, Address::times_4); - __ incrementl(ArrayAddress(table, index)); -} - - -void TemplateInterpreterGenerator::trace_bytecode(Template* t) { - // Call a little run-time stub to avoid blow-up for each bytecode. - // The run-time runtime saves the right registers, depending on - // the tosca in-state for the given template. 
- assert(Interpreter::trace_code(t->tos_in()) != NULL, - "entry must have been generated"); - __ call(RuntimeAddress(Interpreter::trace_code(t->tos_in()))); -} - - -void TemplateInterpreterGenerator::stop_interpreter_at() { - Label L; - __ cmp32(ExternalAddress((address) &BytecodeCounter::_counter_value), - StopInterpreterAt); - __ jcc(Assembler::notEqual, L); - __ int3(); - __ bind(L); -} -#endif // !PRODUCT -#endif // CC_INTERP diff --git a/hotspot/src/cpu/x86/vm/templateTable_x86.cpp b/hotspot/src/cpu/x86/vm/templateTable_x86.cpp index 2ca11c6a1d7..b589e0100ce 100644 --- a/hotspot/src/cpu/x86/vm/templateTable_x86.cpp +++ b/hotspot/src/cpu/x86/vm/templateTable_x86.cpp @@ -43,8 +43,8 @@ #define __ _masm-> // Global Register Names -Register rbcp = LP64_ONLY(r13) NOT_LP64(rsi); -Register rlocals = LP64_ONLY(r14) NOT_LP64(rdi); +static const Register rbcp = LP64_ONLY(r13) NOT_LP64(rsi); +static const Register rlocals = LP64_ONLY(r14) NOT_LP64(rdi); // Platform-dependent initialization void TemplateTable::pd_initialize() { diff --git a/hotspot/src/cpu/zero/vm/interpreter_zero.cpp b/hotspot/src/cpu/zero/vm/interpreter_zero.cpp index 7172443db8f..c99e9391b62 100644 --- a/hotspot/src/cpu/zero/vm/interpreter_zero.cpp +++ b/hotspot/src/cpu/zero/vm/interpreter_zero.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* @@ -38,7 +38,6 @@ #include "prims/jvmtiThreadState.hpp" #include "prims/methodHandles.hpp" #include "runtime/arguments.hpp" -#include "runtime/deoptimization.hpp" #include "runtime/frame.inline.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/stubRoutines.hpp" @@ -74,7 +73,3 @@ address InterpreterGenerator::generate_abstract_entry() { bool AbstractInterpreter::can_be_compiled(methodHandle m) { return true; } - -void Deoptimization::unwind_callee_save_values(frame* f, - vframeArray* vframe_array) { -} diff --git a/hotspot/src/share/vm/runtime/deoptimization.cpp b/hotspot/src/share/vm/runtime/deoptimization.cpp index f48a8fb7be2..3b061d36bf9 100644 --- a/hotspot/src/share/vm/runtime/deoptimization.cpp +++ b/hotspot/src/share/vm/runtime/deoptimization.cpp @@ -571,6 +571,23 @@ void Deoptimization::cleanup_deopt_info(JavaThread *thread, thread->dec_in_deopt_handler(); } +// Moved from cpu directories because none of the cpus has callee save values. +// If a cpu implements callee save values, move this to deoptimization_<cpu>.cpp. +void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) { + + // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in + // the days we had adapter frames. When we deoptimize a situation where a + // compiled caller calls a compiled callee, the caller will have registers it expects + // to survive the call to the callee. If we deoptimize the callee the only + // way we can restore these registers is to have the oldest interpreter + // frame that we create restore these values. That is what this routine + // will accomplish. + + // At the moment we have modified c2 to not have any callee save registers + // so this problem does not exist and this routine is just a placeholder. + + assert(f->is_interpreted_frame(), "must be interpreted"); +} // Return BasicType of value being returned JRT_LEAF(BasicType, Deoptimization::unpack_frames(JavaThread* thread, int exec_mode))