From 6862ecfe1653eb374f88a1a6c85963e94af2c04a Mon Sep 17 00:00:00 2001 From: Roland Westrelin Date: Wed, 15 Apr 2015 11:36:42 +0200 Subject: [PATCH 01/13] 8077832: SA's dumpreplaydata, dumpcfg and buildreplayjars are broken SA code out of sync with hotspot code Reviewed-by: dsamersoff, sla, kvn --- .../src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java | 2 +- .../src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java | 2 +- .../src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java index 13ad04a9d30..83056d060a3 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java @@ -148,7 +148,7 @@ public class ciMethodData extends ciMetadata implements MethodDataInterface parametersTypeData() { Address base = getAddress().addOffsetTo(origField.getOffset()); int di = (int)parametersTypeDataDi.getValue(base); - if (di == -1) { + if (di == -1 || di == -2) { return null; } DataLayout dataLayout = new DataLayout(dataField.getValue(getAddress()), di); diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java index a055fe1619c..5cbc25e8ef4 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java @@ -328,7 +328,7 @@ public class ConstantPool extends Metadata implements ClassConstants { } public Symbol getUnresolvedStringAt(int which) { - return getSymbolAt(which); + return getSlotAt(which).getSymbol(); } // returns null, if not resolved. 
diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java index 4429754a5fe..920e6af0fb6 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java @@ -42,10 +42,10 @@ public class PhaseCFG extends Phase { private static synchronized void initialize(TypeDataBase db) throws WrongTypeException { Type type = db.lookupType("PhaseCFG"); - numBlocksField = new CIntField(type.getCIntegerField("_num_blocks"), 0); + numBlocksField = new CIntField(type.getCIntegerField("_number_of_blocks"), 0); blocksField = type.getAddressField("_blocks"); bbsField = type.getAddressField("_node_to_block_mapping"); - brootField = type.getAddressField("_broot"); + brootField = type.getAddressField("_root_block"); } private static CIntField numBlocksField; From f98a23137cfe6d1dc32e9db7666b556d6a7520dd Mon Sep 17 00:00:00 2001 From: Vladimir Ivanov Date: Fri, 17 Apr 2015 18:17:06 +0300 Subject: [PATCH 02/13] 8057967: CallSite dependency tracking scales devastatingly poorly Reviewed-by: jrose, roland, plevart, shade --- hotspot/src/share/vm/ci/ciCallSite.cpp | 19 ++ hotspot/src/share/vm/ci/ciCallSite.hpp | 1 + .../src/share/vm/classfile/javaClasses.cpp | 48 ++++- .../src/share/vm/classfile/javaClasses.hpp | 19 +- hotspot/src/share/vm/classfile/vmSymbols.hpp | 4 +- hotspot/src/share/vm/code/codeCache.cpp | 7 +- hotspot/src/share/vm/code/dependencies.cpp | 31 +-- hotspot/src/share/vm/code/dependencies.hpp | 8 +- hotspot/src/share/vm/code/nmethod.cpp | 1 + hotspot/src/share/vm/prims/methodHandles.cpp | 50 ++++- hotspot/src/share/vm/prims/methodHandles.hpp | 3 + .../jsr292/CallSiteDepContextTest.java | 179 ++++++++++++++++++ 12 files changed, 337 insertions(+), 33 deletions(-) create mode 100644 hotspot/test/compiler/jsr292/CallSiteDepContextTest.java diff --git a/hotspot/src/share/vm/ci/ciCallSite.cpp 
b/hotspot/src/share/vm/ci/ciCallSite.cpp index fb222fe3f5a..028a4ed724f 100644 --- a/hotspot/src/share/vm/ci/ciCallSite.cpp +++ b/hotspot/src/share/vm/ci/ciCallSite.cpp @@ -49,6 +49,25 @@ ciMethodHandle* ciCallSite::get_target() const { return CURRENT_ENV->get_object(method_handle_oop)->as_method_handle(); } +// ------------------------------------------------------------------ +// ciCallSite::get_context +// +// Return the context class of this CallSite, installing the default context if none is set. +ciKlass* ciCallSite::get_context() { + assert(!is_constant_call_site(), ""); + + VM_ENTRY_MARK; + oop call_site_oop = get_oop(); + InstanceKlass* ctxk = MethodHandles::get_call_site_context(call_site_oop); + if (ctxk == NULL) { + // The call site doesn't have a context associated. Set it to the default context. + oop def_context_oop = java_lang_invoke_CallSite::default_context(); + java_lang_invoke_CallSite::set_context_cas(call_site_oop, def_context_oop, /*expected=*/NULL); + ctxk = MethodHandles::get_call_site_context(call_site_oop); + } + return (CURRENT_ENV->get_metadata(ctxk))->as_klass(); +} + // ------------------------------------------------------------------ // ciCallSite::print // diff --git a/hotspot/src/share/vm/ci/ciCallSite.hpp b/hotspot/src/share/vm/ci/ciCallSite.hpp index 063f1e3a5fe..040e894d0df 100644 --- a/hotspot/src/share/vm/ci/ciCallSite.hpp +++ b/hotspot/src/share/vm/ci/ciCallSite.hpp @@ -43,6 +43,7 @@ public: // Return the target MethodHandle of this CallSite. 
ciMethodHandle* get_target() const; + ciKlass* get_context(); void print(); }; diff --git a/hotspot/src/share/vm/classfile/javaClasses.cpp b/hotspot/src/share/vm/classfile/javaClasses.cpp index 42ea9b8230f..57a5be1978c 100644 --- a/hotspot/src/share/vm/classfile/javaClasses.cpp +++ b/hotspot/src/share/vm/classfile/javaClasses.cpp @@ -102,21 +102,22 @@ InjectedField* JavaClasses::get_injected(Symbol* class_name, int* field_count) { static bool find_field(InstanceKlass* ik, Symbol* name_symbol, Symbol* signature_symbol, fieldDescriptor* fd, - bool allow_super = false) { - if (allow_super) - return ik->find_field(name_symbol, signature_symbol, fd) != NULL; - else + bool is_static = false, bool allow_super = false) { + if (allow_super || is_static) { + return ik->find_field(name_symbol, signature_symbol, is_static, fd) != NULL; + } else { return ik->find_local_field(name_symbol, signature_symbol, fd); + } } // Helpful routine for computing field offsets at run time rather than hardcoding them static void compute_offset(int &dest_offset, Klass* klass_oop, Symbol* name_symbol, Symbol* signature_symbol, - bool allow_super = false) { + bool is_static = false, bool allow_super = false) { fieldDescriptor fd; InstanceKlass* ik = InstanceKlass::cast(klass_oop); - if (!find_field(ik, name_symbol, signature_symbol, &fd, allow_super)) { + if (!find_field(ik, name_symbol, signature_symbol, &fd, is_static, allow_super)) { ResourceMark rm; tty->print_cr("Invalid layout of %s at %s", ik->external_name(), name_symbol->as_C_string()); #ifndef PRODUCT @@ -2972,14 +2973,49 @@ int java_lang_invoke_MethodType::rtype_slot_count(oop mt) { // Support for java_lang_invoke_CallSite int java_lang_invoke_CallSite::_target_offset; +int java_lang_invoke_CallSite::_context_offset; +int java_lang_invoke_CallSite::_default_context_offset; void java_lang_invoke_CallSite::compute_offsets() { Klass* k = SystemDictionary::CallSite_klass(); if (k != NULL) { compute_offset(_target_offset, k, 
vmSymbols::target_name(), vmSymbols::java_lang_invoke_MethodHandle_signature()); + compute_offset(_context_offset, k, vmSymbols::context_name(), vmSymbols::sun_misc_Cleaner_signature()); + compute_offset(_default_context_offset, k, + vmSymbols::DEFAULT_CONTEXT_name(), vmSymbols::sun_misc_Cleaner_signature(), + /*is_static=*/true, /*allow_super=*/false); } } +oop java_lang_invoke_CallSite::context_volatile(oop call_site) { + assert(java_lang_invoke_CallSite::is_instance(call_site), ""); + + oop dep_oop = call_site->obj_field_volatile(_context_offset); + return dep_oop; +} + +void java_lang_invoke_CallSite::set_context_volatile(oop call_site, oop context) { + assert(java_lang_invoke_CallSite::is_instance(call_site), ""); + call_site->obj_field_put_volatile(_context_offset, context); +} + +bool java_lang_invoke_CallSite::set_context_cas(oop call_site, oop context, oop expected) { + assert(java_lang_invoke_CallSite::is_instance(call_site), ""); + HeapWord* context_addr = call_site->obj_field_addr(_context_offset); + oop res = oopDesc::atomic_compare_exchange_oop(context, context_addr, expected, true); + bool success = (res == expected); + if (success) { + update_barrier_set((void*)context_addr, context); + } + return success; +} + +oop java_lang_invoke_CallSite::default_context() { + InstanceKlass* ik = InstanceKlass::cast(SystemDictionary::CallSite_klass()); + oop def_context_oop = ik->java_mirror()->obj_field(_default_context_offset); + assert(!oopDesc::is_null(def_context_oop), ""); + return def_context_oop; +} // Support for java_security_AccessControlContext diff --git a/hotspot/src/share/vm/classfile/javaClasses.hpp b/hotspot/src/share/vm/classfile/javaClasses.hpp index 4c2759466f8..85a5ce6f307 100644 --- a/hotspot/src/share/vm/classfile/javaClasses.hpp +++ b/hotspot/src/share/vm/classfile/javaClasses.hpp @@ -961,7 +961,6 @@ class java_lang_ref_SoftReference: public java_lang_ref_Reference { static void set_clock(jlong value); }; - // Interface to 
java.lang.invoke.MethodHandle objects class MethodHandleEntry; @@ -1173,16 +1172,25 @@ class java_lang_invoke_CallSite: AllStatic { private: static int _target_offset; + static int _context_offset; + static int _default_context_offset; + static void compute_offsets(); public: // Accessors - static oop target( oop site); - static void set_target( oop site, oop target); + static oop target( oop site); + static void set_target( oop site, oop target); - static volatile oop target_volatile(oop site); - static void set_target_volatile(oop site, oop target); + static volatile oop target_volatile( oop site); + static void set_target_volatile( oop site, oop target); + + static oop context_volatile(oop site); + static void set_context_volatile(oop site, oop context); + static bool set_context_cas (oop site, oop context, oop expected); + + static oop default_context(); // Testers static bool is_subclass(Klass* klass) { @@ -1194,7 +1202,6 @@ public: static int target_offset_in_bytes() { return _target_offset; } }; - // Interface to java.security.AccessControlContext objects class java_security_AccessControlContext: AllStatic { diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp index b550b04b78b..943eb27050d 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp @@ -292,6 +292,7 @@ template(setTargetNormal_name, "setTargetNormal") \ template(setTargetVolatile_name, "setTargetVolatile") \ template(setTarget_signature, "(Ljava/lang/invoke/MethodHandle;)V") \ + template(DEFAULT_CONTEXT_name, "DEFAULT_CONTEXT") \ NOT_LP64( do_alias(intptr_signature, int_signature) ) \ LP64_ONLY( do_alias(intptr_signature, long_signature) ) \ \ @@ -501,6 +502,7 @@ template(class_signature, "Ljava/lang/Class;") \ template(string_signature, "Ljava/lang/String;") \ template(reference_signature, "Ljava/lang/ref/Reference;") \ + template(sun_misc_Cleaner_signature, "Lsun/misc/Cleaner;") \ 
template(executable_signature, "Ljava/lang/reflect/Executable;") \ template(concurrenthashmap_signature, "Ljava/util/concurrent/ConcurrentHashMap;") \ template(String_StringBuilder_signature, "(Ljava/lang/String;)Ljava/lang/StringBuilder;") \ @@ -554,7 +556,7 @@ template(createGarbageCollectorMBean_signature, "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/management/GarbageCollectorMBean;") \ template(trigger_name, "trigger") \ template(clear_name, "clear") \ - template(trigger_method_signature, "(ILjava/lang/management/MemoryUsage;)V") \ + template(trigger_method_signature, "(ILjava/lang/management/MemoryUsage;)V") \ template(startAgent_name, "startAgent") \ template(startRemoteAgent_name, "startRemoteManagementAgent") \ template(startLocalAgent_name, "startLocalManagementAgent") \ diff --git a/hotspot/src/share/vm/code/codeCache.cpp b/hotspot/src/share/vm/code/codeCache.cpp index 25323c595f6..7e831bf3249 100644 --- a/hotspot/src/share/vm/code/codeCache.cpp +++ b/hotspot/src/share/vm/code/codeCache.cpp @@ -1067,8 +1067,11 @@ void CodeCache::flush_dependents_on(Handle call_site, Handle method_handle) { int marked = 0; { MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); - InstanceKlass* call_site_klass = InstanceKlass::cast(call_site->klass()); - marked = call_site_klass->mark_dependent_nmethods(changes); + InstanceKlass* ctxk = MethodHandles::get_call_site_context(call_site()); + if (ctxk == NULL) { + return; // No dependencies to invalidate yet. 
+ } + marked = ctxk->mark_dependent_nmethods(changes); } if (marked > 0) { // At least one nmethod has been marked for deoptimization diff --git a/hotspot/src/share/vm/code/dependencies.cpp b/hotspot/src/share/vm/code/dependencies.cpp index 6c94286c5a6..1f56103be5b 100644 --- a/hotspot/src/share/vm/code/dependencies.cpp +++ b/hotspot/src/share/vm/code/dependencies.cpp @@ -117,8 +117,9 @@ void Dependencies::assert_has_no_finalizable_subclasses(ciKlass* ctxk) { } void Dependencies::assert_call_site_target_value(ciCallSite* call_site, ciMethodHandle* method_handle) { - check_ctxk(call_site->klass()); - assert_common_2(call_site_target_value, call_site, method_handle); + ciKlass* ctxk = call_site->get_context(); + check_ctxk(ctxk); + assert_common_3(call_site_target_value, ctxk, call_site, method_handle); } // Helper function. If we are adding a new dep. under ctxk2, @@ -388,7 +389,7 @@ int Dependencies::_dep_args[TYPE_LIMIT] = { 3, // unique_concrete_subtypes_2 ctxk, k1, k2 3, // unique_concrete_methods_2 ctxk, m1, m2 1, // no_finalizable_subclasses ctxk - 2 // call_site_target_value call_site, method_handle + 3 // call_site_target_value ctxk, call_site, method_handle }; const char* Dependencies::dep_name(Dependencies::DepType dept) { @@ -594,7 +595,7 @@ void Dependencies::DepStream::log_dependency(Klass* witness) { const int nargs = argument_count(); GrowableArray* args = new GrowableArray(nargs); for (int j = 0; j < nargs; j++) { - if (type() == call_site_target_value) { + if (is_oop_argument(j)) { args->push(argument_oop(j)); } else { args->push(argument(j)); @@ -614,7 +615,7 @@ void Dependencies::DepStream::print_dependency(Klass* witness, bool verbose) { int nargs = argument_count(); GrowableArray* args = new GrowableArray(nargs); for (int j = 0; j < nargs; j++) { - if (type() == call_site_target_value) { + if (is_oop_argument(j)) { args->push(argument_oop(j)); } else { args->push(argument(j)); @@ -710,7 +711,7 @@ Metadata* Dependencies::DepStream::argument(int 
i) { * Returns a unique identifier for each dependency argument. */ uintptr_t Dependencies::DepStream::get_identifier(int i) { - if (has_oop_argument()) { + if (is_oop_argument(i)) { return (uintptr_t)(oopDesc*)argument_oop(i); } else { return (uintptr_t)argument(i); @@ -737,7 +738,7 @@ Klass* Dependencies::DepStream::context_type() { } // Some dependencies are using the klass of the first object - // argument as implicit context type (e.g. call_site_target_value). + // argument as implicit context type. { int ctxkj = dep_implicit_context_arg(type()); if (ctxkj >= 0) { @@ -1514,9 +1515,16 @@ Klass* Dependencies::check_has_no_finalizable_subclasses(Klass* ctxk, KlassDepCh return find_finalizable_subclass(search_at); } -Klass* Dependencies::check_call_site_target_value(oop call_site, oop method_handle, CallSiteDepChange* changes) { - assert(call_site ->is_a(SystemDictionary::CallSite_klass()), "sanity"); - assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "sanity"); +Klass* Dependencies::check_call_site_target_value(Klass* recorded_ctxk, oop call_site, oop method_handle, CallSiteDepChange* changes) { + assert(call_site->is_a(SystemDictionary::CallSite_klass()), "sanity"); + assert(!oopDesc::is_null(method_handle), "sanity"); + + Klass* call_site_ctxk = MethodHandles::get_call_site_context(call_site); + assert(!Klass::is_null(call_site_ctxk), "call site context should be initialized already"); + if (recorded_ctxk != call_site_ctxk) { + // Stale context + return recorded_ctxk; + } if (changes == NULL) { // Validate all CallSites if (java_lang_invoke_CallSite::target(call_site) != method_handle) @@ -1531,7 +1539,6 @@ Klass* Dependencies::check_call_site_target_value(oop call_site, oop method_hand return NULL; // assertion still valid } - void Dependencies::DepStream::trace_and_log_witness(Klass* witness) { if (witness != NULL) { if (TraceDependencies) { @@ -1592,7 +1599,7 @@ Klass* Dependencies::DepStream::check_call_site_dependency(CallSiteDepChange* 
ch Klass* witness = NULL; switch (type()) { case call_site_target_value: - witness = check_call_site_target_value(argument_oop(0), argument_oop(1), changes); + witness = check_call_site_target_value(context_type(), argument_oop(1), argument_oop(2), changes); break; default: witness = NULL; diff --git a/hotspot/src/share/vm/code/dependencies.hpp b/hotspot/src/share/vm/code/dependencies.hpp index 421ecc767d1..cacc5cac095 100644 --- a/hotspot/src/share/vm/code/dependencies.hpp +++ b/hotspot/src/share/vm/code/dependencies.hpp @@ -174,7 +174,7 @@ class Dependencies: public ResourceObj { klass_types = all_types & ~non_klass_types, non_ctxk_types = (1 << evol_method), - implicit_ctxk_types = (1 << call_site_target_value), + implicit_ctxk_types = 0, explicit_ctxk_types = all_types & ~(non_ctxk_types | implicit_ctxk_types), max_arg_count = 3, // current maximum number of arguments (incl. ctxk) @@ -330,7 +330,7 @@ class Dependencies: public ResourceObj { static Klass* check_exclusive_concrete_methods(Klass* ctxk, Method* m1, Method* m2, KlassDepChange* changes = NULL); static Klass* check_has_no_finalizable_subclasses(Klass* ctxk, KlassDepChange* changes = NULL); - static Klass* check_call_site_target_value(oop call_site, oop method_handle, CallSiteDepChange* changes = NULL); + static Klass* check_call_site_target_value(Klass* recorded_ctxk, oop call_site, oop method_handle, CallSiteDepChange* changes = NULL); // A returned Klass* is NULL if the dependency assertion is still // valid. 
A non-NULL Klass* is a 'witness' to the assertion // failure, a point in the class hierarchy where the assertion has @@ -496,7 +496,7 @@ class Dependencies: public ResourceObj { bool next(); DepType type() { return _type; } - bool has_oop_argument() { return type() == call_site_target_value; } + bool is_oop_argument(int i) { return type() == call_site_target_value && i > 0; } uintptr_t get_identifier(int i); int argument_count() { return dep_args(type()); } @@ -682,7 +682,7 @@ class CallSiteDepChange : public DepChange { _method_handle(method_handle) { assert(_call_site() ->is_a(SystemDictionary::CallSite_klass()), "must be"); - assert(_method_handle()->is_a(SystemDictionary::MethodHandle_klass()), "must be"); + assert(_method_handle.is_null() || _method_handle()->is_a(SystemDictionary::MethodHandle_klass()), "must be"); } // What kind of DepChange is this? diff --git a/hotspot/src/share/vm/code/nmethod.cpp b/hotspot/src/share/vm/code/nmethod.cpp index c2697277a62..03868e2d603 100644 --- a/hotspot/src/share/vm/code/nmethod.cpp +++ b/hotspot/src/share/vm/code/nmethod.cpp @@ -2325,6 +2325,7 @@ void nmethod::check_all_dependencies(DepChange& changes) { // Dependency checking failed. Print out information about the failed // dependency and finally fail with an assert. We can fail here, since // dependency checking is never done in a product build. + tty->print_cr("Failed dependency:"); changes.print(); nm->print(); nm->print_dependencies(); diff --git a/hotspot/src/share/vm/prims/methodHandles.cpp b/hotspot/src/share/vm/prims/methodHandles.cpp index b9daf4e9307..da2f055d1ea 100644 --- a/hotspot/src/share/vm/prims/methodHandles.cpp +++ b/hotspot/src/share/vm/prims/methodHandles.cpp @@ -939,6 +939,24 @@ int MethodHandles::find_MemberNames(KlassHandle k, return rfill + overflow; } +// Get context class for a CallSite instance: extract the existing context, or return NULL if it is unset or was cleared. 
+InstanceKlass* MethodHandles::get_call_site_context(oop call_site) { + // In order to extract a context the following traversal is performed: + // CallSite.context => Cleaner.referent => Class._klass => Klass + assert(java_lang_invoke_CallSite::is_instance(call_site), ""); + oop context_oop = java_lang_invoke_CallSite::context_volatile(call_site); + if (oopDesc::is_null(context_oop)) { + return NULL; // The context hasn't been initialized yet. + } + oop context_class_oop = java_lang_ref_Reference::referent(context_oop); + if (oopDesc::is_null(context_class_oop)) { + // The context reference was cleared by GC, so current dependency context + // isn't usable anymore. Context should be fetched from CallSite again. + return NULL; + } + return InstanceKlass::cast(java_lang_Class::as_Klass(context_class_oop)); +} + //------------------------------------------------------------------------------ // MemberNameTable // @@ -1231,7 +1249,7 @@ JVM_END JVM_ENTRY(void, MHN_setCallSiteTargetNormal(JNIEnv* env, jobject igcls, jobject call_site_jh, jobject target_jh)) { Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh)); - Handle target (THREAD, JNIHandles::resolve(target_jh)); + Handle target (THREAD, JNIHandles::resolve_non_null(target_jh)); { // Walk all nmethods depending on this call site. MutexLocker mu(Compile_lock, thread); @@ -1243,7 +1261,7 @@ JVM_END JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobject call_site_jh, jobject target_jh)) { Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh)); - Handle target (THREAD, JNIHandles::resolve(target_jh)); + Handle target (THREAD, JNIHandles::resolve_non_null(target_jh)); { // Walk all nmethods depending on this call site. 
MutexLocker mu(Compile_lock, thread); @@ -1253,6 +1271,33 @@ JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobjec } JVM_END +JVM_ENTRY(void, MHN_invalidateDependentNMethods(JNIEnv* env, jobject igcls, jobject call_site_jh)) { + Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh)); + { + // Walk all nmethods depending on this call site. + MutexLocker mu1(Compile_lock, thread); + + CallSiteDepChange changes(call_site(), Handle()); + + InstanceKlass* ctxk = MethodHandles::get_call_site_context(call_site()); + if (ctxk == NULL) { + return; // No dependencies to invalidate yet. + } + int marked = 0; + { + MutexLockerEx mu2(CodeCache_lock, Mutex::_no_safepoint_check_flag); + marked = ctxk->mark_dependent_nmethods(changes); + } + java_lang_invoke_CallSite::set_context_volatile(call_site(), NULL); // Reset call site to initial state + if (marked > 0) { + // At least one nmethod has been marked for deoptimization + VM_Deoptimize op; + VMThread::execute(&op); + } + } +} +JVM_END + /** * Throws a java/lang/UnsupportedOperationException unconditionally. 
* This is required by the specification of MethodHandle.invoke if @@ -1306,6 +1351,7 @@ static JNINativeMethod MHN_methods[] = { {CC"objectFieldOffset", CC"("MEM")J", FN_PTR(MHN_objectFieldOffset)}, {CC"setCallSiteTargetNormal", CC"("CS""MH")V", FN_PTR(MHN_setCallSiteTargetNormal)}, {CC"setCallSiteTargetVolatile", CC"("CS""MH")V", FN_PTR(MHN_setCallSiteTargetVolatile)}, + {CC"invalidateDependentNMethods", CC"("CS")V", FN_PTR(MHN_invalidateDependentNMethods)}, {CC"staticFieldOffset", CC"("MEM")J", FN_PTR(MHN_staticFieldOffset)}, {CC"staticFieldBase", CC"("MEM")"OBJ, FN_PTR(MHN_staticFieldBase)}, {CC"getMemberVMInfo", CC"("MEM")"OBJ, FN_PTR(MHN_getMemberVMInfo)} diff --git a/hotspot/src/share/vm/prims/methodHandles.hpp b/hotspot/src/share/vm/prims/methodHandles.hpp index 24d949ad7d5..bf83724b2eb 100644 --- a/hotspot/src/share/vm/prims/methodHandles.hpp +++ b/hotspot/src/share/vm/prims/methodHandles.hpp @@ -68,6 +68,9 @@ class MethodHandles: AllStatic { // bit values for suppress argument to expand_MemberName: enum { _suppress_defc = 1, _suppress_name = 2, _suppress_type = 4 }; + // CallSite support + static InstanceKlass* get_call_site_context(oop call_site); + // Generate MethodHandles adapters. static bool generate_adapters(); diff --git a/hotspot/test/compiler/jsr292/CallSiteDepContextTest.java b/hotspot/test/compiler/jsr292/CallSiteDepContextTest.java new file mode 100644 index 00000000000..11e46ed03fd --- /dev/null +++ b/hotspot/test/compiler/jsr292/CallSiteDepContextTest.java @@ -0,0 +1,179 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/** + * @test + * @bug 8057967 + * @run main/bootclasspath -Xbatch java.lang.invoke.CallSiteDepContextTest + */ +package java.lang.invoke; + +import java.lang.ref.*; +import jdk.internal.org.objectweb.asm.*; +import sun.misc.Unsafe; + +import static jdk.internal.org.objectweb.asm.Opcodes.*; + +public class CallSiteDepContextTest { + static final Unsafe UNSAFE = Unsafe.getUnsafe(); + static final MethodHandles.Lookup LOOKUP = MethodHandles.Lookup.IMPL_LOOKUP; + static final String CLASS_NAME = "java/lang/invoke/Test"; + static final String METHOD_NAME = "m"; + static final MethodType TYPE = MethodType.methodType(int.class); + + static MutableCallSite mcs; + static MethodHandle bsmMH; + + static { + try { + bsmMH = LOOKUP.findStatic( + CallSiteDepContextTest.class, "bootstrap", + MethodType.methodType(CallSite.class, MethodHandles.Lookup.class, String.class, MethodType.class)); + } catch(Throwable e) { + throw new InternalError(e); + } + } + + public static CallSite bootstrap(MethodHandles.Lookup caller, + String invokedName, + MethodType invokedType) { + return mcs; + } + + static class T { + static int f1() { return 1; } + static int f2() { return 2; } + } + + static byte[] getClassFile(String suffix) { + ClassWriter cw = new 
ClassWriter(ClassWriter.COMPUTE_FRAMES | ClassWriter.COMPUTE_MAXS); + MethodVisitor mv; + cw.visit(52, ACC_PUBLIC | ACC_SUPER, CLASS_NAME + suffix, null, "java/lang/Object", null); + { + mv = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, METHOD_NAME, TYPE.toMethodDescriptorString(), null, null); + mv.visitCode(); + Handle bsm = new Handle(H_INVOKESTATIC, + "java/lang/invoke/CallSiteDepContextTest", "bootstrap", + bsmMH.type().toMethodDescriptorString()); + mv.visitInvokeDynamicInsn("methodName", TYPE.toMethodDescriptorString(), bsm); + mv.visitInsn(IRETURN); + mv.visitMaxs(0, 0); + mv.visitEnd(); + } + cw.visitEnd(); + return cw.toByteArray(); + } + + private static void execute(int expected, MethodHandle... mhs) throws Throwable { + for (int i = 0; i < 20_000; i++) { + for (MethodHandle mh : mhs) { + int r = (int) mh.invokeExact(); + if (r != expected) { + throw new Error(r + " != " + expected); + } + } + } + } + + public static void testSharedCallSite() throws Throwable { + Class cls1 = UNSAFE.defineAnonymousClass(Object.class, getClassFile("CS_1"), null); + Class cls2 = UNSAFE.defineAnonymousClass(Object.class, getClassFile("CS_2"), null); + + MethodHandle[] mhs = new MethodHandle[] { + LOOKUP.findStatic(cls1, METHOD_NAME, TYPE), + LOOKUP.findStatic(cls2, METHOD_NAME, TYPE) + }; + + mcs = new MutableCallSite(LOOKUP.findStatic(T.class, "f1", TYPE)); + execute(1, mhs); + mcs.setTarget(LOOKUP.findStatic(T.class, "f2", TYPE)); + execute(2, mhs); + } + + public static void testNonBoundCallSite() throws Throwable { + mcs = new MutableCallSite(LOOKUP.findStatic(T.class, "f1", TYPE)); + + // mcs.context == null + MethodHandle mh = mcs.dynamicInvoker(); + execute(1, mh); + + // mcs.context == cls1 + Class cls1 = UNSAFE.defineAnonymousClass(Object.class, getClassFile("NonBound_1"), null); + MethodHandle mh1 = LOOKUP.findStatic(cls1, METHOD_NAME, TYPE); + + execute(1, mh1); + + mcs.setTarget(LOOKUP.findStatic(T.class, "f2", TYPE)); + + execute(2, mh, mh1); + } + + static 
ReferenceQueue rq = new ReferenceQueue(); + static PhantomReference ref; + + public static void testGC() throws Throwable { + mcs = new MutableCallSite(LOOKUP.findStatic(T.class, "f1", TYPE)); + + Class[] cls = new Class[] { + UNSAFE.defineAnonymousClass(Object.class, getClassFile("GC_1"), null), + UNSAFE.defineAnonymousClass(Object.class, getClassFile("GC_2"), null), + }; + + MethodHandle[] mhs = new MethodHandle[] { + LOOKUP.findStatic(cls[0], METHOD_NAME, TYPE), + LOOKUP.findStatic(cls[1], METHOD_NAME, TYPE), + }; + + // mcs.context == cls[0] + int r = (int) mhs[0].invokeExact(); + + execute(1, mhs); + + ref = new PhantomReference<>(cls[0], rq); + cls[0] = UNSAFE.defineAnonymousClass(Object.class, getClassFile("GC_3"), null); + mhs[0] = LOOKUP.findStatic(cls[0], METHOD_NAME, TYPE); + + do { + System.gc(); + try { + Reference ref1 = rq.remove(1000); + if (ref1 == ref) { + ref1.clear(); + System.gc(); // Ensure that the stale context is cleared + break; + } + } catch(InterruptedException e) { /* ignore */ } + } while (true); + + execute(1, mhs); + mcs.setTarget(LOOKUP.findStatic(T.class, "f2", TYPE)); + execute(2, mhs); + } + + public static void main(String[] args) throws Throwable { + testSharedCallSite(); + testNonBoundCallSite(); + testGC(); + System.out.println("TEST PASSED"); + } +} From 22425d93ca721069bd10dcaff4f97739bee70e10 Mon Sep 17 00:00:00 2001 From: Goetz Lindenmaier Date: Wed, 15 Apr 2015 14:51:56 +0200 Subject: [PATCH 03/13] 8077843: adlc: allow nodes that use TEMP inputs in expand rules Reviewed-by: kvn --- hotspot/src/share/vm/adlc/output_c.cpp | 27 +++++++++++--------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/hotspot/src/share/vm/adlc/output_c.cpp b/hotspot/src/share/vm/adlc/output_c.cpp index c9f925616b7..cc98ef59af4 100644 --- a/hotspot/src/share/vm/adlc/output_c.cpp +++ b/hotspot/src/share/vm/adlc/output_c.cpp @@ -1505,8 +1505,8 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) { // Iterate over the 
instructions 'node' expands into ExpandRule *expand = node->_exprule; NameAndList *expand_instr = NULL; - for(expand->reset_instructions(); - (expand_instr = expand->iter_instructions()) != NULL; cnt++) { + for (expand->reset_instructions(); + (expand_instr = expand->iter_instructions()) != NULL; cnt++) { new_id = expand_instr->name(); InstructForm* expand_instruction = (InstructForm*)globalAD->globalNames()[new_id]; @@ -1517,30 +1517,25 @@ void ArchDesc::defineExpand(FILE *fp, InstructForm *node) { continue; } - if (expand_instruction->has_temps()) { - globalAD->syntax_err(node->_linenum, "In %s: expand rules using instructs with TEMPs aren't supported: %s", - node->_ident, new_id); - } - // Build the node for the instruction fprintf(fp,"\n %sNode *n%d = new %sNode();\n", new_id, cnt, new_id); // Add control edge for this node fprintf(fp," n%d->add_req(_in[0]);\n", cnt); // Build the operand for the value this node defines. Form *form = (Form*)_globalNames[new_id]; - assert( form, "'new_id' must be a defined form name"); + assert(form, "'new_id' must be a defined form name"); // Grab the InstructForm for the new instruction new_inst = form->is_instruction(); - assert( new_inst, "'new_id' must be an instruction name"); - if( node->is_ideal_if() && new_inst->is_ideal_if() ) { - fprintf(fp, " ((MachIfNode*)n%d)->_prob = _prob;\n",cnt); - fprintf(fp, " ((MachIfNode*)n%d)->_fcnt = _fcnt;\n",cnt); + assert(new_inst, "'new_id' must be an instruction name"); + if (node->is_ideal_if() && new_inst->is_ideal_if()) { + fprintf(fp, " ((MachIfNode*)n%d)->_prob = _prob;\n", cnt); + fprintf(fp, " ((MachIfNode*)n%d)->_fcnt = _fcnt;\n", cnt); } - if( node->is_ideal_fastlock() && new_inst->is_ideal_fastlock() ) { - fprintf(fp, " ((MachFastLockNode*)n%d)->_counters = _counters;\n",cnt); - fprintf(fp, " ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n",cnt); - fprintf(fp, " ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n",cnt); + if 
(node->is_ideal_fastlock() && new_inst->is_ideal_fastlock()) { + fprintf(fp, " ((MachFastLockNode*)n%d)->_counters = _counters;\n", cnt); + fprintf(fp, " ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n", cnt); + fprintf(fp, " ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n", cnt); } // Fill in the bottom_type where requested From 51fd71697817c07a0579e3904cecc501bf0aa4bf Mon Sep 17 00:00:00 2001 From: Sandhya Viswanathan Date: Fri, 17 Apr 2015 17:39:19 -0700 Subject: [PATCH 04/13] 8078113: 8011102 changes may cause incorrect results Replace Vzeroupper instruction in stubs with zeroing only used ymm registers. Reviewed-by: kvn --- hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp | 16 +++++++++++----- hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp | 3 ++- hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp | 6 ++++-- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp index 32af915065d..9663bd78893 100644 --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -6657,7 +6657,7 @@ void MacroAssembler::string_compare(Register str1, Register str2, subl(cnt2, stride2); jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP); // clean upper bits of YMM registers - vzeroupper(); + vpxor(vec1, vec1); // compare wide vectors tail bind(COMPARE_WIDE_TAIL); @@ -6672,7 +6672,7 @@ void MacroAssembler::string_compare(Register str1, Register str2, // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors. 
bind(VECTOR_NOT_EQUAL); // clean upper bits of YMM registers - vzeroupper(); + vpxor(vec1, vec1); lea(str1, Address(str1, result, scale)); lea(str2, Address(str2, result, scale)); jmp(COMPARE_16_CHARS); @@ -6931,7 +6931,8 @@ void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Regist bind(DONE); if (UseAVX >= 2) { // clean upper bits of YMM registers - vzeroupper(); + vpxor(vec1, vec1); + vpxor(vec2, vec2); } } @@ -7065,7 +7066,8 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, BIND(L_check_fill_8_bytes); // clean upper bits of YMM registers - vzeroupper(); + movdl(xtmp, value); + pshufd(xtmp, xtmp, 0); } else { // Fill 32-byte chunks pshufd(xtmp, xtmp, 0); @@ -7228,7 +7230,11 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, bind(L_copy_16_chars_exit); if (UseAVX >= 2) { // clean upper bits of YMM registers - vzeroupper(); + vpxor(tmp2Reg, tmp2Reg); + vpxor(tmp3Reg, tmp3Reg); + vpxor(tmp4Reg, tmp4Reg); + movdl(tmp1Reg, tmp5); + pshufd(tmp1Reg, tmp1Reg, 0); } subptr(len, 8); jccb(Assembler::greater, L_copy_8_chars_exit); diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp index 3d8370f2c5e..9cbffb37176 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp @@ -835,7 +835,8 @@ class StubGenerator: public StubCodeGenerator { if (UseUnalignedLoadStores && (UseAVX >= 2)) { // clean upper bits of YMM registers - __ vzeroupper(); + __ vpxor(xmm0, xmm0); + __ vpxor(xmm1, xmm1); } __ addl(qword_count, 8); __ jccb(Assembler::zero, L_exit); diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 122f94b5d20..5ac6727dce0 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -1352,7 +1352,8 @@ class StubGenerator: public StubCodeGenerator { __ BIND(L_end); if (UseAVX 
>= 2) { // clean upper bits of YMM registers - __ vzeroupper(); + __ vpxor(xmm0, xmm0); + __ vpxor(xmm1, xmm1); } } else { // Copy 32-bytes per iteration @@ -1429,7 +1430,8 @@ class StubGenerator: public StubCodeGenerator { __ BIND(L_end); if (UseAVX >= 2) { // clean upper bits of YMM registers - __ vzeroupper(); + __ vpxor(xmm0, xmm0); + __ vpxor(xmm1, xmm1); } } else { // Copy 32-bytes per iteration From 05ea4dbf1e21cea357bcfcec7f63b2ef639aab8f Mon Sep 17 00:00:00 2001 From: Roland Westrelin Date: Tue, 24 Mar 2015 10:25:09 +0100 Subject: [PATCH 05/13] 8069191: moving predicate out of loops may cause array accesses to bypass null check Remove CastPP nodes only during final graph reshape Reviewed-by: kvn, jrose --- hotspot/src/share/vm/opto/castnode.cpp | 31 --- hotspot/src/share/vm/opto/castnode.hpp | 6 - hotspot/src/share/vm/opto/compile.cpp | 43 +++- hotspot/src/share/vm/opto/gcm.cpp | 39 ++++ hotspot/src/share/vm/opto/matcher.cpp | 22 ++ hotspot/src/share/vm/opto/matcher.hpp | 2 + hotspot/src/share/vm/opto/memnode.cpp | 210 ------------------ hotspot/src/share/vm/opto/memnode.hpp | 4 - hotspot/src/share/vm/opto/narrowptrnode.cpp | 4 - hotspot/src/share/vm/opto/narrowptrnode.hpp | 1 - hotspot/src/share/vm/opto/node.cpp | 14 +- hotspot/src/share/vm/opto/node.hpp | 6 +- hotspot/src/share/vm/opto/phaseX.cpp | 26 --- .../loopopts/TestPredicateLostDependency.java | 84 +++++++ 14 files changed, 197 insertions(+), 295 deletions(-) create mode 100644 hotspot/test/compiler/loopopts/TestPredicateLostDependency.java diff --git a/hotspot/src/share/vm/opto/castnode.cpp b/hotspot/src/share/vm/opto/castnode.cpp index 3a36c6676a1..e20d13a3262 100644 --- a/hotspot/src/share/vm/opto/castnode.cpp +++ b/hotspot/src/share/vm/opto/castnode.cpp @@ -73,16 +73,6 @@ Node *ConstraintCastNode::Ideal(PhaseGVN *phase, bool can_reshape){ return (in(0) && remove_dead_region(phase, can_reshape)) ? 
this : NULL; } -//------------------------------Ideal_DU_postCCP------------------------------- -// Throw away cast after constant propagation -Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - const Type *t = ccp->type(in(1)); - ccp->hash_delete(this); - set_type(t); // Turn into ID function - ccp->hash_insert(this); - return this; -} - uint CastIINode::size_of() const { return sizeof(*this); } @@ -164,13 +154,6 @@ const Type *CastIINode::Value(PhaseTransform *phase) const { return res; } -Node *CastIINode::Ideal_DU_postCCP(PhaseCCP *ccp) { - if (_carry_dependency) { - return NULL; - } - return ConstraintCastNode::Ideal_DU_postCCP(ccp); -} - #ifndef PRODUCT void CastIINode::dump_spec(outputStream *st) const { TypeNode::dump_spec(st); @@ -180,20 +163,6 @@ void CastIINode::dump_spec(outputStream *st) const { } #endif -//============================================================================= - -//------------------------------Ideal_DU_postCCP------------------------------- -// If not converting int->oop, throw away cast after constant propagation -Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - const Type *t = ccp->type(in(1)); - if (!t->isa_oop_ptr() || ((in(1)->is_DecodeN()) && Matcher::gen_narrow_oop_implicit_null_checks())) { - return NULL; // do not transform raw pointers or narrow oops - } - return ConstraintCastNode::Ideal_DU_postCCP(ccp); -} - - - //============================================================================= //------------------------------Identity--------------------------------------- // If input is already higher or equal to cast type, then this is an identity. 
diff --git a/hotspot/src/share/vm/opto/castnode.hpp b/hotspot/src/share/vm/opto/castnode.hpp index 8b79562b045..535b0e6610b 100644 --- a/hotspot/src/share/vm/opto/castnode.hpp +++ b/hotspot/src/share/vm/opto/castnode.hpp @@ -42,7 +42,6 @@ class ConstraintCastNode: public TypeNode { virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); virtual int Opcode() const; virtual uint ideal_reg() const = 0; - virtual Node *Ideal_DU_postCCP( PhaseCCP * ); }; //------------------------------CastIINode------------------------------------- @@ -63,7 +62,6 @@ class CastIINode: public ConstraintCastNode { virtual uint ideal_reg() const { return Op_RegI; } virtual Node *Identity( PhaseTransform *phase ); virtual const Type *Value( PhaseTransform *phase ) const; - virtual Node *Ideal_DU_postCCP( PhaseCCP * ); #ifndef PRODUCT virtual void dump_spec(outputStream *st) const; #endif @@ -76,7 +74,6 @@ class CastPPNode: public ConstraintCastNode { CastPPNode (Node *n, const Type *t ): ConstraintCastNode(n, t) {} virtual int Opcode() const; virtual uint ideal_reg() const { return Op_RegP; } - virtual Node *Ideal_DU_postCCP( PhaseCCP * ); }; //------------------------------CheckCastPPNode-------------------------------- @@ -94,9 +91,6 @@ class CheckCastPPNode: public TypeNode { virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); virtual int Opcode() const; virtual uint ideal_reg() const { return Op_RegP; } - // No longer remove CheckCast after CCP as it gives me a place to hang - // the proper address type - which is required to compute anti-deps. 
- //virtual Node *Ideal_DU_postCCP( PhaseCCP * ); }; diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp index d8f08b59f65..df2074dd710 100644 --- a/hotspot/src/share/vm/opto/compile.cpp +++ b/hotspot/src/share/vm/opto/compile.cpp @@ -2811,9 +2811,38 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { break; } -#ifdef _LP64 - case Op_CastPP: - if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) { + case Op_CastPP: { + // Remove CastPP nodes to gain more freedom during scheduling but + // keep the dependency they encode as control or precedence edges + // (if control is set already) on memory operations. Some CastPP + // nodes don't have a control (don't carry a dependency): skip + // those. + if (n->in(0) != NULL) { + ResourceMark rm; + Unique_Node_List wq; + wq.push(n); + for (uint next = 0; next < wq.size(); ++next) { + Node *m = wq.at(next); + for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) { + Node* use = m->fast_out(i); + if (use->is_Mem() || use->is_EncodeNarrowPtr()) { + use->ensure_control_or_add_prec(n->in(0)); + } else if (use->in(0) == NULL) { + switch(use->Opcode()) { + case Op_AddP: + case Op_DecodeN: + case Op_DecodeNKlass: + case Op_CheckCastPP: + case Op_CastPP: + wq.push(use); + break; + } + } + } + } + } + const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); + if (is_LP64 && n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) { Node* in1 = n->in(1); const Type* t = n->bottom_type(); Node* new_in1 = in1->clone(); @@ -2846,9 +2875,15 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { if (in1->outcnt() == 0) { in1->disconnect_inputs(NULL, this); } + } else { + n->subsume_by(n->in(1), this); + if (n->outcnt() == 0) { + n->disconnect_inputs(NULL, this); + } } break; - + } +#ifdef _LP64 case Op_CmpP: // Do this transformation here to preserve CmpPNode::sub() and // other TypePtr related 
Ideal optimizations (for example, ptr nullness). diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp index 0486bf6885b..4dab736ac91 100644 --- a/hotspot/src/share/vm/opto/gcm.cpp +++ b/hotspot/src/share/vm/opto/gcm.cpp @@ -100,6 +100,9 @@ void PhaseCFG::replace_block_proj_ctrl( Node *n ) { } } +static bool is_dominator(Block* d, Block* n) { + return d->dom_lca(n) == d; +} //------------------------------schedule_pinned_nodes-------------------------- // Set the basic block for Nodes pinned into blocks @@ -122,6 +125,42 @@ void PhaseCFG::schedule_pinned_nodes(VectorSet &visited) { schedule_node_into_block(node, block); } + // If the node has precedence edges (added when CastPP nodes are + // removed in final_graph_reshaping), fix the control of the + // node to cover the precedence edges and remove the + // dependencies. + Node* n = NULL; + for (uint i = node->len()-1; i >= node->req(); i--) { + Node* m = node->in(i); + if (m == NULL) continue; + // Skip the precedence edge if the test that guarded a CastPP: + // - was optimized out during escape analysis + // (OptimizePtrCompare): the CastPP's control isn't an end of + // block. + // - is moved in the branch of a dominating If: the control of + // the CastPP is then a Region. + if (m->is_block_proj() || m->is_block_start()) { + node->rm_prec(i); + if (n == NULL) { + n = m; + } else { + Block* bn = get_block_for_node(n); + Block* bm = get_block_for_node(m); + assert(is_dominator(bn, bm) || is_dominator(bm, bn), "one must dominate the other"); + n = is_dominator(bn, bm) ? 
m : n; + } + } + } + if (n != NULL) { + assert(node->in(0), "control should have been set"); + Block* bn = get_block_for_node(n); + Block* bnode = get_block_for_node(node->in(0)); + assert(is_dominator(bn, bnode) || is_dominator(bnode, bn), "one must dominate the other"); + if (!is_dominator(bn, bnode)) { + node->set_req(0, n); + } + } + // process all inputs that are non NULL for (int i = node->req() - 1; i >= 0; --i) { if (node->in(i) != NULL) { diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp index a0bf3eea694..e47ee7f0551 100644 --- a/hotspot/src/share/vm/opto/matcher.cpp +++ b/hotspot/src/share/vm/opto/matcher.cpp @@ -1049,6 +1049,15 @@ Node *Matcher::xform( Node *n, int max_stack ) { mstack.push(m, Visit, n, -1); } + // Handle precedence edges for interior nodes + for (i = n->len()-1; (uint)i >= n->req(); i--) { + Node *m = n->in(i); + if (m == NULL || C->node_arena()->contains(m)) continue; + n->rm_prec(i); + // set -1 to call add_prec() instead of set_req() during Step1 + mstack.push(m, Visit, n, -1); + } + // For constant debug info, I'd rather have unmatched constants. 
int cnt = n->req(); JVMState* jvms = n->jvms(); @@ -1738,6 +1747,14 @@ MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) { return ex; } +void Matcher::handle_precedence_edges(Node* n, MachNode *mach) { + for (uint i = n->req(); i < n->len(); i++) { + if (n->in(i) != NULL) { + mach->add_prec(n->in(i)); + } + } +} + void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) { // 'op' is what I am expecting to receive int op = _leftOp[rule]; @@ -1772,6 +1789,8 @@ void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *m uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) { + handle_precedence_edges(s->_leaf, mach); + if( s->_leaf->is_Load() ) { Node *mem2 = s->_leaf->in(MemNode::Memory); assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" ); @@ -1854,6 +1873,9 @@ void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) { mem = s->_leaf->in(MemNode::Memory); debug_only(_mem_node = s->_leaf;) } + + handle_precedence_edges(s->_leaf, mach); + if( s->_leaf->in(0) && s->_leaf->req() > 1) { if( !mach->in(0) ) mach->set_req(0,s->_leaf->in(0)); diff --git a/hotspot/src/share/vm/opto/matcher.hpp b/hotspot/src/share/vm/opto/matcher.hpp index d88771aeda6..41c6759b9eb 100644 --- a/hotspot/src/share/vm/opto/matcher.hpp +++ b/hotspot/src/share/vm/opto/matcher.hpp @@ -124,6 +124,8 @@ class Matcher : public PhaseTransform { // Mach node for ConP #NULL MachNode* _mach_null; + void handle_precedence_edges(Node* n, MachNode *mach); + public: int LabelRootDepth; // Convert ideal machine register to a register mask for spill-loads diff --git a/hotspot/src/share/vm/opto/memnode.cpp b/hotspot/src/share/vm/opto/memnode.cpp index 683ae584710..81bb5ab81e7 100644 --- a/hotspot/src/share/vm/opto/memnode.cpp +++ b/hotspot/src/share/vm/opto/memnode.cpp @@ -652,216 +652,6 @@ const TypePtr* MemNode::calculate_adr_type(const Type* t, const 
TypePtr* cross_c } } -//------------------------adr_phi_is_loop_invariant---------------------------- -// A helper function for Ideal_DU_postCCP to check if a Phi in a counted -// loop is loop invariant. Make a quick traversal of Phi and associated -// CastPP nodes, looking to see if they are a closed group within the loop. -bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) { - // The idea is that the phi-nest must boil down to only CastPP nodes - // with the same data. This implies that any path into the loop already - // includes such a CastPP, and so the original cast, whatever its input, - // must be covered by an equivalent cast, with an earlier control input. - ResourceMark rm; - - // The loop entry input of the phi should be the unique dominating - // node for every Phi/CastPP in the loop. - Unique_Node_List closure; - closure.push(adr_phi->in(LoopNode::EntryControl)); - - // Add the phi node and the cast to the worklist. - Unique_Node_List worklist; - worklist.push(adr_phi); - if( cast != NULL ){ - if( !cast->is_ConstraintCast() ) return false; - worklist.push(cast); - } - - // Begin recursive walk of phi nodes. - while( worklist.size() ){ - // Take a node off the worklist - Node *n = worklist.pop(); - if( !closure.member(n) ){ - // Add it to the closure. - closure.push(n); - // Make a sanity check to ensure we don't waste too much time here. - if( closure.size() > 20) return false; - // This node is OK if: - // - it is a cast of an identical value - // - or it is a phi node (then we add its inputs to the worklist) - // Otherwise, the node is not OK, and we presume the cast is not invariant - if( n->is_ConstraintCast() ){ - worklist.push(n->in(1)); - } else if( n->is_Phi() ) { - for( uint i = 1; i < n->req(); i++ ) { - worklist.push(n->in(i)); - } - } else { - return false; - } - } - } - - // Quit when the worklist is empty, and we've found no offending nodes. 
- return true; -} - -//------------------------------Ideal_DU_postCCP------------------------------- -// Find any cast-away of null-ness and keep its control. Null cast-aways are -// going away in this pass and we need to make this memory op depend on the -// gating null check. -Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - return Ideal_common_DU_postCCP(ccp, this, in(MemNode::Address)); -} - -// I tried to leave the CastPP's in. This makes the graph more accurate in -// some sense; we get to keep around the knowledge that an oop is not-null -// after some test. Alas, the CastPP's interfere with GVN (some values are -// the regular oop, some are the CastPP of the oop, all merge at Phi's which -// cannot collapse, etc). This cost us 10% on SpecJVM, even when I removed -// some of the more trivial cases in the optimizer. Removing more useless -// Phi's started allowing Loads to illegally float above null checks. I gave -// up on this approach. CNC 10/20/2000 -// This static method may be called not from MemNode (EncodePNode calls it). -// Only the control edge of the node 'n' might be updated. -Node *MemNode::Ideal_common_DU_postCCP( PhaseCCP *ccp, Node* n, Node* adr ) { - Node *skipped_cast = NULL; - // Need a null check? Regular static accesses do not because they are - // from constant addresses. Array ops are gated by the range check (which - // always includes a NULL check). Just check field ops. - if( n->in(MemNode::Control) == NULL ) { - // Scan upwards for the highest location we can place this memory op. - while( true ) { - switch( adr->Opcode() ) { - - case Op_AddP: // No change to NULL-ness, so peek thru AddP's - adr = adr->in(AddPNode::Base); - continue; - - case Op_DecodeN: // No change to NULL-ness, so peek thru - case Op_DecodeNKlass: - adr = adr->in(1); - continue; - - case Op_EncodeP: - case Op_EncodePKlass: - // EncodeP node's control edge could be set by this method - // when EncodeP node depends on CastPP node. 
- // - // Use its control edge for memory op because EncodeP may go away - // later when it is folded with following or preceding DecodeN node. - if (adr->in(0) == NULL) { - // Keep looking for cast nodes. - adr = adr->in(1); - continue; - } - ccp->hash_delete(n); - n->set_req(MemNode::Control, adr->in(0)); - ccp->hash_insert(n); - return n; - - case Op_CastPP: - // If the CastPP is useless, just peek on through it. - if( ccp->type(adr) == ccp->type(adr->in(1)) ) { - // Remember the cast that we've peeked though. If we peek - // through more than one, then we end up remembering the highest - // one, that is, if in a loop, the one closest to the top. - skipped_cast = adr; - adr = adr->in(1); - continue; - } - // CastPP is going away in this pass! We need this memory op to be - // control-dependent on the test that is guarding the CastPP. - ccp->hash_delete(n); - n->set_req(MemNode::Control, adr->in(0)); - ccp->hash_insert(n); - return n; - - case Op_Phi: - // Attempt to float above a Phi to some dominating point. - if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) { - // If we've already peeked through a Cast (which could have set the - // control), we can't float above a Phi, because the skipped Cast - // may not be loop invariant. - if (adr_phi_is_loop_invariant(adr, skipped_cast)) { - adr = adr->in(1); - continue; - } - } - - // Intentional fallthrough! - - // No obvious dominating point. The mem op is pinned below the Phi - // by the Phi itself. If the Phi goes away (no true value is merged) - // then the mem op can float, but not indefinitely. It must be pinned - // behind the controls leading to the Phi. - case Op_CheckCastPP: - // These usually stick around to change address type, however a - // useless one can be elided and we still need to pick up a control edge - if (adr->in(0) == NULL) { - // This CheckCastPP node has NO control and is likely useless. 
But we - // need check further up the ancestor chain for a control input to keep - // the node in place. 4959717. - skipped_cast = adr; - adr = adr->in(1); - continue; - } - ccp->hash_delete(n); - n->set_req(MemNode::Control, adr->in(0)); - ccp->hash_insert(n); - return n; - - // List of "safe" opcodes; those that implicitly block the memory - // op below any null check. - case Op_CastX2P: // no null checks on native pointers - case Op_Parm: // 'this' pointer is not null - case Op_LoadP: // Loading from within a klass - case Op_LoadN: // Loading from within a klass - case Op_LoadKlass: // Loading from within a klass - case Op_LoadNKlass: // Loading from within a klass - case Op_ConP: // Loading from a klass - case Op_ConN: // Loading from a klass - case Op_ConNKlass: // Loading from a klass - case Op_CreateEx: // Sucking up the guts of an exception oop - case Op_Con: // Reading from TLS - case Op_CMoveP: // CMoveP is pinned - case Op_CMoveN: // CMoveN is pinned - break; // No progress - - case Op_Proj: // Direct call to an allocation routine - case Op_SCMemProj: // Memory state from store conditional ops -#ifdef ASSERT - { - assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value"); - const Node* call = adr->in(0); - if (call->is_CallJava()) { - const CallJavaNode* call_java = call->as_CallJava(); - const TypeTuple *r = call_java->tf()->range(); - assert(r->cnt() > TypeFunc::Parms, "must return value"); - const Type* ret_type = r->field_at(TypeFunc::Parms); - assert(ret_type && ret_type->isa_ptr(), "must return pointer"); - // We further presume that this is one of - // new_instance_Java, new_array_Java, or - // the like, but do not assert for this. - } else if (call->is_Allocate()) { - // similar case to new_instance_Java, etc. - } else if (!call->is_CallLeaf()) { - // Projections from fetch_oop (OSR) are allowed as well. 
- ShouldNotReachHere(); - } - } -#endif - break; - default: - ShouldNotReachHere(); - } - break; - } - } - - return NULL; // No progress -} - - //============================================================================= // Should LoadNode::Ideal() attempt to remove control edges? bool LoadNode::can_remove_control() const { diff --git a/hotspot/src/share/vm/opto/memnode.hpp b/hotspot/src/share/vm/opto/memnode.hpp index d1a65d1ebdb..a15e1ca2759 100644 --- a/hotspot/src/share/vm/opto/memnode.hpp +++ b/hotspot/src/share/vm/opto/memnode.hpp @@ -84,10 +84,6 @@ public: // This one should probably be a phase-specific function: static bool all_controls_dominate(Node* dom, Node* sub); - // Find any cast-away of null-ness and keep its control. - static Node *Ideal_common_DU_postCCP( PhaseCCP *ccp, Node* n, Node* adr ); - virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp ); - virtual const class TypePtr *adr_type() const; // returns bottom_type of address // Shared code for Ideal methods: diff --git a/hotspot/src/share/vm/opto/narrowptrnode.cpp b/hotspot/src/share/vm/opto/narrowptrnode.cpp index 197d748f407..de88eb6393b 100644 --- a/hotspot/src/share/vm/opto/narrowptrnode.cpp +++ b/hotspot/src/share/vm/opto/narrowptrnode.cpp @@ -67,10 +67,6 @@ const Type *EncodePNode::Value( PhaseTransform *phase ) const { } -Node *EncodeNarrowPtrNode::Ideal_DU_postCCP( PhaseCCP *ccp ) { - return MemNode::Ideal_common_DU_postCCP(ccp, this, in(1)); -} - Node* DecodeNKlassNode::Identity(PhaseTransform* phase) { const Type *t = phase->type( in(1) ); if( t == Type::TOP ) return in(1); diff --git a/hotspot/src/share/vm/opto/narrowptrnode.hpp b/hotspot/src/share/vm/opto/narrowptrnode.hpp index 9b737f59837..feecbd0b906 100644 --- a/hotspot/src/share/vm/opto/narrowptrnode.hpp +++ b/hotspot/src/share/vm/opto/narrowptrnode.hpp @@ -39,7 +39,6 @@ class EncodeNarrowPtrNode : public TypeNode { } public: virtual uint ideal_reg() const { return Op_RegN; } - virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp ); }; 
//------------------------------EncodeP-------------------------------- diff --git a/hotspot/src/share/vm/opto/node.cpp b/hotspot/src/share/vm/opto/node.cpp index 6ec1f96df40..35de94b414b 100644 --- a/hotspot/src/share/vm/opto/node.cpp +++ b/hotspot/src/share/vm/opto/node.cpp @@ -1387,12 +1387,6 @@ bool Node::remove_dead_region(PhaseGVN *phase, bool can_reshape) { return false; } -//------------------------------Ideal_DU_postCCP------------------------------- -// Idealize graph, using DU info. Must clone result into new-space -Node *Node::Ideal_DU_postCCP( PhaseCCP * ) { - return NULL; // Default to no change -} - //------------------------------hash------------------------------------------- // Hash function over Nodes. uint Node::hash() const { @@ -2081,6 +2075,14 @@ Node* Node::unique_ctrl_out() const { return found; } +void Node::ensure_control_or_add_prec(Node* c) { + if (in(0) == NULL) { + set_req(0, c); + } else if (in(0) != c) { + add_prec(c); + } +} + //============================================================================= //------------------------------yank------------------------------------------- // Find and remove diff --git a/hotspot/src/share/vm/opto/node.hpp b/hotspot/src/share/vm/opto/node.hpp index cefd5bd3846..2dfedbc085a 100644 --- a/hotspot/src/share/vm/opto/node.hpp +++ b/hotspot/src/share/vm/opto/node.hpp @@ -906,9 +906,6 @@ protected: bool remove_dead_region(PhaseGVN *phase, bool can_reshape); public: - // Idealize graph, using DU info. Done after constant propagation - virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp ); - // See if there is valid pipeline info static const Pipeline *pipeline_class(); virtual const Pipeline *pipeline() const; @@ -942,6 +939,9 @@ public: // Return the unique control out if only one. Null if none or more than one. 
Node* unique_ctrl_out() const; + // Set control or add control as precedence edge + void ensure_control_or_add_prec(Node* c); + //----------------- Code Generation // Ideal register class for Matching. Zero means unmatched instruction diff --git a/hotspot/src/share/vm/opto/phaseX.cpp b/hotspot/src/share/vm/opto/phaseX.cpp index 477ab2883e3..861a84f4d20 100644 --- a/hotspot/src/share/vm/opto/phaseX.cpp +++ b/hotspot/src/share/vm/opto/phaseX.cpp @@ -1605,21 +1605,6 @@ void PhaseCCP::do_transform() { C->set_root( transform(C->root())->as_Root() ); assert( C->top(), "missing TOP node" ); assert( C->root(), "missing root" ); - - // Eagerly remove castPP nodes here. CastPP nodes might not be - // removed in the subsequent IGVN phase if a node that changes - // in(1) of a castPP is processed prior to the castPP node. - for (uint i = 0; i < _worklist.size(); i++) { - Node* n = _worklist.at(i); - - if (n->is_ConstraintCast()) { - Node* nn = n->Identity(this); - if (nn != n) { - replace_node(n, nn); - --i; - } - } - } } //------------------------------transform-------------------------------------- @@ -1700,11 +1685,6 @@ Node *PhaseCCP::transform_once( Node *n ) { _worklist.push(n); // n re-enters the hash table via the worklist } - // Idealize graph using DU info. Must clone() into new-space. - // DU info is generally used to show profitability, progress or safety - // (but generally not needed for correctness). 
- Node *nn = n->Ideal_DU_postCCP(this); - // TEMPORARY fix to ensure that 2nd GVN pass eliminates NULL checks switch( n->Opcode() ) { case Op_FastLock: // Revisit FastLocks for lock coarsening @@ -1721,12 +1701,6 @@ Node *PhaseCCP::transform_once( Node *n ) { default: break; } - if( nn ) { - _worklist.push(n); - // Put users of 'n' onto worklist for second igvn transform - add_users_to_worklist(n); - return nn; - } return n; } diff --git a/hotspot/test/compiler/loopopts/TestPredicateLostDependency.java b/hotspot/test/compiler/loopopts/TestPredicateLostDependency.java new file mode 100644 index 00000000000..a9986c24155 --- /dev/null +++ b/hotspot/test/compiler/loopopts/TestPredicateLostDependency.java @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +/** + * @test + * @bug 8069191 + * @summary predicate moved out of loops and CastPP removal causes dependency to be lost + * @run main/othervm -Xcomp -XX:CompileOnly=TestPredicateLostDependency.m1 -XX:+IgnoreUnrecognizedVMOptions -XX:+StressGCM TestPredicateLostDependency + * + */ + +public class TestPredicateLostDependency { + static class A { + int i; + } + + static class B extends A { + } + + static boolean crash = false; + + static boolean m2() { + return crash; + } + + static int m3(float[] arr) { + return 0; + } + + static float m1(A aa) { + float res = 0; + float[] arr = new float[10]; + for (int i = 0; i < 10; i++) { + if (m2()) { + arr = null; + } + m3(arr); + int j = arr.length; + int k = 0; + for (k = 9; k < j; k++) { + } + if (k == 10) { + if (aa instanceof B) { + } + } + res += arr[0]; + res += arr[1]; + } + return res; + } + + static public void main(String args[]) { + A a = new A(); + B b = new B(); + for (int i = 0; i < 20000; i++) { + m1(a); + } + crash = true; + try { + m1(a); + } catch (NullPointerException npe) {} + } +} From 5827e6ce0f3a6331cb1c3a4f24df789ce030d886 Mon Sep 17 00:00:00 2001 From: Goetz Lindenmaier Date: Wed, 15 Apr 2015 12:44:56 +0200 Subject: [PATCH 06/13] 8077838: Recent developments for ppc Power 8 recognition and instructions, math.*Exact intrinsics and rtm, C2 optimizations Reviewed-by: kvn, simonis --- hotspot/src/cpu/ppc/vm/assembler_ppc.cpp | 124 +++- hotspot/src/cpu/ppc/vm/assembler_ppc.hpp | 76 ++- .../src/cpu/ppc/vm/assembler_ppc.inline.hpp | 50 ++ hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp | 4 +- hotspot/src/cpu/ppc/vm/globals_ppc.hpp | 57 +- hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp | 7 +- hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp | 6 +- hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp | 6 +- hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp | 460 +++++++++++++- hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp | 51 +- .../cpu/ppc/vm/macroAssembler_ppc.inline.hpp | 34 +- 
hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp | 11 +- hotspot/src/cpu/ppc/vm/ppc.ad | 572 ++++++++++-------- .../cpu/ppc/vm/register_definitions_ppc.cpp | 15 +- hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp | 6 +- hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp | 57 +- hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp | 26 +- .../cpu/ppc/vm/templateInterpreter_ppc.cpp | 35 +- .../cpu/ppc/vm/templateInterpreter_ppc.hpp | 6 +- .../src/cpu/ppc/vm/templateTable_ppc_64.cpp | 19 +- hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp | 193 +++++- hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp | 23 +- hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp | 5 +- 23 files changed, 1390 insertions(+), 453 deletions(-) diff --git a/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp b/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp index 6ab0f8a61bd..7b19ddb0018 100644 --- a/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -85,8 +85,7 @@ int Assembler::branch_destination(int inst, int pos) { } // Low-level andi-one-instruction-macro. 
-void Assembler::andi(Register a, Register s, const int ui16) { - assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate"); +void Assembler::andi(Register a, Register s, const long ui16) { if (is_power_of_2_long(((jlong) ui16)+1)) { // pow2minus1 clrldi(a, s, 64-log2_long((((jlong) ui16)+1))); @@ -97,6 +96,7 @@ void Assembler::andi(Register a, Register s, const int ui16) { // negpow2 clrrdi(a, s, log2_long((jlong)-ui16)); } else { + assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate"); andi_(a, s, ui16); } } @@ -356,7 +356,6 @@ void Assembler::load_const(Register d, long x, Register tmp) { // 16 bit immediate offset. int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) { // Avoid accidentally trying to use R0 for indexed addressing. - assert(d != R0, "R0 not allowed"); assert_different_registers(d, tmp); short xa, xb, xc, xd; // Four 16-bit chunks of const. @@ -370,6 +369,58 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur return 0; } + int retval = 0; + if (return_simm16_rest) { + retval = xd; + x = rem << 16; + xd = 0; + } + + if (d == R0) { // Can't use addi. + if (is_simm(x, 32)) { // opt 2: simm32 + lis(d, x >> 16); + if (xd) ori(d, d, (unsigned short)xd); + } else { + // 64-bit value: x = xa xb xc xd + xa = (x >> 48) & 0xffff; + xb = (x >> 32) & 0xffff; + xc = (x >> 16) & 0xffff; + bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0); + if (tmp == noreg || (xc == 0 && xd == 0)) { + if (xa_loaded) { + lis(d, xa); + if (xb) { ori(d, d, (unsigned short)xb); } + } else { + li(d, xb); + } + sldi(d, d, 32); + if (xc) { oris(d, d, (unsigned short)xc); } + if (xd) { ori( d, d, (unsigned short)xd); } + } else { + // Exploit instruction level parallelism if we have a tmp register. + bool xc_loaded = (xd & 0x8000) ? 
(xc != -1) : (xc != 0); + if (xa_loaded) { + lis(tmp, xa); + } + if (xc_loaded) { + lis(d, xc); + } + if (xa_loaded) { + if (xb) { ori(tmp, tmp, (unsigned short)xb); } + } else { + li(tmp, xb); + } + if (xc_loaded) { + if (xd) { ori(d, d, (unsigned short)xd); } + } else { + li(d, xd); + } + insrdi(d, tmp, 32, 0); + } + } + return retval; + } + xc = rem & 0xFFFF; // Next 16-bit chunk. rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend. @@ -377,28 +428,27 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur lis(d, xc); } else { // High 32 bits needed. - if (tmp != noreg) { // opt 3: We have a temp reg. + if (tmp != noreg && (int)x != 0) { // opt 3: We have a temp reg. // No carry propagation between xc and higher chunks here (use logical instructions). xa = (x >> 48) & 0xffff; xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0. - bool load_xa = (xa != 0) || (xb < 0); + bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0); bool return_xd = false; - if (load_xa) { lis(tmp, xa); } + if (xa_loaded) { lis(tmp, xa); } if (xc) { lis(d, xc); } - if (load_xa) { + if (xa_loaded) { if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0. } else { - li(tmp, xb); // non-negative + li(tmp, xb); } if (xc) { - if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi. - else if (xd) { addi(d, d, xd); } + if (xd) { addi(d, d, xd); } } else { li(d, xd); } insrdi(d, tmp, 32, 0); - return return_xd ? xd : 0; // non-negative + return retval; } xb = rem & 0xFFFF; // Next 16-bit chunk. @@ -417,11 +467,51 @@ int Assembler::load_const_optimized(Register d, long x, Register tmp, bool retur if (xc) { addis(d, d, xc); } } - // opt 5: Return offset to be inserted into following instruction. 
- if (return_simm16_rest) return xd; - if (xd) { addi(d, d, xd); } - return 0; + return retval; +} + +// Emits code for d = s + x. We emit only one addition to s to optimize latency. If return_simm16_rest is set, the low 16 bits of x may be left out of the emitted code and are returned so the caller can add them to d afterwards; otherwise 0 is returned. +int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) { + assert(s != R0 && s != tmp, "unsupported"); + long rem = x; + + // Case 1: Can use mr or addi. + short xd = rem & 0xFFFF; // Lowest 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xd >> 15); + if (rem == 0) { + if (xd == 0) { + if (d != s) { mr(d, s); } + return 0; + } + if (return_simm16_rest && (d == s)) { // Nothing is emitted on this path, so d must already hold s. + return xd; + } + addi(d, s, xd); + return 0; + } + + // Case 2: Can use addis. + if (xd == 0) { + short xc = rem & 0xFFFF; // 2nd 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensate for sign extension of xc (xd is 0 here). + if (rem == 0) { + addis(d, s, xc); + return 0; + } + } + + // Other cases: load & add. + Register tmp1 = tmp, + tmp2 = noreg; + if ((d != tmp) && (d != s)) { + // Can use d. + tmp1 = d; + tmp2 = tmp; + } + int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest); + add(d, tmp1, s); + return simm16_rest; } #ifndef PRODUCT diff --git a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp index f1087dbc02c..6c7103aefa4 100644 --- a/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp @@ -224,10 +224,13 @@ class Assembler : public AbstractAssembler { ADDIS_OPCODE = (15u << OPCODE_SHIFT), ADDIC__OPCODE = (13u << OPCODE_SHIFT), ADDE_OPCODE = (31u << OPCODE_SHIFT | 138u << 1), + ADDME_OPCODE = (31u << OPCODE_SHIFT | 234u << 1), + ADDZE_OPCODE = (31u << OPCODE_SHIFT | 202u << 1), SUBF_OPCODE = (31u << OPCODE_SHIFT | 40u << 1), SUBFC_OPCODE = (31u << OPCODE_SHIFT | 8u << 1), SUBFE_OPCODE = (31u << OPCODE_SHIFT | 136u << 1), SUBFIC_OPCODE = (8u << OPCODE_SHIFT), + SUBFME_OPCODE = (31u << OPCODE_SHIFT | 232u << 1), SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1), DIVW_OPCODE = (31u << OPCODE_SHIFT | 491u << 1), MULLW_OPCODE = (31u <<
OPCODE_SHIFT | 235u << 1), @@ -657,6 +660,9 @@ class Assembler : public AbstractAssembler { SYNC_OPCODE = (31u << OPCODE_SHIFT | 598u << 1), EIEIO_OPCODE = (31u << OPCODE_SHIFT | 854u << 1), + // Wait instructions for polling. + WAIT_OPCODE = (31u << OPCODE_SHIFT | 62u << 1), + // Trap instructions TDI_OPCODE = (2u << OPCODE_SHIFT), TWI_OPCODE = (3u << OPCODE_SHIFT), @@ -666,8 +672,10 @@ class Assembler : public AbstractAssembler { // Atomics. LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1), LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1), + LQARX_OPCODE = (31u << OPCODE_SHIFT | 276u << 1), STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1), - STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1) + STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1), + STQCX_OPCODE = (31u << OPCODE_SHIFT | 182u << 1) }; @@ -1171,6 +1179,14 @@ class Assembler : public AbstractAssembler { inline void adde_( Register d, Register a, Register b); inline void subfe( Register d, Register a, Register b); inline void subfe_( Register d, Register a, Register b); + inline void addme( Register d, Register a); + inline void addme_( Register d, Register a); + inline void subfme( Register d, Register a); + inline void subfme_(Register d, Register a); + inline void addze( Register d, Register a); + inline void addze_( Register d, Register a); + inline void subfze( Register d, Register a); + inline void subfze_(Register d, Register a); inline void neg( Register d, Register a); inline void neg_( Register d, Register a); inline void mulli( Register d, Register a, int si16); @@ -1189,6 +1205,38 @@ class Assembler : public AbstractAssembler { inline void divw( Register d, Register a, Register b); inline void divw_( Register d, Register a, Register b); + // Fixed-Point Arithmetic Instructions with Overflow detection + inline void addo( Register d, Register a, Register b); + inline void addo_( Register d, Register a, Register b); + inline void subfo( Register d, Register a, Register b); + inline void subfo_( 
Register d, Register a, Register b); + inline void addco( Register d, Register a, Register b); + inline void addco_( Register d, Register a, Register b); + inline void subfco( Register d, Register a, Register b); + inline void subfco_( Register d, Register a, Register b); + inline void addeo( Register d, Register a, Register b); + inline void addeo_( Register d, Register a, Register b); + inline void subfeo( Register d, Register a, Register b); + inline void subfeo_( Register d, Register a, Register b); + inline void addmeo( Register d, Register a); + inline void addmeo_( Register d, Register a); + inline void subfmeo( Register d, Register a); + inline void subfmeo_(Register d, Register a); + inline void addzeo( Register d, Register a); + inline void addzeo_( Register d, Register a); + inline void subfzeo( Register d, Register a); + inline void subfzeo_(Register d, Register a); + inline void nego( Register d, Register a); + inline void nego_( Register d, Register a); + inline void mulldo( Register d, Register a, Register b); + inline void mulldo_( Register d, Register a, Register b); + inline void mullwo( Register d, Register a, Register b); + inline void mullwo_( Register d, Register a, Register b); + inline void divdo( Register d, Register a, Register b); + inline void divdo_( Register d, Register a, Register b); + inline void divwo( Register d, Register a, Register b); + inline void divwo_( Register d, Register a, Register b); + // extended mnemonics inline void li( Register d, int si16); inline void lis( Register d, int si16); @@ -1303,7 +1351,7 @@ class Assembler : public AbstractAssembler { inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg); // PPC 1, section 3.3.11, Fixed-Point Logical Instructions - void andi( Register a, Register s, int ui16); // optimized version + void andi( Register a, Register s, long ui16); // optimized version inline void andi_( Register a, Register s, int ui16); inline void andis_( Register a, 
Register s, int ui16); inline void ori( Register a, Register s, int ui16); @@ -1688,14 +1736,21 @@ class Assembler : public AbstractAssembler { inline void isync(); inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8) + // Wait instructions for polling. Attention: May result in SIGILL. + inline void wait(); + inline void waitrsv(); // >=Power7 + // atomics inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0); + inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0); inline bool lxarx_hint_exclusive_access(); inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false); + inline void lqarx( Register d, Register a, Register b, bool hint_exclusive_access = false); inline void stwcx_( Register s, Register a, Register b); inline void stdcx_( Register s, Register a, Register b); + inline void stqcx_( Register s, Register a, Register b); // Instructions for adjusting thread priority for simultaneous // multithreading (SMT) on Power5. @@ -2054,10 +2109,13 @@ class Assembler : public AbstractAssembler { // Atomics: use ra0mem to disallow R0 as base. 
inline void lwarx_unchecked(Register d, Register b, int eh1); inline void ldarx_unchecked(Register d, Register b, int eh1); + inline void lqarx_unchecked(Register d, Register b, int eh1); inline void lwarx( Register d, Register b, bool hint_exclusive_access); inline void ldarx( Register d, Register b, bool hint_exclusive_access); + inline void lqarx( Register d, Register b, bool hint_exclusive_access); inline void stwcx_(Register s, Register b); inline void stdcx_(Register s, Register b); + inline void stqcx_(Register s, Register b); inline void lfs( FloatRegister d, int si16); inline void lfsx( FloatRegister d, Register b); inline void lfd( FloatRegister d, int si16); @@ -2120,6 +2178,20 @@ class Assembler : public AbstractAssembler { return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest); } + // If return_simm16_rest, the return value needs to get added afterwards. + int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false); + inline int add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) { + return add_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest); + } + + // If return_simm16_rest, the return value needs to get added afterwards. 
+ inline int sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) { + return add_const_optimized(d, s, -x, tmp, return_simm16_rest); + } + inline int sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) { + return sub_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest); + } + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { #ifdef CHECK_DELAY diff --git a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp index 5493f124371..e860dac7d43 100644 --- a/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp +++ b/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp @@ -100,6 +100,14 @@ inline void Assembler::adde( Register d, Register a, Register b) { emit_int32( inline void Assembler::adde_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } inline void Assembler::subfe( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::addme( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::addme_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } +inline void Assembler::subfme( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::subfme_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } +inline void Assembler::addze( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::addze_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } +inline void Assembler::subfze( Register 
d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::subfze_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } inline void Assembler::neg( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } inline void Assembler::neg_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } inline void Assembler::mulli( Register d, Register a, int si16) { emit_int32(MULLI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } @@ -118,6 +126,38 @@ inline void Assembler::divd_( Register d, Register a, Register b) { emit_int32( inline void Assembler::divw( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +// Fixed-Point Arithmetic Instructions with Overflow detection +inline void Assembler::addo( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::addo_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::subfo( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::subfo_( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::addco( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::addco_( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::subfco( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::subfco_( Register d, Register a, 
Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::addeo( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::addeo_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::subfeo( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::subfeo_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::addmeo( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::addmeo_( Register d, Register a) { emit_int32(ADDME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::subfmeo( Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::subfmeo_(Register d, Register a) { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::addzeo( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::addzeo_( Register d, Register a) { emit_int32(ADDZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::subfzeo( Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::subfzeo_(Register d, Register a) { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::nego( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(0)); } +inline void Assembler::nego_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(1) | rc(1)); } +inline void Assembler::mulldo( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::mulldo_( Register d, 
Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::mullwo( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::mullwo_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::divdo( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::divdo_( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } +inline void Assembler::divwo( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); } +inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); } + // extended mnemonics inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); } inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); } @@ -540,15 +580,22 @@ inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); } inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); } inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); } +// Wait instructions for polling. +inline void Assembler::wait() { emit_int32( WAIT_OPCODE); } +inline void Assembler::waitrsv() { emit_int32( WAIT_OPCODE | 1<<(31-10)); } // WC=0b01 >=Power7 + // atomics // Use ra0mem to disallow R0 as base. 
inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } +inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); } inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } +inline void Assembler::stqcx_(Register s, Register a, Register b) { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } // Instructions for adjusting thread priority // for simultaneous multithreading (SMT) on POWER5. 
@@ -873,10 +920,13 @@ inline void Assembler::dcbtstct(Register s2, int ct) { emit_int32( DCBTST_OPCOD // ra0 version inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } +inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1) { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); } inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); } +inline void Assembler::stqcx_(Register s, Register b) { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); } // ra0 version inline void Assembler::lfs( FloatRegister d, int si16) { emit_int32( LFS_OPCODE | frt(d) | simm(si16,16)); } diff --git a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp index 3b4b9e3660b..d797ef6eb8c 100644 --- a/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/c2_globals_ppc.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. 
All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,7 +47,7 @@ define_pd_global(intx, ConditionalMoveLimit, 3); define_pd_global(intx, FLOATPRESSURE, 28); define_pd_global(intx, FreqInlineSize, 175); define_pd_global(intx, MinJumpTableSize, 10); -define_pd_global(intx, INTPRESSURE, 25); +define_pd_global(intx, INTPRESSURE, 26); define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, RegisterCostAreaRatio, 16000); diff --git a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp index f2391d251eb..58c3a428327 100644 --- a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -58,7 +58,7 @@ define_pd_global(bool, UseMembar, false); // GC Ergo Flags define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread. -define_pd_global(uintx, TypeProfileLevel, 0); +define_pd_global(uintx, TypeProfileLevel, 111); // Platform dependent flag handling: flags only defined on this platform. #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ @@ -71,14 +71,26 @@ define_pd_global(uintx, TypeProfileLevel, 0); \ product(uintx, PowerArchitecturePPC64, 0, \ "CPU Version: x for PowerX. Currently recognizes Power5 to " \ - "Power7. Default is 0. CPUs newer than Power7 will be " \ - "recognized as Power7.") \ + "Power8. Default is 0. 
Newer CPUs will be recognized as Power8.") \ \ /* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \ /* indirect call by a direct call. */ \ product(bool, ReoptimizeCallSequences, true, \ "Reoptimize code-sequences of calls at runtime.") \ \ + /* Power 8: Configure Data Stream Control Register. */ \ + product(uint64_t,DSCR_PPC64, (uintx)-1, \ + "Power8 or later: Specify encoded value for Data Stream Control " \ + "Register") \ + product(uint64_t,DSCR_DPFD_PPC64, 8, \ + "Power8 or later: DPFD (default prefetch depth) value of the " \ + "Data Stream Control Register." \ + " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \ + product(uint64_t,DSCR_URG_PPC64, 8, \ + "Power8 or later: URG (depth attainment urgency) value of the " \ + "Data Stream Control Register." \ + " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch") \ + \ product(bool, UseLoadInstructionsForStackBangingPPC64, false, \ "Use load instructions for stack banging.") \ \ @@ -121,6 +133,41 @@ define_pd_global(uintx, TypeProfileLevel, 0); \ product(bool, ZapMemory, false, "Write 0x0101... to empty memory." 
\ " Use this to ease debugging.") \ - + \ + /* Use Restricted Transactional Memory for lock eliding */ \ + product(bool, UseRTMLocking, false, \ + "Enable RTM lock eliding for inflated locks in compiled code") \ + \ + experimental(bool, UseRTMForStackLocks, false, \ + "Enable RTM lock eliding for stack locks in compiled code") \ + \ + product(bool, UseRTMDeopt, false, \ + "Perform deopt and recompilation based on RTM abort ratio") \ + \ + product(uintx, RTMRetryCount, 5, \ + "Number of RTM retries on lock abort or busy") \ + \ + experimental(intx, RTMSpinLoopCount, 100, \ + "Spin count for lock to become free before RTM retry") \ + \ + experimental(intx, RTMAbortThreshold, 1000, \ + "Calculate abort ratio after this number of aborts") \ + \ + experimental(intx, RTMLockingThreshold, 10000, \ + "Lock count at which to do RTM lock eliding without " \ + "abort ratio calculation") \ + \ + experimental(intx, RTMAbortRatio, 50, \ + "Lock abort ratio at which to stop use RTM lock eliding") \ + \ + experimental(intx, RTMTotalCountIncrRate, 64, \ + "Increment total RTM attempted lock count once every n times") \ + \ + experimental(intx, RTMLockingCalculationDelay, 0, \ + "Number of milliseconds to wait before start calculating aborts " \ + "for RTM locking") \ + \ + experimental(bool, UseRTMXendForLockBusy, true, \ + "Use RTM Xend instead of Xabort when lock busy") \ #endif // CPU_PPC_VM_GLOBALS_PPC_HPP diff --git a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp index b79b1582bb7..00330ef3461 100644 --- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp +++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.cpp @@ -446,7 +446,7 @@ void InterpreterMacroAssembler::get_u4(Register Rdst, Register Rsrc, int offset, } // Load object from cpool->resolved_references(index). 
-void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) { +void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Label *is_null) { assert_different_registers(result, index); get_constant_pool(result); @@ -469,7 +469,7 @@ void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result #endif // Add in the index. add(result, tmp, result); - load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result); + load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null); } // Generate a subtype check: branch to ok_is_subtype if sub_klass is @@ -876,7 +876,6 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { // If condition is true we are done and hence we can store 0 in the displaced // header indicating it is a recursive lock. bne(CCR0, slow_case); - release(); std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() + BasicLock::displaced_header_offset_in_bytes(), monitor); b(done); @@ -1861,7 +1860,7 @@ void InterpreterMacroAssembler::profile_parameters_type(Register tmp1, Register const Register mdp = tmp1; add(mdp, tmp1, R28_mdx); - // Pffset of the current profile entry to update. + // Offset of the current profile entry to update. const Register entry_offset = tmp2; // entry_offset = array len in number of cells ld(entry_offset, in_bytes(ArrayData::array_len_offset()), mdp); diff --git a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp index 555cc5ee9d8..2fa584503a9 100644 --- a/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp +++ b/hotspot/src/cpu/ppc/vm/interp_masm_ppc_64.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. 
All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -85,7 +85,7 @@ class InterpreterMacroAssembler: public MacroAssembler { Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype); // Load object from cpool->resolved_references(index). - void load_resolved_reference_at_index(Register result, Register index); + void load_resolved_reference_at_index(Register result, Register index, Label *is_null = NULL); void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1); void load_receiver(Register Rparam_count, Register Rrecv_dst); diff --git a/hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp b/hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp index 48864ae7213..e42e66c6914 100644 --- a/hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/interpreter_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -47,4 +47,4 @@ } #endif -#endif // CPU_PPC_VM_INTERPRETER_PPC_PP +#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP diff --git a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp index 89973056451..4d0e6df26b5 100644 --- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1455,7 +1455,7 @@ void MacroAssembler::cmpxchgw(ConditionRegister flag, Register dest_current_valu // Several special cases exist to avoid that unnecessary information is generated. // void MacroAssembler::cmpxchgd(ConditionRegister flag, - Register dest_current_value, Register compare_value, Register exchange_value, + Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, Register addr_base, int semantics, bool cmpxchgx_hint, Register int_flag_success, Label* failed_ext, bool contention_hint) { Label retry; @@ -1465,7 +1465,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag, // Save one branch if result is returned via register and result register is different from the other ones. bool use_result_reg = (int_flag_success!=noreg); - bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value && + bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() && int_flag_success!=exchange_value && int_flag_success!=addr_base); assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both"); @@ -1481,7 +1481,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag, // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM). if (contention_hint) { // Don't try to reserve if cmp fails. 
ld(dest_current_value, 0, addr_base); - cmpd(flag, dest_current_value, compare_value); + cmpd(flag, compare_value, dest_current_value); bne(flag, failed); } @@ -1489,7 +1489,7 @@ void MacroAssembler::cmpxchgd(ConditionRegister flag, bind(retry); ldarx(dest_current_value, addr_base, cmpxchgx_hint); - cmpd(flag, dest_current_value, compare_value); + cmpd(flag, compare_value, dest_current_value); if (UseStaticBranchPredictionInCompareAndSwapPPC64) { bne_predict_not_taken(flag, failed); } else { @@ -1873,7 +1873,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). - fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, /*where=*/obj_reg, @@ -1909,7 +1908,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). - fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, /*where=*/obj_reg, @@ -1946,7 +1944,6 @@ void MacroAssembler::biased_locking_enter(ConditionRegister cr_reg, Register obj assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg). - fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ? cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg, /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg, /*where=*/obj_reg, @@ -1987,9 +1984,371 @@ void MacroAssembler::biased_locking_exit (ConditionRegister cr_reg, Register mar beq(cr_reg, done); } +// TM on PPC64. 
+void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) { + Label retry; + bind(retry); + ldarx(result, addr, /*hint*/ false); + addi(result, result, simm16); + stdcx_(result, addr); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0 + } else { + bne( CCR0, retry); // stXcx_ sets CCR0 + } +} + +void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) { + Label retry; + bind(retry); + lwarx(result, addr, /*hint*/ false); + ori(result, result, uimm16); + stwcx_(result, addr); + if (UseStaticBranchPredictionInCompareAndSwapPPC64) { + bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0 + } else { + bne( CCR0, retry); // stXcx_ sets CCR0 + } +} + +#if INCLUDE_RTM_OPT + +// Update rtm_counters based on abort status +// input: abort_status +// rtm_counters (RTMLockingCounters*) +void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) { + // Mapping to keep PreciseRTMLockingStatistics similar to x86. + // x86 ppc (! means inverted, ? means not the same) + // 0 31 Set if abort caused by XABORT instruction. + // 1 ! 7 If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set. + // 2 13 Set if another logical processor conflicted with a memory address that was part of the transaction that aborted. + // 3 10 Set if an internal buffer overflowed. + // 4 ?12 Set if a debug breakpoint was hit. + // 5 ?32 Set if an abort occurred during execution of a nested transaction. + const int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too. 
+ Assembler::tm_failure_persistent, // inverted: transient + Assembler::tm_trans_cf, + Assembler::tm_footprint_of, + Assembler::tm_non_trans_cf, + Assembler::tm_suspended}; + const bool tm_failure_inv[] = {false, true, false, false, false, false}; + assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!"); + + const Register addr_Reg = R0; + // Keep track of offset to where rtm_counters_Reg had pointed to. + int counters_offs = RTMLockingCounters::abort_count_offset(); + addi(addr_Reg, rtm_counters_Reg, counters_offs); + const Register temp_Reg = rtm_counters_Reg; + + //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically + ldx(temp_Reg, addr_Reg); + addi(temp_Reg, temp_Reg, 1); + stdx(temp_Reg, addr_Reg); + + if (PrintPreciseRTMLockingStatistics) { + int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs; + + //mftexasr(abort_status); done by caller + for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) { + counters_offs += counters_offs_delta; + li(temp_Reg, counters_offs_delta); // can't use addi with R0 + add(addr_Reg, addr_Reg, temp_Reg); // point to next counter + counters_offs_delta = sizeof(uintx); + + Label check_abort; + rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0); + if (tm_failure_inv[i]) { + bne(CCR0, check_abort); + } else { + beq(CCR0, check_abort); + } + //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically + ldx(temp_Reg, addr_Reg); + addi(temp_Reg, temp_Reg, 1); + stdx(temp_Reg, addr_Reg); + bind(check_abort); + } + } + li(temp_Reg, -counters_offs); // can't use addi with R0 + add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore +} + +// Branch if (random & (count-1) != 0), count is 2^n +// tmp and CR0 are killed +void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) { + mftb(tmp); + andi_(tmp, tmp, count-1); + bne(CCR0, brLabel); +} + +// Perform abort ratio calculation, set no_rtm bit if 
high ratio. +// input: rtm_counters_Reg (RTMLockingCounters* address) - KILLED +void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data) { + Label L_done, L_check_always_rtm1, L_check_always_rtm2; + + if (RTMLockingCalculationDelay > 0) { + // Delay calculation. + ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr()); + cmpdi(CCR0, rtm_counters_Reg, 0); + beq(CCR0, L_done); + load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload + } + // Abort ratio calculation only if abort_count >= RTMAbortThreshold. + // Aborted transactions = abort_count * 100 + // All transactions = total_count * RTMTotalCountIncrRate + // Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio) + ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg); + cmpdi(CCR0, R0, RTMAbortThreshold); + blt(CCR0, L_check_always_rtm2); + mulli(R0, R0, 100); + + const Register tmpReg = rtm_counters_Reg; + ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg); + mulli(tmpReg, tmpReg, RTMTotalCountIncrRate); + mulli(tmpReg, tmpReg, RTMAbortRatio); + cmpd(CCR0, R0, tmpReg); + blt(CCR0, L_check_always_rtm1); // jump to reload + if (method_data != NULL) { + // Set rtm_state to "no rtm" in MDO. + // Not using a metadata relocation. Method and Class Loader are kept alive anyway. + // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
+ load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg); + atomic_ori_int(R0, tmpReg, NoRTM); + } + b(L_done); + + bind(L_check_always_rtm1); + load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload + bind(L_check_always_rtm2); + ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg); + cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate); + blt(CCR0, L_done); + if (method_data != NULL) { + // Set rtm_state to "always rtm" in MDO. + // Not using a metadata relocation. See above. + load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg); + atomic_ori_int(R0, tmpReg, UseRTM); + } + bind(L_done); +} + +// Update counters and perform abort ratio calculation. +// input: abort_status_Reg +void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data, + bool profile_rtm) { + + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + // Update rtm counters based on state at abort. + // Reads abort_status_Reg, updates flags. + assert_different_registers(abort_status_Reg, temp_Reg); + load_const_optimized(temp_Reg, (address)rtm_counters, R0); + rtm_counters_update(abort_status_Reg, temp_Reg); + if (profile_rtm) { + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data); + } +} + +// Retry on abort if abort's status indicates non-persistent failure. 
+// inputs: retry_count_Reg +// : abort_status_Reg +// output: retry_count_Reg decremented by 1 +void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg, + Label& retryLabel, Label* checkRetry) { + Label doneRetry; + rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0); + bne(CCR0, doneRetry); + if (checkRetry) { bind(*checkRetry); } + addic_(retry_count_Reg, retry_count_Reg, -1); + blt(CCR0, doneRetry); + smt_yield(); // Can't use wait(). No permission (SIGILL). + b(retryLabel); + bind(doneRetry); +} + +// Spin and retry if lock is busy. +// inputs: owner_addr_Reg (address of the monitor's owner field) +// : retry_count_Reg +// output: retry_count_Reg decremented by 1 +// CTR, R0 and CCR0 are killed +void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) { + Label SpinLoop, doneRetry; + addic_(retry_count_Reg, retry_count_Reg, -1); + blt(CCR0, doneRetry); + li(R0, RTMSpinLoopCount); + mtctr(R0); + + bind(SpinLoop); + smt_yield(); // Can't use waitrsv(). No permission (SIGILL). + bdz(retryLabel); + ld(R0, 0, owner_addr_Reg); + cmpdi(CCR0, R0, 0); + bne(CCR0, SpinLoop); + b(retryLabel); + + bind(doneRetry); +} + +// Use RTM for normal stack locks.
+// Input: objReg (object to lock) +void MacroAssembler::rtm_stack_locking(ConditionRegister flag, + Register obj, Register mark_word, Register tmp, + Register retry_on_abort_count_Reg, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL, Label& IsInflated) { + assert(UseRTMForStackLocks, "why call this otherwise?"); + assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); + Label L_rtm_retry, L_decrement_retry, L_on_abort; + + if (RTMRetryCount > 0) { + load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort + bind(L_rtm_retry); + } + andi_(R0, mark_word, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased + bne(CCR0, IsInflated); + + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + Label L_noincrement; + if (RTMTotalCountIncrRate > 1) { + branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement); + } + assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM"); + load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0); + //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically + ldx(mark_word, tmp); + addi(mark_word, mark_word, 1); + stdx(mark_word, tmp); + bind(L_noincrement); + } + tbegin_(); + beq(CCR0, L_on_abort); + ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // Reload in transaction, conflicts need to be tracked. 
+ andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits + cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked + beq(flag, DONE_LABEL); // all done if unlocked + + if (UseRTMXendForLockBusy) { + tend_(); + b(L_decrement_retry); + } else { + tabort_(); + } + bind(L_on_abort); + const Register abort_status_Reg = tmp; + mftexasr(abort_status_Reg); + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm); + } + ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload + if (RTMRetryCount > 0) { + // Retry on lock abort if abort status is not permanent. + rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry); + } else { + bind(L_decrement_retry); + } +} + +// Use RTM for inflating locks +// inputs: obj (object to lock) +// mark_word (current header - KILLED) +// boxReg (on-stack box address (displaced header location) - KILLED) +void MacroAssembler::rtm_inflated_locking(ConditionRegister flag, + Register obj, Register mark_word, Register boxReg, + Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg, + RTMLockingCounters* rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL) { + assert(UseRTMLocking, "why call this otherwise?"); + Label L_rtm_retry, L_decrement_retry, L_on_abort; + // Clean monitor_value bit to get valid pointer. + int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value; + + // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark(). + std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg); + const Register tmpReg = boxReg; + const Register owner_addr_Reg = mark_word; + addi(owner_addr_Reg, mark_word, owner_offset); + + if (RTMRetryCount > 0) { + load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount); // Retry on lock busy. 
+ load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort. + bind(L_rtm_retry); + } + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + Label L_noincrement; + if (RTMTotalCountIncrRate > 1) { + branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement); + } + assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); + load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg); + //atomic_inc_ptr(R0, tmpReg); We don't increment atomically + ldx(tmpReg, R0); + addi(tmpReg, tmpReg, 1); + stdx(tmpReg, R0); + bind(L_noincrement); + } + tbegin_(); + beq(CCR0, L_on_abort); + // We don't reload mark word. Will only be reset at safepoint. + ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked. + cmpdi(flag, R0, 0); + beq(flag, DONE_LABEL); + + if (UseRTMXendForLockBusy) { + tend_(); + b(L_decrement_retry); + } else { + tabort_(); + } + bind(L_on_abort); + const Register abort_status_Reg = tmpReg; + mftexasr(abort_status_Reg); + if (PrintPreciseRTMLockingStatistics || profile_rtm) { + rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm); + // Restore owner_addr_Reg + ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); +#ifdef ASSERT + andi_(R0, mark_word, markOopDesc::monitor_value); + asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint. +#endif + addi(owner_addr_Reg, mark_word, owner_offset); + } + if (RTMRetryCount > 0) { + // Retry on lock abort if abort status is not permanent. + rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry); + } + + // Appears unlocked - try to swing _owner from null to non-null. 
+ cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg, + MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, + MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true); + + if (RTMRetryCount > 0) { + // success done else retry + b(DONE_LABEL); + bind(L_decrement_retry); + // Spin and retry if lock is busy. + rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry); + } else { + bind(L_decrement_retry); + } +} + +#endif // INCLUDE_RTM_OPT + // "The box" is the space on the stack where we copy the object mark. void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, - Register temp, Register displaced_header, Register current_header) { + Register temp, Register displaced_header, Register current_header, + bool try_bias, + RTMLockingCounters* rtm_counters, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, + bool use_rtm, bool profile_rtm) { assert_different_registers(oop, box, temp, displaced_header, current_header); assert(flag != CCR0, "bad condition register"); Label cont; @@ -2006,10 +2365,18 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register return; } - if (UseBiasedLocking) { + if (try_bias) { biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont); } +#if INCLUDE_RTM_OPT + if (UseRTMForStackLocks && use_rtm) { + rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header, + stack_rtm_counters, method_data, profile_rtm, + cont, object_has_monitor); + } +#endif // INCLUDE_RTM_OPT + // Handle existing monitor. if ((EmitSync & 0x02) == 0) { // The object has an existing monitor iff (mark & monitor_value) != 0. @@ -2066,14 +2433,22 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register bind(object_has_monitor); // The object's monitor m is unlocked iff m->owner == NULL, // otherwise m->owner may contain a thread or a stack address. 
- // + +#if INCLUDE_RTM_OPT + // Use the same RTM locking code in 32- and 64-bit VM. + if (use_rtm) { + rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header, + rtm_counters, method_data, profile_rtm, cont); + } else { +#endif // INCLUDE_RTM_OPT + // Try to CAS m->owner from NULL to current thread. addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value); li(displaced_header, 0); // CmpxchgX sets flag to cmpX(current, displaced). cmpxchgd(/*flag=*/flag, /*current_value=*/current_header, - /*compare_value=*/displaced_header, + /*compare_value=*/(intptr_t)0, /*exchange_value=*/R16_thread, /*where=*/temp, MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq, @@ -2095,6 +2470,10 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp, // "monitor->OwnerIsThread shouldn't be 0", -1); # endif + +#if INCLUDE_RTM_OPT + } // use_rtm() +#endif } bind(cont); @@ -2103,7 +2482,8 @@ void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register } void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, - Register temp, Register displaced_header, Register current_header) { + Register temp, Register displaced_header, Register current_header, + bool try_bias, bool use_rtm) { assert_different_registers(oop, box, temp, displaced_header, current_header); assert(flag != CCR0, "bad condition register"); Label cont; @@ -2115,10 +2495,24 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe return; } - if (UseBiasedLocking) { + if (try_bias) { biased_locking_exit(flag, oop, current_header, cont); } +#if INCLUDE_RTM_OPT + if (UseRTMForStackLocks && use_rtm) { + assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking"); + Label L_regular_unlock; + ld(current_header, oopDesc::mark_offset_in_bytes(), oop); 
// fetch markword + andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits + cmpwi(flag, R0, markOopDesc::unlocked_value); // bits = 001 unlocked + bne(flag, L_regular_unlock); // else RegularLock + tend_(); // otherwise end... + b(cont); // ... and we're done + bind(L_regular_unlock); + } +#endif + // Find the lock address and load the displaced header from the stack. ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box); @@ -2129,13 +2523,12 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe // Handle existing monitor. if ((EmitSync & 0x02) == 0) { // The object has an existing monitor iff (mark & monitor_value) != 0. + RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done ld(current_header, oopDesc::mark_offset_in_bytes(), oop); - andi(temp, current_header, markOopDesc::monitor_value); - cmpdi(flag, temp, 0); - bne(flag, object_has_monitor); + andi_(R0, current_header, markOopDesc::monitor_value); + bne(CCR0, object_has_monitor); } - // Check if it is still a light weight lock, this is is true if we see // the stack address of the basicLock in the markOop of the object. // Cmpxchg sets flag to cmpd(current_header, box). @@ -2158,6 +2551,20 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe bind(object_has_monitor); addi(current_header, current_header, -markOopDesc::monitor_value); // monitor ld(temp, ObjectMonitor::owner_offset_in_bytes(), current_header); + + // It's inflated. +#if INCLUDE_RTM_OPT + if (use_rtm) { + Label L_regular_inflated_unlock; + // Clean monitor_value bit to get valid pointer + cmpdi(flag, temp, 0); + bne(flag, L_regular_inflated_unlock); + tend_(); + b(cont); + bind(L_regular_inflated_unlock); + } +#endif + ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header); xorr(temp, R16_thread, temp); // Will be 0 if we are the owner. 
orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions. @@ -2441,6 +2848,8 @@ void MacroAssembler::get_vm_result(Register oop_result) { // oop_result // R16_thread->in_bytes(JavaThread::vm_result_offset()) + verify_thread(); + ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread); li(R0, 0); std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread); @@ -2462,26 +2871,24 @@ void MacroAssembler::get_vm_result_2(Register metadata_result) { std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread); } - -void MacroAssembler::encode_klass_not_null(Register dst, Register src) { +Register MacroAssembler::encode_klass_not_null(Register dst, Register src) { Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided. if (Universe::narrow_klass_base() != 0) { // Use dst as temp if it is free. - load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg); - sub(dst, current, R0); + sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0); current = dst; } if (Universe::narrow_klass_shift() != 0) { srdi(dst, current, Universe::narrow_klass_shift()); current = dst; } - mr_if_needed(dst, current); // Move may be required. 
+ return current; } void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) { if (UseCompressedClassPointers) { - encode_klass_not_null(ck, klass); - stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop); + Register compressedKlass = encode_klass_not_null(ck, klass); + stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop); } else { std(klass, oopDesc::klass_offset_in_bytes(), dst_oop); } @@ -2514,8 +2921,7 @@ void MacroAssembler::decode_klass_not_null(Register dst, Register src) { sldi(shifted_src, src, Universe::narrow_klass_shift()); } if (Universe::narrow_klass_base() != 0) { - load_const(R0, Universe::narrow_klass_base()); - add(dst, shifted_src, R0); + add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0); } } diff --git a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp index 46216782c97..2ed004aba44 100644 --- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,6 +27,7 @@ #define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP #include "asm/assembler.hpp" +#include "runtime/rtmLocking.hpp" #include "utilities/macros.hpp" // MacroAssembler extends Assembler by a few frequently used macros. 
@@ -432,8 +433,8 @@ class MacroAssembler: public Assembler { int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg, bool contention_hint = false); void cmpxchgd(ConditionRegister flag, - Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base, - int semantics, bool cmpxchgx_hint = false, + Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value, + Register addr_base, int semantics, bool cmpxchgx_hint = false, Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false); // interface method calling @@ -506,8 +507,42 @@ class MacroAssembler: public Assembler { // biased locking exit case failed. void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done); - void compiler_fast_lock_object( ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3); - void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3); + void atomic_inc_ptr(Register addr, Register result, int simm16 = 1); + void atomic_ori_int(Register addr, Register result, int uimm16); + +#if INCLUDE_RTM_OPT + void rtm_counters_update(Register abort_status, Register rtm_counters); + void branch_on_random_using_tb(Register tmp, int count, Label& brLabel); + void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters, + Metadata* method_data); + void rtm_profiling(Register abort_status_Reg, Register temp_Reg, + RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm); + void rtm_retry_lock_on_abort(Register retry_count, Register abort_status, + Label& retryLabel, Label* checkRetry = NULL); + void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel); + void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp, + Register 
retry_on_abort_count, + RTMLockingCounters* stack_rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL, Label& IsInflated); + void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box, + Register retry_on_busy_count, Register retry_on_abort_count, + RTMLockingCounters* rtm_counters, + Metadata* method_data, bool profile_rtm, + Label& DONE_LABEL); +#endif + + void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box, + Register tmp1, Register tmp2, Register tmp3, + bool try_bias = UseBiasedLocking, + RTMLockingCounters* rtm_counters = NULL, + RTMLockingCounters* stack_rtm_counters = NULL, + Metadata* method_data = NULL, + bool use_rtm = false, bool profile_rtm = false); + + void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, + Register tmp1, Register tmp2, Register tmp3, + bool try_bias = UseBiasedLocking, bool use_rtm = false); // Support for serializing memory accesses between threads void serialize_memory(Register thread, Register tmp1, Register tmp2); @@ -576,7 +611,7 @@ class MacroAssembler: public Assembler { Register tmp = noreg); // Null allowed. - inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg); + inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL); // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong. // src == d allowed. @@ -593,7 +628,7 @@ class MacroAssembler: public Assembler { void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified. static int instr_size_for_decode_klass_not_null(); void decode_klass_not_null(Register dst, Register src = noreg); - void encode_klass_not_null(Register dst, Register src = noreg); + Register encode_klass_not_null(Register dst, Register src = noreg); // Load common heap base into register. 
void reinit_heapbase(Register d, Register tmp = noreg); diff --git a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp index f5d19dff066..a52931d860d 100644 --- a/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp +++ b/hotspot/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -333,19 +333,29 @@ inline void MacroAssembler::store_heap_oop_not_null(Register d, RegisterOrConsta } } -inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) { +inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1, Label *is_null) { if (UseCompressedOops) { lwz(d, offs, s1); - decode_heap_oop(d); + if (is_null != NULL) { + cmpwi(CCR0, d, 0); + beq(CCR0, *is_null); + decode_heap_oop_not_null(d); + } else { + decode_heap_oop(d); + } } else { ld(d, offs, s1); + if (is_null != NULL) { + cmpdi(CCR0, d, 0); + beq(CCR0, *is_null); + } } } inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) { Register current = (src != noreg) ? src : d; // Oop to be compressed is in d if no src provided. 
if (Universe::narrow_oop_base_overlaps()) { - sub(d, current, R30); + sub_const_optimized(d, current, Universe::narrow_oop_base(), R0); current = d; } if (Universe::narrow_oop_shift() != 0) { @@ -358,7 +368,7 @@ inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register sr inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) { if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d && Universe::narrow_oop_shift() != 0) { - mr(d, R30); + load_const_optimized(d, Universe::narrow_oop_base(), R0); rldimi(d, src, Universe::narrow_oop_shift(), 32-Universe::narrow_oop_shift()); return d; } @@ -369,7 +379,7 @@ inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register sr current = d; } if (Universe::narrow_oop_base() != NULL) { - add(d, current, R30); + add_const_optimized(d, current, Universe::narrow_oop_base(), R0); current = d; } return current; // Decoded oop is in this register. @@ -377,11 +387,19 @@ inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register sr inline void MacroAssembler::decode_heap_oop(Register d) { Label isNull; + bool use_isel = false; if (Universe::narrow_oop_base() != NULL) { cmpwi(CCR0, d, 0); - beq(CCR0, isNull); + if (VM_Version::has_isel()) { + use_isel = true; + } else { + beq(CCR0, isNull); + } } decode_heap_oop_not_null(d); + if (use_isel) { + isel_0(d, CCR0, Assembler::equal); + } bind(isNull); } diff --git a/hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp b/hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp index 640813e7d3e..63fd6070272 100644 --- a/hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/methodHandles_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -27,9 +27,6 @@ // These definitions are inlined into class MethodHandles. // Adapters -//static unsigned int adapter_code_size() { -// return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0); -//} enum /* platform_dependent_constants */ { adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000)) }; @@ -45,7 +42,9 @@ public: static void verify_method_handle(MacroAssembler* _masm, Register mh_reg, Register temp_reg, Register temp2_reg) { - Unimplemented(); + verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), + temp_reg, temp2_reg, + "reference is a MH"); } static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; diff --git a/hotspot/src/cpu/ppc/vm/ppc.ad b/hotspot/src/cpu/ppc/vm/ppc.ad index 4c64d96a3e4..388b102ee11 100644 --- a/hotspot/src/cpu/ppc/vm/ppc.ad +++ b/hotspot/src/cpu/ppc/vm/ppc.ad @@ -447,8 +447,8 @@ reg_class bits32_reg_rw( R26, R27, R28, -/*R29*/ // global TOC -/*R30*/ // Narrow Oop Base +/*R29,*/ // global TOC + R30, R31 ); @@ -484,58 +484,11 @@ reg_class bits32_reg_ro( R26, R27, R28, -/*R29*/ -/*R30*/ // Narrow Oop Base +/*R29,*/ + R30, R31 ); -// Complement-required-in-pipeline operands for narrow oops. -reg_class bits32_reg_ro_not_complement ( -/*R0*/ // R0 - R1, // SP - R2, // TOC - R3, - R4, - R5, - R6, - R7, - R8, - R9, - R10, - R11, - R12, -/*R13,*/ // system thread id - R14, - R15, - R16, // R16_thread - R17, - R18, - R19, - R20, - R21, - R22, -/*R23, - R24, - R25, - R26, - R27, - R28,*/ -/*R29,*/ // TODO: let allocator handle TOC!! -/*R30,*/ - R31 -); - -// Complement-required-in-pipeline operands for narrow oops. -// See 64-bit declaration. 
-reg_class bits32_reg_ro_complement ( - R23, - R24, - R25, - R26, - R27, - R28 -); - reg_class rscratch1_bits32_reg(R11); reg_class rscratch2_bits32_reg(R12); reg_class rarg1_bits32_reg(R3); @@ -591,8 +544,8 @@ reg_class bits64_reg_rw( R26_H, R26, R27_H, R27, R28_H, R28, -/*R29_H, R29*/ -/*R30_H, R30*/ +/*R29_H, R29,*/ + R30_H, R30, R31_H, R31 ); @@ -629,8 +582,8 @@ reg_class bits64_reg_leaf_call( R26_H, R26, R27_H, R27, R28_H, R28, -/*R29_H, R29*/ -/*R30_H, R30*/ +/*R29_H, R29,*/ + R30_H, R30, R31_H, R31 ); @@ -667,8 +620,8 @@ reg_class bits64_constant_table_base( R26_H, R26, R27_H, R27, R28_H, R28, -/*R29_H, R29*/ -/*R30_H, R30*/ +/*R29_H, R29,*/ + R30_H, R30, R31_H, R31 ); @@ -704,64 +657,11 @@ reg_class bits64_reg_ro( R26_H, R26, R27_H, R27, R28_H, R28, -/*R29_H, R29*/ // TODO: let allocator handle TOC!! -/*R30_H, R30,*/ +/*R29_H, R29,*/ // TODO: let allocator handle TOC!! + R30_H, R30, R31_H, R31 ); -// Complement-required-in-pipeline operands. -reg_class bits64_reg_ro_not_complement ( -/*R0_H, R0*/ // R0 - R1_H, R1, // SP - R2_H, R2, // TOC - R3_H, R3, - R4_H, R4, - R5_H, R5, - R6_H, R6, - R7_H, R7, - R8_H, R8, - R9_H, R9, - R10_H, R10, - R11_H, R11, - R12_H, R12, -/*R13_H, R13*/ // system thread id - R14_H, R14, - R15_H, R15, - R16_H, R16, // R16_thread - R17_H, R17, - R18_H, R18, - R19_H, R19, - R20_H, R20, - R21_H, R21, - R22_H, R22, -/*R23_H, R23, - R24_H, R24, - R25_H, R25, - R26_H, R26, - R27_H, R27, - R28_H, R28,*/ -/*R29_H, R29*/ // TODO: let allocator handle TOC!! -/*R30_H, R30,*/ - R31_H, R31 -); - -// Complement-required-in-pipeline operands. -// This register mask is used for the trap instructions that implement -// the null checks on AIX. The trap instruction first computes the -// complement of the value it shall trap on. Because of this, the -// instruction can not be scheduled in the same cycle as an other -// instruction reading the normal value of the same register. 
So we -// force the value to check into 'bits64_reg_ro_not_complement' -// and then copy it to 'bits64_reg_ro_complement' for the trap. -reg_class bits64_reg_ro_complement ( - R23_H, R23, - R24_H, R24, - R25_H, R25, - R26_H, R26, - R27_H, R27, - R28_H, R28 -); - // ---------------------------- // Special Class for Condition Code Flags Register @@ -777,6 +677,17 @@ reg_class int_flags( CCR7 ); +reg_class int_flags_ro( + CCR0, + CCR1, + CCR2, + CCR3, + CCR4, + CCR5, + CCR6, + CCR7 +); + reg_class int_flags_CR0(CCR0); reg_class int_flags_CR1(CCR1); reg_class int_flags_CR6(CCR6); @@ -2876,7 +2787,7 @@ encode %{ // Use release_store for card-marking to ensure that previous // oop-stores are visible before the card-mark change. - enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{ + enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); // FIXME: Implement this as a cmove and use a fixed condition code // register which is written on every transition to compiled code, @@ -2897,8 +2808,8 @@ encode %{ // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the // StoreStore barrier conditionally. 
__ lwz(R0, 0, $releaseFieldAddr$$Register); - __ cmpwi(CCR0, R0, 0); - __ beq_predict_taken(CCR0, skip_storestore); + __ cmpwi($crx$$CondRegister, R0, 0); + __ beq_predict_taken($crx$$CondRegister, skip_storestore); #endif __ li(R0, 0); __ membar(Assembler::StoreStore); @@ -3108,7 +3019,7 @@ encode %{ nodes->push(n2); %} - enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{ + enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{ // TODO: PPC port $archOpcode(ppc64Opcode_cmove); MacroAssembler _masm(&cbuf); @@ -3123,7 +3034,7 @@ encode %{ __ bind(done); %} - enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{ + enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{ // TODO: PPC port $archOpcode(ppc64Opcode_cmove); MacroAssembler _masm(&cbuf); @@ -3269,7 +3180,7 @@ encode %{ __ bind(done); %} - enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{ + enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{ // TODO: PPC port $archOpcode(ppc64Opcode_cmove); MacroAssembler _masm(&cbuf); @@ -3281,7 +3192,7 @@ encode %{ __ bind(done); %} - enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{ + enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{ // TODO: PPC port $archOpcode(ppc64Opcode_bc); MacroAssembler _masm(&cbuf); @@ -3309,7 +3220,7 @@ encode %{ l); %} - enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{ + enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{ // The scheduler doesn't know about branch shortening, so we set the opcode // to ppc64Opcode_bc in order to hide this detail from the scheduler. // TODO: PPC port $archOpcode(ppc64Opcode_bc); @@ -3341,7 +3252,7 @@ encode %{ %} // Branch used with Power6 scheduling (can be shortened without changing the node). 
- enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{ + enc_class enc_bc_short_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{ // The scheduler doesn't know about branch shortening, so we set the opcode // to ppc64Opcode_bc in order to hide this detail from the scheduler. // TODO: PPC port $archOpcode(ppc64Opcode_bc); @@ -4700,6 +4611,15 @@ operand flagsReg() %{ interface(REG_INTER); %} +operand flagsRegSrc() %{ + constraint(ALLOC_IN_RC(int_flags_ro)); + match(RegFlags); + match(flagsReg); + match(flagsRegCR0); + format %{ %} + interface(REG_INTER); +%} + // Condition Code Flag Register CR0 operand flagsRegCR0() %{ constraint(ALLOC_IN_RC(int_flags_CR0)); @@ -4783,6 +4703,13 @@ operand iRegN2P(iRegNsrc reg) %{ predicate(false /* TODO: PPC port MatchDecodeNodes*/); constraint(ALLOC_IN_RC(bits32_reg_ro)); match(DecodeN reg); + format %{ "$reg" %} + interface(REG_INTER) +%} + +operand iRegN2P_klass(iRegNsrc reg) %{ + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(bits32_reg_ro)); match(DecodeNKlass reg); format %{ "$reg" %} interface(REG_INTER) @@ -4839,6 +4766,19 @@ operand indirectNarrow(iRegNsrc reg) %{ predicate(false /* TODO: PPC port MatchDecodeNodes*/); constraint(ALLOC_IN_RC(bits64_reg_ro)); match(DecodeN reg); + op_cost(100); + format %{ "[$reg]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp(0x0); + %} +%} + +operand indirectNarrow_klass(iRegNsrc reg) %{ + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(bits64_reg_ro)); match(DecodeNKlass reg); op_cost(100); format %{ "[$reg]" %} @@ -4855,6 +4795,19 @@ operand indOffset16Narrow(iRegNsrc reg, immL16 offset) %{ predicate(false /* TODO: PPC port MatchDecodeNodes*/); constraint(ALLOC_IN_RC(bits64_reg_ro)); match(AddP (DecodeN reg) offset); + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + 
index(0x0); + scale(0x0); + disp($offset); + %} +%} + +operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{ + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(bits64_reg_ro)); match(AddP (DecodeNKlass reg) offset); op_cost(100); format %{ "[$reg + $offset]" %} @@ -4871,6 +4824,19 @@ operand indOffset16NarrowAlg4(iRegNsrc reg, immL16Alg4 offset) %{ predicate(false /* TODO: PPC port MatchDecodeNodes*/); constraint(ALLOC_IN_RC(bits64_reg_ro)); match(AddP (DecodeN reg) offset); + op_cost(100); + format %{ "[$reg + $offset]" %} + interface(MEMORY_INTER) %{ + base($reg); + index(0x0); + scale(0x0); + disp($offset); + %} +%} + +operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{ + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); + constraint(ALLOC_IN_RC(bits64_reg_ro)); match(AddP (DecodeNKlass reg) offset); op_cost(100); format %{ "[$reg + $offset]" %} @@ -4998,9 +4964,9 @@ operand cmpOp() %{ // encoding and format. The classic case of this is memory operands. // Indirect is not included since its use is limited to Compare & Swap. -opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow); +opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass); // Memory operand where offsets are 4-aligned. Required for ld, std. -opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4); +opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass); opclass indirectMemory(indirect, indirectNarrow); // Special opclass for I and ConvL2I. @@ -5009,7 +4975,7 @@ opclass iRegIsrc_iRegL2Isrc(iRegIsrc, iRegL2Isrc); // Operand classes to match encode and decode. iRegN_P2N is only used // for storeN. I have never seen an encode node elsewhere. 
opclass iRegN_P2N(iRegNsrc, iRegP2N); -opclass iRegP_N2P(iRegPsrc, iRegN2P); +opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass); //----------PIPELINE----------------------------------------------------------- @@ -5593,6 +5559,19 @@ instruct loadN2P_unscaled(iRegPdst dst, memory mem) %{ ins_pipe(pipe_class_memory); %} +instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{ + match(Set dst (DecodeNKlass (LoadNKlass mem))); + // SAPJVM GL 2014-05-21 Differs. + predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 && + _kids[0]->_leaf->as_Load()->is_unordered()); + ins_cost(MEMORY_REF_COST); + + format %{ "LWZ $dst, $mem \t// DecodeN (unscaled)" %} + size(4); + ins_encode( enc_lwz(dst, mem) ); + ins_pipe(pipe_class_memory); +%} + // Load Pointer instruct loadP(iRegPdst dst, memoryAlg4 mem) %{ match(Set dst (LoadP mem)); @@ -5669,8 +5648,9 @@ instruct loadF(regF dst, memory mem) %{ %} // Load Float acquire. -instruct loadF_ac(regF dst, memory mem) %{ +instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{ match(Set dst (LoadF mem)); + effect(TEMP cr0); ins_cost(3*MEMORY_REF_COST); format %{ "LFS $dst, $mem \t// acquire\n\t" @@ -5705,8 +5685,9 @@ instruct loadD(regD dst, memory mem) %{ %} // Load Double - aligned acquire. 
-instruct loadD_ac(regD dst, memory mem) %{ +instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{ match(Set dst (LoadD mem)); + effect(TEMP cr0); ins_cost(3*MEMORY_REF_COST); format %{ "LFD $dst, $mem \t// acquire\n\t" @@ -6034,11 +6015,10 @@ instruct clearMs32b(iRegNdst dst, iRegNsrc src) %{ instruct loadBase(iRegLdst dst) %{ effect(DEF dst); - format %{ "MR $dst, r30_heapbase" %} - size(4); + format %{ "LoadConst $dst, heapbase" %} ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_or); - __ mr($dst$$Register, R30); + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ load_const_optimized($dst$$Register, Universe::narrow_oop_base(), R0); %} ins_pipe(pipe_class_default); %} @@ -6114,7 +6094,7 @@ instruct loadConNKlass_lo(iRegNdst dst, immNKlass_NM src1, iRegNsrc src2) %{ effect(TEMP src2); ins_cost(DEFAULT_COST); - format %{ "ORI $dst, $src1, $src2 \t// narrow klass lo" %} + format %{ "ORI $dst, $src1, $src2 \t// narrow klass lo" %} size(4); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_ori); @@ -6563,8 +6543,9 @@ instruct storeD(memory mem, regD src) %{ // do a releasing store. For this, it gets the address of // CMSCollectorCardTableModRefBSExt::_requires_release as input. // (Using releaseFieldAddr in the match rule is a hack.) 
-instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{ +instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{ match(Set mem (StoreCM mem releaseFieldAddr)); + effect(TEMP crx); predicate(false); ins_cost(MEMORY_REF_COST); @@ -6572,7 +6553,7 @@ instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{ ins_cannot_rematerialize(true); format %{ "STB #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %} - ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) ); + ins_encode( enc_cms_card_mark(mem, releaseFieldAddr, crx) ); ins_pipe(pipe_class_memory); %} @@ -6589,8 +6570,9 @@ instruct storeCM_CMS_ExEx(memory mem, immI_0 zero) %{ expand %{ immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %} iRegLdst releaseFieldAddress; + flagsReg crx; loadConL_Ex(releaseFieldAddress, baseImm); - storeCM_CMS(mem, releaseFieldAddress); + storeCM_CMS(mem, releaseFieldAddress, crx); %} %} @@ -6639,39 +6621,34 @@ instruct encodeP_sub(iRegPdst dst, iRegPdst src) %{ predicate(false); format %{ "SUB $dst, $src, oop_base \t// encode" %} - size(4); ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_subf); - __ subf($dst$$Register, R30, $src$$Register); + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ sub_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0); %} ins_pipe(pipe_class_default); %} // Conditional sub base. -instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{ +instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{ // The match rule is needed to make it a 'MachTypeNode'! 
match(Set dst (EncodeP (Binary crx src1))); predicate(false); - ins_variable_size_depending_on_alignment(true); - format %{ "BEQ $crx, done\n\t" - "SUB $dst, $src1, R30 \t// encode: subtract base if != NULL\n" + "SUB $dst, $src1, heapbase \t// encode: subtract base if != NULL\n" "done:" %} - size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8); ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_cmove); + // TODO: PPC port $archOpcode(ppc64Opcode_compound); Label done; __ beq($crx$$CondRegister, done); - __ subf($dst$$Register, R30, $src1$$Register); - // TODO PPC port __ endgroup_if_needed(_size == 12); + __ sub_const_optimized($dst$$Register, $src1$$Register, Universe::narrow_oop_base(), R0); __ bind(done); %} ins_pipe(pipe_class_default); %} // Power 7 can use isel instruction -instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{ +instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{ // The match rule is needed to make it a 'MachTypeNode'! match(Set dst (EncodeP (Binary crx src1))); predicate(false); @@ -6777,42 +6754,37 @@ instruct decodeN_add(iRegPdst dst, iRegPdst src) %{ match(Set dst (DecodeN src)); predicate(false); - format %{ "ADD $dst, $src, R30 \t// DecodeN, add oop base" %} - size(4); + format %{ "ADD $dst, $src, heapbase \t// DecodeN, add oop base" %} ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_add); - __ add($dst$$Register, $src$$Register, R30); + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0); %} ins_pipe(pipe_class_default); %} // conditianal add base for expand -instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{ +instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{ // The match rule is needed to make it a 'MachTypeNode'! 
// NOTICE that the rule is nonsense - we just have to make sure that: // - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp) // - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC. - match(Set dst (DecodeN (Binary crx src1))); + match(Set dst (DecodeN (Binary crx src))); predicate(false); - ins_variable_size_depending_on_alignment(true); - format %{ "BEQ $crx, done\n\t" - "ADD $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n" + "ADD $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n" "done:" %} - size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8); ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_cmove); + // TODO: PPC port $archOpcode(ppc64Opcode_compound); Label done; __ beq($crx$$CondRegister, done); - __ add($dst$$Register, $src1$$Register, R30); - // TODO PPC port __ endgroup_if_needed(_size == 12); + __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0); __ bind(done); %} ins_pipe(pipe_class_default); %} -instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{ +instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{ // The match rule is needed to make it a 'MachTypeNode'! 
// NOTICE that the rule is nonsense - we just have to make sure that: // - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp) @@ -6888,7 +6860,7 @@ instruct decodeN_Disjoint_notNull_Ex(iRegPdst dst, iRegNsrc src) %{ Universe::narrow_oop_base_disjoint()); ins_cost(DEFAULT_COST); - format %{ "MOV $dst, R30 \t\n" + format %{ "MOV $dst, heapbase \t\n" "RLDIMI $dst, $src, shift, 32-shift \t// decode with disjoint base" %} postalloc_expand %{ loadBaseNode *n1 = new loadBaseNode(); @@ -6946,7 +6918,7 @@ instruct decodeN_Disjoint_isel_Ex(iRegPdst dst, iRegNsrc src, flagsReg crx) %{ assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!"); ra_->set_oop(n_cond_set, true); - + ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx)); ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this)); @@ -7303,7 +7275,7 @@ instruct membar_CPUOrder() %{ //----------Conditional Move--------------------------------------------------- // Cmove using isel. 
-instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{ +instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); @@ -7321,7 +7293,7 @@ instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{ ins_pipe(pipe_class_default); %} -instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{ +instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); predicate(!VM_Version::has_isel()); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7335,7 +7307,7 @@ instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{ ins_pipe(pipe_class_default); %} -instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{ +instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{ match(Set dst (CMoveI (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7349,7 +7321,7 @@ instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{ %} // Cmove using isel. 
-instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{ +instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); @@ -7367,7 +7339,7 @@ instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{ ins_pipe(pipe_class_default); %} -instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{ +instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); predicate(!VM_Version::has_isel()); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7381,7 +7353,7 @@ instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{ ins_pipe(pipe_class_default); %} -instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{ +instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{ match(Set dst (CMoveL (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7395,7 +7367,7 @@ instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{ %} // Cmove using isel. -instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{ +instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); @@ -7414,7 +7386,7 @@ instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{ %} // Conditional move for RegN. Only cmov(reg, reg). 
-instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{ +instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); predicate(!VM_Version::has_isel()); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7428,7 +7400,7 @@ instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{ ins_pipe(pipe_class_default); %} -instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{ +instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{ match(Set dst (CMoveN (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7442,7 +7414,7 @@ instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{ %} // Cmove using isel. -instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{ +instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); predicate(VM_Version::has_isel()); ins_cost(DEFAULT_COST); @@ -7460,7 +7432,7 @@ instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{ ins_pipe(pipe_class_default); %} -instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{ +instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); predicate(!VM_Version::has_isel()); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7474,7 +7446,7 @@ instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{ ins_pipe(pipe_class_default); %} -instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{ +instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{ match(Set dst (CMoveP (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7487,7 +7459,7 @@ instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{ ins_pipe(pipe_class_default); %} 
-instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{ +instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{ match(Set dst (CMoveF (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7509,7 +7481,7 @@ instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{ ins_pipe(pipe_class_default); %} -instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{ +instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{ match(Set dst (CMoveD (Binary cmp crx) (Binary dst src))); ins_cost(DEFAULT_COST+BRANCH_COST); @@ -7542,8 +7514,9 @@ instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{ // Mem_ptr must be a memory operand, else this node does not get // Flag_needs_anti_dependence_check set by adlc. If this is not set this node // can be rematerialized which leads to errors. -instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{ +instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{ match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal))); + effect(TEMP cr0); format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); @@ -7560,16 +7533,16 @@ instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLs // Mem_ptr must be a memory operand, else this node does not get // Flag_needs_anti_dependence_check set by adlc. If this is not set this node // can be rematerialized which leads to errors. 
-instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{ - match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal))); - format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %} +instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{ + match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal))); + ins_cost(2*MEMORY_REF_COST); + + format %{ "STDCX_ if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %} ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_compound); - __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register, - MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(), - noreg, NULL, true); + // TODO: PPC port $archOpcode(ppc64Opcode_stdcx_); + __ stdcx_($newVal$$Register, $mem_ptr$$Register); %} - ins_pipe(pipe_class_default); + ins_pipe(pipe_class_memory); %} // Implement LoadPLocked. Must be ordered against changes of the memory location @@ -7577,13 +7550,14 @@ instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPs // Don't know whether this is ever used. instruct loadPLocked(iRegPdst dst, memory mem) %{ match(Set dst (LoadPLocked mem)); - ins_cost(MEMORY_REF_COST); + ins_cost(2*MEMORY_REF_COST); - format %{ "LD $dst, $mem \t// loadPLocked\n\t" - "TWI $dst\n\t" - "ISYNC" %} - size(12); - ins_encode( enc_ld_ac(dst, mem) ); + format %{ "LDARX $dst, $mem \t// loadPLocked\n\t" %} + size(4); + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_ldarx); + __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update()); + %} ins_pipe(pipe_class_memory); %} @@ -7593,8 +7567,9 @@ instruct loadPLocked(iRegPdst dst, memory mem) %{ // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))" cannot be // matched. 
-instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{ +instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2))); + effect(TEMP cr0); format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode %{ @@ -7607,8 +7582,9 @@ instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc ins_pipe(pipe_class_default); %} -instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{ +instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2))); + effect(TEMP cr0); format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode %{ @@ -7621,8 +7597,9 @@ instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc ins_pipe(pipe_class_default); %} -instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{ +instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2))); + effect(TEMP cr0); format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %} // Variable size: instruction count smaller if regs are disjoint. 
ins_encode %{ @@ -7635,8 +7612,9 @@ instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc ins_pipe(pipe_class_default); %} -instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{ +instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{ match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2))); + effect(TEMP cr0); format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode %{ @@ -7649,48 +7627,54 @@ instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc ins_pipe(pipe_class_default); %} -instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ +instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddI mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndAddI $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndAddI(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ +instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndAddL mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndAddL $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndAddL(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{ +instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetI mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndSetI $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. 
ins_encode( enc_GetAndSetI(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{ +instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetL mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndSetL $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndSetL(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{ +instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetP mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndSetP $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndSetL(res, mem_ptr, src) ); ins_pipe(pipe_class_default); %} -instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{ +instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{ match(Set res (GetAndSetN mem_ptr src)); + effect(TEMP cr0); format %{ "GetAndSetN $res, $mem_ptr, $src" %} // Variable size: instruction count smaller if regs are disjoint. ins_encode( enc_GetAndSetI(res, mem_ptr, src) ); @@ -7898,18 +7882,8 @@ instruct subI_reg_reg(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ %} // Immediate Subtraction -// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal), -// so this rule seems to be unused. 
-instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{ - match(Set dst (SubI src1 src2)); - format %{ "SUBI $dst, $src1, $src2" %} - size(4); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_addi); - __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1)); - %} - ins_pipe(pipe_class_default); -%} +// Immediate Subtraction: The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal), +// Don't try to use addi with - $src2$$constant since it can overflow when $src2$$constant == minI16. // SubI from constant (using subfic). instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{ @@ -7989,22 +7963,6 @@ instruct subI_regL_regL(iRegIdst dst, iRegLsrc src1, iRegLsrc src2) %{ ins_pipe(pipe_class_default); %} -// Immediate Subtraction -// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal), -// so this rule seems to be unused. -// No constant pool entries required. -instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{ - match(Set dst (SubL src1 src2)); - - format %{ "SUBI $dst, $src1, $src2 \t// long" %} - size(4); - ins_encode %{ - // TODO: PPC port $archOpcode(ppc64Opcode_addi); - __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1)); - %} - ins_pipe(pipe_class_default); -%} - // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for // positive longs and 0xF...F for negative ones. 
instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{ @@ -8165,7 +8123,7 @@ instruct divI_reg_regnotMinus1(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ ins_pipe(pipe_class_default); %} -instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{ +instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{ effect(USE_DEF dst, USE src1, USE crx); predicate(false); @@ -8228,7 +8186,7 @@ instruct divL_reg_regnotMinus1(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ ins_pipe(pipe_class_default); %} -instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{ +instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{ effect(USE_DEF dst, USE src1, USE crx); predicate(false); @@ -8281,7 +8239,7 @@ instruct modI_reg_reg_Ex(iRegIdst dst, iRegIsrc src1, iRegIsrc src2) %{ %} // Long Remainder with registers -instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{ +instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ match(Set dst (ModL src1 src2)); ins_cost(10*DEFAULT_COST); @@ -9011,7 +8969,6 @@ instruct andL_reg_reg(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{ instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{ match(Set dst (AndL src1 src2)); effect(KILL cr0); - ins_cost(DEFAULT_COST); format %{ "ANDI $dst, $src1, $src2 \t// long" %} size(4); @@ -9803,7 +9760,7 @@ instruct convD2IRaw_regD(regD dst, regD src) %{ ins_pipe(pipe_class_default); %} -instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{ +instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{ // no match-rule, false predicate effect(DEF dst, USE crx, USE src); predicate(false); @@ -9817,7 +9774,7 @@ instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{ ins_pipe(pipe_class_default); %} -instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{ +instruct 
cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{ // no match-rule, false predicate effect(DEF dst, USE crx, USE mem); predicate(false); @@ -9972,7 +9929,7 @@ instruct convF2LRaw_regF(regF dst, regF src) %{ ins_pipe(pipe_class_default); %} -instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{ +instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{ // no match-rule, false predicate effect(DEF dst, USE crx, USE src); predicate(false); @@ -9986,7 +9943,7 @@ instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{ ins_pipe(pipe_class_default); %} -instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{ +instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{ // no match-rule, false predicate effect(DEF dst, USE crx, USE mem); predicate(false); @@ -10255,7 +10212,6 @@ instruct testI_reg_imm(flagsRegCR0 cr0, iRegIsrc src1, uimmI16 src2, immI_0 zero size(4); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_andi_); - // FIXME: avoid andi_ ? __ andi_(R0, $src1$$Register, $src2$$constant); %} ins_pipe(pipe_class_compare); @@ -10302,13 +10258,12 @@ instruct testL_reg_imm(flagsRegCR0 cr0, iRegLsrc src1, uimmL16 src2, immL_0 zero size(4); ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_andi_); - // FIXME: avoid andi_ ? 
__ andi_(R0, $src1$$Register, $src2$$constant); %} ins_pipe(pipe_class_compare); %} -instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{ +instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsRegSrc crx) %{ // no match-rule, false predicate effect(DEF dst, USE crx); predicate(false); @@ -10332,7 +10287,7 @@ instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{ ins_pipe(pipe_class_compare); %} -instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{ +instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsRegSrc crx) %{ // no match-rule, false predicate effect(DEF dst, USE crx); predicate(false); @@ -10622,8 +10577,9 @@ instruct cmpP_reg_imm16(flagsReg crx, iRegPsrc src1, immL16 src2) %{ //----------Float Compares---------------------------------------------------- instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{ + // Needs matchrule, see cmpDUnordered. + match(Set crx (CmpF src1 src2)); // no match-rule, false predicate - effect(DEF crx, USE src1, USE src2); predicate(false); format %{ "cmpFUrd $crx, $src1, $src2" %} @@ -10731,8 +10687,14 @@ instruct cmpF3_reg_reg_ExEx(iRegIdst dst, regF src1, regF src2) %{ %} instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{ - // no match-rule, false predicate - effect(DEF crx, USE src1, USE src2); + // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the + // node right before the conditional move using it. + // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7, + // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle + // crashed in register allocation where the flags Reg between cmpDUnoredered and a + // conditional move was supposed to be spilled. + match(Set crx (CmpD src1 src2)); + // False predicate, shall not be matched. 
predicate(false); format %{ "cmpFUrd $crx, $src1, $src2" %} @@ -10830,7 +10792,7 @@ instruct branch(label labl) %{ %} // Conditional Near Branch -instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{ +instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{ // Same match rule as `branchConFar'. match(If cmp crx); effect(USE lbl); @@ -10853,7 +10815,7 @@ instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{ // expensive. // // Conditional Far Branch -instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{ +instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{ // Same match rule as `branchCon'. match(If cmp crx); effect(USE crx, USE lbl); @@ -10871,7 +10833,7 @@ instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{ %} // Conditional Branch used with Power6 scheduler (can be far or short). -instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{ +instruct branchConSched(cmpOp cmp, flagsRegSrc crx, label lbl) %{ // Same match rule as `branchCon'. match(If cmp crx); effect(USE crx, USE lbl); @@ -10890,7 +10852,7 @@ instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{ ins_pipe(pipe_class_default); %} -instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{ +instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{ match(CountedLoopEnd cmp crx); effect(USE labl); ins_cost(BRANCH_COST); @@ -10904,7 +10866,7 @@ instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{ ins_pipe(pipe_class_default); %} -instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{ +instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{ match(CountedLoopEnd cmp crx); effect(USE labl); predicate(!false /* TODO: PPC port HB_Schedule */); @@ -10920,7 +10882,7 @@ instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{ %} // Conditional Branch used with Power6 scheduler (can be far or short). 
-instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{ +instruct branchLoopEndSched(cmpOp cmp, flagsRegSrc crx, label labl) %{ match(CountedLoopEnd cmp crx); effect(USE labl); predicate(false /* TODO: PPC port HB_Schedule */); @@ -10969,13 +10931,36 @@ instruct partialSubtypeCheck(iRegPdst result, iRegP_N2P subklass, iRegP_N2P supe instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ match(Set crx (FastLock oop box)); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); - // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking); + predicate(/*(!UseNewFastLockPPC64 || UseBiasedLocking) &&*/ !Compile::current()->use_rtm()); format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register, - $tmp3$$Register, $tmp1$$Register, $tmp2$$Register); + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register, + UseBiasedLocking && !UseOptoBiasInlining); // SAPJVM MD 2014-11-06 UseOptoBiasInlining + // If locking was successfull, crx should indicate 'EQ'. + // The compiler generates a branch to the runtime call to + // _complete_monitor_locking_Java for the case where crx is 'NE'. + %} + ins_pipe(pipe_class_compare); +%} + +// Separate version for TM. Use bound register for box to enable USE_KILL. 
+instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ + match(Set crx (FastLock oop box)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box); + predicate(Compile::current()->use_rtm()); + + format %{ "FASTLOCK $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register, + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register, + /*Biased Locking*/ false, + _rtm_counters, _stack_rtm_counters, + ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), + /*TM*/ true, ra_->C->profile_rtm()); // If locking was successfull, crx should indicate 'EQ'. // The compiler generates a branch to the runtime call to // _complete_monitor_locking_Java for the case where crx is 'NE'. @@ -10986,12 +10971,33 @@ instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iR instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ match(Set crx (FastUnlock oop box)); effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + predicate(!Compile::current()->use_rtm()); format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2" %} ins_encode %{ // TODO: PPC port $archOpcode(ppc64Opcode_compound); __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register, - $tmp3$$Register, $tmp1$$Register, $tmp2$$Register); + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register, + UseBiasedLocking && !UseOptoBiasInlining, + false); + // If unlocking was successfull, crx should indicate 'EQ'. + // The compiler generates a branch to the runtime call to + // _complete_monitor_unlocking_Java for the case where crx is 'NE'. 
+ %} + ins_pipe(pipe_class_compare); +%} + +instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{ + match(Set crx (FastUnlock oop box)); + effect(TEMP tmp1, TEMP tmp2, TEMP tmp3); + predicate(Compile::current()->use_rtm()); + + format %{ "FASTUNLOCK $oop, $box, $tmp1, $tmp2 (TM)" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register, + $tmp3$$Register, $tmp1$$Register, $tmp2$$Register, + /*Biased Locking*/ false, /*TM*/ true); // If unlocking was successfull, crx should indicate 'EQ'. // The compiler generates a branch to the runtime call to // _complete_monitor_unlocking_Java for the case where crx is 'NE'. @@ -11658,6 +11664,66 @@ instruct repl2F_immF0(iRegLdst dst, immF_0 zero) %{ ins_pipe(pipe_class_default); %} + +//----------Overflow Math Instructions----------------------------------------- + +// Note that we have to make sure that XER.SO is reset before using overflow instructions. +// Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc). +// Seems like only Long intrinsincs have an advantage. (The only expensive one is OverflowMulL.) 
+ +instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ + match(Set cr0 (OverflowAddL op1 op2)); + + format %{ "add_ $op1, $op2\t# overflow check long" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ li(R0, 0); + __ mtxer(R0); // clear XER.SO + __ addo_(R0, $op1$$Register, $op2$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ + match(Set cr0 (OverflowSubL op1 op2)); + + format %{ "subfo_ R0, $op2, $op1\t# overflow check long" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ li(R0, 0); + __ mtxer(R0); // clear XER.SO + __ subfo_(R0, $op2$$Register, $op1$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{ + match(Set cr0 (OverflowSubL zero op2)); + + format %{ "nego_ R0, $op2\t# overflow check long" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ li(R0, 0); + __ mtxer(R0); // clear XER.SO + __ nego_(R0, $op2$$Register); + %} + ins_pipe(pipe_class_default); +%} + +instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{ + match(Set cr0 (OverflowMulL op1 op2)); + + format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %} + ins_encode %{ + // TODO: PPC port $archOpcode(ppc64Opcode_compound); + __ li(R0, 0); + __ mtxer(R0); // clear XER.SO + __ mulldo_(R0, $op1$$Register, $op2$$Register); + %} + ins_pipe(pipe_class_default); +%} + + // ============================================================================ // Safepoint Instruction diff --git a/hotspot/src/cpu/ppc/vm/register_definitions_ppc.cpp b/hotspot/src/cpu/ppc/vm/register_definitions_ppc.cpp index a009ea06f77..6b002d2efd2 100644 --- a/hotspot/src/cpu/ppc/vm/register_definitions_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/register_definitions_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its 
affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -23,19 +23,10 @@ * */ -// make sure the defines don't screw up the declarations later on in this file +// Make sure the defines don't screw up the declarations later on in this file. #define DONT_USE_REGISTER_DEFINES -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" #include "asm/register.hpp" -#include "register_ppc.hpp" -#ifdef TARGET_ARCH_MODEL_ppc_32 -# include "interp_masm_ppc_32.hpp" -#endif -#ifdef TARGET_ARCH_MODEL_ppc_64 -# include "interp_masm_ppc_64.hpp" -#endif REGISTER_DEFINITION(Register, noreg); diff --git a/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp b/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp index 4f365a46a05..d163ebac6b9 100644 --- a/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/relocInfo_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2013 SAP AG. All rights reserved. + * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -25,14 +25,12 @@ #include "precompiled.hpp" #include "asm/assembler.inline.hpp" -#include "assembler_ppc.inline.hpp" #include "code/relocInfo.hpp" #include "nativeInst_ppc.hpp" #include "oops/oop.inline.hpp" #include "runtime/safepoint.hpp" void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { - bool copy_back_to_oop_pool = true; // TODO: PPC port // The following comment is from the declaration of DataRelocation: // // "The "o" (displacement) argument is relevant only to split relocations diff --git a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp index 68dda7d3e2b..5519405dbcd 100644 --- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,7 @@ #include "code/debugInfoRec.hpp" #include "code/icBuffer.hpp" #include "code/vtableStubs.hpp" +#include "frame_ppc.hpp" #include "interpreter/interpreter.hpp" #include "interpreter/interp_masm.hpp" #include "oops/compiledICHolder.hpp" @@ -194,8 +195,8 @@ static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = { RegisterSaver_LiveIntReg( R27 ), RegisterSaver_LiveIntReg( R28 ), RegisterSaver_LiveIntReg( R29 ), - RegisterSaver_LiveIntReg( R31 ), - RegisterSaver_LiveIntReg( R30 ), // r30 must be the last register + RegisterSaver_LiveIntReg( R30 ), + RegisterSaver_LiveIntReg( R31 ), // must be the last register (see save/restore functions below) }; OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm, @@ -229,29 +230,30 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {"); - // Save r30 in the last slot of the not yet pushed frame so that we + // Save r31 in the last slot of the not yet pushed frame so that we // can use it as scratch reg. - __ std(R30, -reg_size, R1_SP); + __ std(R31, -reg_size, R1_SP); assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size), "consistency check"); // save the flags // Do the save_LR_CR by hand and adjust the return pc if requested. 
- __ mfcr(R30); - __ std(R30, _abi(cr), R1_SP); + __ mfcr(R31); + __ std(R31, _abi(cr), R1_SP); switch (return_pc_location) { - case return_pc_is_lr: __ mflr(R30); break; - case return_pc_is_r4: __ mr(R30, R4); break; + case return_pc_is_lr: __ mflr(R31); break; + case return_pc_is_r4: __ mr(R31, R4); break; case return_pc_is_thread_saved_exception_pc: - __ ld(R30, thread_(saved_exception_pc)); break; + __ ld(R31, thread_(saved_exception_pc)); break; default: ShouldNotReachHere(); } - if (return_pc_adjustment != 0) - __ addi(R30, R30, return_pc_adjustment); - __ std(R30, _abi(lr), R1_SP); + if (return_pc_adjustment != 0) { + __ addi(R31, R31, return_pc_adjustment); + } + __ std(R31, _abi(lr), R1_SP); // push a new frame - __ push_frame(frame_size_in_bytes, R30); + __ push_frame(frame_size_in_bytes, R31); // save all registers (ints and floats) offset = register_save_offset; @@ -261,7 +263,7 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble switch (reg_type) { case RegisterSaver::int_reg: { - if (reg_num != 30) { // We spilled R30 right at the beginning. + if (reg_num != 31) { // We spilled R31 right at the beginning. __ std(as_Register(reg_num), offset, R1_SP); } break; @@ -272,8 +274,8 @@ OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssemble } case RegisterSaver::special_reg: { if (reg_num == SR_CTR_SpecialRegisterEnumValue) { - __ mfctr(R30); - __ std(R30, offset, R1_SP); + __ mfctr(R31); + __ std(R31, offset, R1_SP); } else { Unimplemented(); } @@ -321,7 +323,7 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm, switch (reg_type) { case RegisterSaver::int_reg: { - if (reg_num != 30) // R30 restored at the end, it's the tmp reg! + if (reg_num != 31) // R31 restored at the end, it's the tmp reg! 
__ ld(as_Register(reg_num), offset, R1_SP); break; } @@ -332,8 +334,8 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm, case RegisterSaver::special_reg: { if (reg_num == SR_CTR_SpecialRegisterEnumValue) { if (restore_ctr) { // Nothing to do here if ctr already contains the next address. - __ ld(R30, offset, R1_SP); - __ mtctr(R30); + __ ld(R31, offset, R1_SP); + __ mtctr(R31); } } else { Unimplemented(); @@ -350,10 +352,10 @@ void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm, __ pop_frame(); // restore the flags - __ restore_LR_CR(R30); + __ restore_LR_CR(R31); // restore scratch register's value - __ ld(R30, -reg_size, R1_SP); + __ ld(R31, -reg_size, R1_SP); BLOCK_COMMENT("} restore_live_registers_and_pop_frame"); } @@ -2021,6 +2023,8 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ push_frame(frame_size_in_bytes, r_temp_1); // Push the c2n adapter's frame. frame_done_pc = (intptr_t)__ pc(); + __ verify_thread(); + // Native nmethod wrappers never take possesion of the oop arguments. // So the caller will gc the arguments. // The only thing we need an oopMap for is if the call is static. @@ -2594,7 +2598,7 @@ int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) } uint SharedRuntime::out_preserve_stack_slots() { -#ifdef COMPILER2 +#if defined(COMPILER1) || defined(COMPILER2) return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size; #else return 0; @@ -2868,11 +2872,6 @@ void SharedRuntime::generate_deopt_blob() { __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread); __ BIND(skip_restore_excp); - // reload narrro_oop_base - if (UseCompressedOops && Universe::narrow_oop_base() != 0) { - __ load_const_optimized(R30, Universe::narrow_oop_base()); - } - __ pop_frame(); // stack: (deoptee, optional i2c, caller of deoptee, ...). 
diff --git a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp index c7a9d06627d..4ddf83ba943 100644 --- a/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp @@ -261,9 +261,6 @@ class StubGenerator: public StubCodeGenerator { // global toc register __ load_const(R29, MacroAssembler::global_toc(), R11_scratch1); - // Load narrow oop base. - __ reinit_heapbase(R30, R11_scratch1); - // Remember the senderSP so we interpreter can pop c2i arguments off of the stack // when called via a c2i. @@ -418,6 +415,23 @@ class StubGenerator: public StubCodeGenerator { // or native call stub. The pending exception in Thread is // converted into a Java-level exception. // + // Read: + // + // LR: The pc the runtime library callee wants to return to. + // Since the exception occurred in the callee, the return pc + // from the point of view of Java is the exception pc. + // thread: Needed for method handles. + // + // Invalidate: + // + // volatile registers (except below). + // + // Update: + // + // R4_ARG2: exception + // + // (LR is unchanged and is live out). + // address generate_forward_exception() { StubCodeMark mark(this, "StubRoutines", "forward_exception"); address start = __ pc(); @@ -1256,9 +1270,9 @@ class StubGenerator: public StubCodeGenerator { Register tmp3 = R8_ARG6; #if defined(ABI_ELFv2) - address nooverlap_target = aligned ? - StubRoutines::arrayof_jbyte_disjoint_arraycopy() : - StubRoutines::jbyte_disjoint_arraycopy(); + address nooverlap_target = aligned ? + StubRoutines::arrayof_jbyte_disjoint_arraycopy() : + StubRoutines::jbyte_disjoint_arraycopy(); #else address nooverlap_target = aligned ? 
((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() : diff --git a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp index 432a96d8268..2789be2aa55 100644 --- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, 2014 SAP AG. All rights reserved. + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -264,11 +264,11 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* __ cmpdi(CCR0, Rmdo, 0); __ beq(CCR0, no_mdo); - // Increment invocation counter in the MDO. - const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); - __ lwz(Rscratch2, mdo_ic_offs, Rmdo); + // Increment backedge counter in the MDO. + const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); + __ lwz(Rscratch2, mdo_bc_offs, Rmdo); __ addi(Rscratch2, Rscratch2, increment); - __ stw(Rscratch2, mdo_ic_offs, Rmdo); + __ stw(Rscratch2, mdo_bc_offs, Rmdo); __ load_const_optimized(Rscratch1, mask, R0); __ and_(Rscratch1, Rscratch2, Rscratch1); __ bne(CCR0, done); @@ -276,12 +276,12 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label* } // Increment counter in MethodCounters*. 
- const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); + const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset()); __ bind(no_mdo); __ get_method_counters(R19_method, R3_counters, done); - __ lwz(Rscratch2, mo_ic_offs, R3_counters); + __ lwz(Rscratch2, mo_bc_offs, R3_counters); __ addi(Rscratch2, Rscratch2, increment); - __ stw(Rscratch2, mo_ic_offs, R3_counters); + __ stw(Rscratch2, mo_bc_offs, R3_counters); __ load_const_optimized(Rscratch1, mask, R0); __ and_(Rscratch1, Rscratch2, Rscratch1); __ beq(CCR0, *overflow); @@ -611,12 +611,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call, Regist // For others we can use a normal (native) entry. inline bool math_entry_available(AbstractInterpreter::MethodKind kind) { - // Provide math entry with debugging on demand. - // Note: Debugging changes which code will get executed: - // Debugging or disabled InlineIntrinsics: java method will get interpreted and performs a native call. - // Not debugging and enabled InlineIntrinics: processor instruction will get used. - // Result might differ slightly due to rounding etc. - if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry. + if (!InlineIntrinsics) return false; return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) || (kind==Interpreter::java_lang_math_abs)); @@ -628,15 +623,8 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M return Interpreter::entry_for_kind(Interpreter::zerolocals); } - Label Lslow_path; - const Register Rjvmti_mode = R11_scratch1; address entry = __ pc(); - // Provide math entry with debugging on demand. 
- __ lwz(Rjvmti_mode, thread_(interp_only_mode)); - __ cmpwi(CCR0, Rjvmti_mode, 0); - __ bne(CCR0, Lslow_path); // jvmti_mode!=0 - __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp); // Pop c2i arguments (if any) off when we return. @@ -659,9 +647,6 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M // And we're done. __ blr(); - // Provide slow path for JVMTI case. - __ bind(Lslow_path); - __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2); __ flush(); return entry; diff --git a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp index eb817b0a256..4450dd71897 100644 --- a/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2013, 2014 SAP AG. All rights reserved. + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2013, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -34,7 +34,7 @@ // Run with +PrintInterpreter to get the VM to print out the size. // Max size with JVMTI - const static int InterpreterCodeSize = 210*K; + const static int InterpreterCodeSize = 230*K; #endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP diff --git a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp index a6505333fc6..9cede857915 100644 --- a/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp +++ b/hotspot/src/cpu/ppc/vm/templateTable_ppc_64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. * Copyright 2013, 2015 SAP AG. All rights reserved. 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -375,23 +375,22 @@ void TemplateTable::fast_aldc(bool wide) { int index_size = wide ? sizeof(u2) : sizeof(u1); const Register Rscratch = R11_scratch1; - Label resolved; + Label is_null; // We are resolved if the resolved reference cache entry contains a // non-null object (CallSite, etc.) __ get_cache_index_at_bcp(Rscratch, 1, index_size); // Load index. - __ load_resolved_reference_at_index(R17_tos, Rscratch); - __ cmpdi(CCR0, R17_tos, 0); - __ bne(CCR0, resolved); + __ load_resolved_reference_at_index(R17_tos, Rscratch, &is_null); + __ verify_oop(R17_tos); + __ dispatch_epilog(atos, Bytecodes::length_for(bytecode())); + + __ bind(is_null); __ load_const_optimized(R3_ARG1, (int)bytecode()); address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); // First time invocation - must resolve first. __ call_VM(R17_tos, entry, R3_ARG1); - - __ align(32, 12); - __ bind(resolved); __ verify_oop(R17_tos); } @@ -3795,9 +3794,9 @@ void TemplateTable::instanceof() { transition(atos, itos); Label Ldone, Lis_null, Lquicked, Lresolved; - Register Roffset = R5_ARG3, + Register Roffset = R6_ARG4, RobjKlass = R4_ARG2, - RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register. 
+ RspecifiedKlass = R5_ARG3, Rcpool = R11_scratch1, Rtags = R12_scratch2; diff --git a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp index 3803c6e8013..357b3d359bd 100644 --- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp @@ -32,12 +32,13 @@ #include "runtime/os.hpp" #include "runtime/stubCodeGenerator.hpp" #include "utilities/defaultStream.hpp" +#include "utilities/globalDefinitions.hpp" #include "vm_version_ppc.hpp" # include int VM_Version::_features = VM_Version::unknown_m; -int VM_Version::_measured_cache_line_size = 128; // default value +int VM_Version::_measured_cache_line_size = 32; // pessimistic init value const char* VM_Version::_features_str = ""; bool VM_Version::_is_determine_features_test_running = false; @@ -55,7 +56,9 @@ void VM_Version::initialize() { // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features. if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) { - if (VM_Version::has_popcntw()) { + if (VM_Version::has_lqarx()) { + FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8); + } else if (VM_Version::has_popcntw()) { FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7); } else if (VM_Version::has_cmpb()) { FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6); @@ -66,8 +69,14 @@ void VM_Version::initialize() { } } guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 || - PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7, - "PowerArchitecturePPC64 should be 0, 5, 6 or 7"); + PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 || + PowerArchitecturePPC64 == 8, + "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8"); + + // Power 8: Configure Data Stream Control Register. + if (PowerArchitecturePPC64 >= 8) { + config_dscr(); + } if (!UseSIGTRAP) { MSG(TrapBasedICMissChecks); @@ -97,7 +106,7 @@ void VM_Version::initialize() { // Create and print feature-string. char buf[(num_features+1) * 16]; // Max 16 chars per feature. 
jio_snprintf(buf, sizeof(buf), - "ppc64%s%s%s%s%s%s%s%s", + "ppc64%s%s%s%s%s%s%s%s%s%s%s%s", (has_fsqrt() ? " fsqrt" : ""), (has_isel() ? " isel" : ""), (has_lxarxeh() ? " lxarxeh" : ""), @@ -106,11 +115,17 @@ void VM_Version::initialize() { (has_popcntb() ? " popcntb" : ""), (has_popcntw() ? " popcntw" : ""), (has_fcfids() ? " fcfids" : ""), - (has_vand() ? " vand" : "") + (has_vand() ? " vand" : ""), + (has_lqarx() ? " lqarx" : ""), + (has_vcipher() ? " vcipher" : ""), + (has_vpmsumb() ? " vpmsumb" : ""), + (has_tcheck() ? " tcheck" : "") // Make sure number of %s matches num_features! ); _features_str = os::strdup(buf); - NOT_PRODUCT(if (Verbose) print_features();); + if (Verbose) { + print_features(); + } // PPC64 supports 8-byte compare-exchange operations (see // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr) @@ -171,6 +186,58 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + // Adjust RTM (Restricted Transactional Memory) flags. + if (!has_tcheck() && UseRTMLocking) { + // Can't continue because UseRTMLocking affects UseBiasedLocking flag + // setting during arguments processing. See use_biased_locking(). + // VM_Version_init() is executed after UseBiasedLocking is used + // in Thread::allocate(). + vm_exit_during_initialization("RTM instructions are not available on this CPU"); + } + + if (UseRTMLocking) { +#if INCLUDE_RTM_OPT + if (!UnlockExperimentalVMOptions) { + vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. " + "It must be enabled via -XX:+UnlockExperimentalVMOptions flag."); + } else { + warning("UseRTMLocking is only available as experimental option on this platform."); + } + if (!FLAG_IS_CMDLINE(UseRTMLocking)) { + // RTM locking should be used only for applications with + // high lock contention. For now we do not use it by default. 
+ vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); + } + if (!is_power_of_2(RTMTotalCountIncrRate)) { + warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64"); + FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64); + } + if (RTMAbortRatio < 0 || RTMAbortRatio > 100) { + warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50"); + FLAG_SET_DEFAULT(RTMAbortRatio, 50); + } + FLAG_SET_ERGO(bool, UseNewFastLockPPC64, false); // Does not implement TM. + guarantee(RTMSpinLoopCount > 0, "unsupported"); +#else + // Only C2 does RTM locking optimization. + // Can't continue because UseRTMLocking affects UseBiasedLocking flag + // setting during arguments processing. See use_biased_locking(). + vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); +#endif + } else { // !UseRTMLocking + if (UseRTMForStackLocks) { + if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { + warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); + } + FLAG_SET_DEFAULT(UseRTMForStackLocks, false); + } + if (UseRTMDeopt) { + FLAG_SET_DEFAULT(UseRTMDeopt, false); + } + if (PrintPreciseRTMLockingStatistics) { + FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); + } + } // This machine does not allow unaligned memory accesses if (UseUnalignedAccesses) { @@ -180,6 +247,27 @@ void VM_Version::initialize() { } } +bool VM_Version::use_biased_locking() { +#if INCLUDE_RTM_OPT + // RTM locking is most useful when there is high lock contention and + // low data contention. With high lock contention the lock is usually + // inflated and biased locking is not suitable for that case. + // RTM locking code requires that biased locking is off. + // Note: we can't switch off UseBiasedLocking in get_processor_features() + // because it is used by Thread::allocate() which is called before + // VM_Version::initialize(). 
+ if (UseRTMLocking && UseBiasedLocking) { + if (FLAG_IS_DEFAULT(UseBiasedLocking)) { + FLAG_SET_DEFAULT(UseBiasedLocking, false); + } else { + warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." ); + UseBiasedLocking = false; + } + } +#endif + return UseBiasedLocking; +} + void VM_Version::print_features() { tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size()); } @@ -443,16 +531,19 @@ void VM_Version::determine_features() { // Don't use R0 in ldarx. // Keep R3_ARG1 unmodified, it contains &field (see below). // Keep R4_ARG2 unmodified, it contains offset = 0 (see below). - a->fsqrt(F3, F4); // code[0] -> fsqrt_m - a->fsqrts(F3, F4); // code[1] -> fsqrts_m - a->isel(R7, R5, R6, 0); // code[2] -> isel_m - a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m - a->cmpb(R7, R5, R6); // code[4] -> bcmp - //a->mftgpr(R7, F3); // code[5] -> mftgpr - a->popcntb(R7, R5); // code[6] -> popcntb - a->popcntw(R7, R5); // code[7] -> popcntw - a->fcfids(F3, F4); // code[8] -> fcfids - a->vand(VR0, VR0, VR0); // code[9] -> vand + a->fsqrt(F3, F4); // code[0] -> fsqrt_m + a->fsqrts(F3, F4); // code[1] -> fsqrts_m + a->isel(R7, R5, R6, 0); // code[2] -> isel_m + a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m + a->cmpb(R7, R5, R6); // code[4] -> cmpb + a->popcntb(R7, R5); // code[5] -> popcntb + a->popcntw(R7, R5); // code[6] -> popcntw + a->fcfids(F3, F4); // code[7] -> fcfids + a->vand(VR0, VR0, VR0); // code[8] -> vand + a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9] -> lqarx_m + a->vcipher(VR0, VR1, VR2); // code[10] -> vcipher + a->vpmsumb(VR0, VR1, VR2); // code[11] -> vpmsumb + a->tcheck(0); // code[12] -> tcheck a->blr(); // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it. 
@@ -491,11 +582,14 @@ void VM_Version::determine_features() { if (code[feature_cntr++]) features |= isel_m; if (code[feature_cntr++]) features |= lxarxeh_m; if (code[feature_cntr++]) features |= cmpb_m; - //if(code[feature_cntr++])features |= mftgpr_m; if (code[feature_cntr++]) features |= popcntb_m; if (code[feature_cntr++]) features |= popcntw_m; if (code[feature_cntr++]) features |= fcfids_m; if (code[feature_cntr++]) features |= vand_m; + if (code[feature_cntr++]) features |= lqarx_m; + if (code[feature_cntr++]) features |= vcipher_m; + if (code[feature_cntr++]) features |= vpmsumb_m; + if (code[feature_cntr++]) features |= tcheck_m; // Print the detection code. if (PrintAssembly) { @@ -507,6 +601,69 @@ void VM_Version::determine_features() { _features = features; } +// Power 8: Configure Data Stream Control Register. +void VM_Version::config_dscr() { + assert(has_tcheck(), "Only execute on Power 8 or later!"); + + // 7 InstWords for each call (function descriptor + blr instruction). + const int code_size = (2+2*7)*BytesPerInstWord; + + // Allocate space for the code. + ResourceMark rm; + CodeBuffer cb("config_dscr", code_size, 0); + MacroAssembler* a = new MacroAssembler(&cb); + + // Emit code. + uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd(); + uint32_t *code = (uint32_t *)a->pc(); + a->mfdscr(R3); + a->blr(); + + void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd(); + a->mtdscr(R3); + a->blr(); + + uint32_t *code_end = (uint32_t *)a->pc(); + a->flush(); + + // Print the detection code. + if (PrintAssembly) { + ttyLocker ttyl; + tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code); + Disassembler::decode((u_char*)code, (u_char*)code_end, tty); + } + + // Apply the configuration if needed. 
+ uint64_t dscr_val = (*get_dscr)(); + if (Verbose) { + tty->print_cr("dscr value was 0x%lx" , dscr_val); + } + bool change_requested = false; + if (DSCR_PPC64 != (uintx)-1) { + dscr_val = DSCR_PPC64; + change_requested = true; + } + if (DSCR_DPFD_PPC64 <= 7) { + uint64_t mask = 0x7; + if ((dscr_val & mask) != DSCR_DPFD_PPC64) { + dscr_val = (dscr_val & ~mask) | (DSCR_DPFD_PPC64); + change_requested = true; + } + } + if (DSCR_URG_PPC64 <= 7) { + uint64_t mask = 0x7 << 6; + if ((dscr_val & mask) != (DSCR_URG_PPC64 << 6)) { // Compare the URG field with the requested URG value (not DPFD). + dscr_val = (dscr_val & ~mask) | (DSCR_URG_PPC64 << 6); + change_requested = true; + } + } + if (change_requested) { + (*set_dscr)(dscr_val); + if (Verbose) { + tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)()); + } + } +} static int saved_features = 0; diff --git a/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp b/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp index 2bbfdddb21f..6fc76e4cd41 100644 --- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,10 @@ protected: popcntw, fcfids, vand, - dcba, + lqarx, + vcipher, + vpmsumb, + tcheck, num_features // last entry to count features }; enum Feature_Flag_Set { @@ -55,7 +58,10 @@ protected: popcntw_m = (1 << popcntw), fcfids_m = (1 << fcfids ), vand_m = (1 << vand ), - dcba_m = (1 << dcba ), + lqarx_m = (1 << lqarx ), + vcipher_m = (1 << vcipher), + vpmsumb_m = (1 << vpmsumb), + tcheck_m = (1 << tcheck ), all_features_m = -1 }; static int _features; @@ -65,12 +71,16 @@ protected: static void print_features(); static void determine_features(); // also measures cache line size + static void config_dscr(); // Power 8: Configure Data Stream Control Register. static void determine_section_size(); static void power6_micro_bench(); public: // Initialization static void initialize(); + // Override Abstract_VM_Version implementation + static bool use_biased_locking(); + static bool is_determine_features_test_running() { return _is_determine_features_test_running; } // CPU instruction support static bool has_fsqrt() { return (_features & fsqrt_m) != 0; } @@ -82,7 +92,10 @@ public: static bool has_popcntw() { return (_features & popcntw_m) != 0; } static bool has_fcfids() { return (_features & fcfids_m) != 0; } static bool has_vand() { return (_features & vand_m) != 0; } - static bool has_dcba() { return (_features & dcba_m) != 0; } + static bool has_lqarx() { return (_features & lqarx_m) != 0; } + static bool has_vcipher() { return (_features & vcipher_m) != 0; } + static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; } + static bool has_tcheck() { return (_features & tcheck_m) != 0; } static const char* cpu_features() { return _features_str; } diff --git a/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp b/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp index 5931afd5c32..0165fb22e34 100644 --- a/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp +++ 
b/hotspot/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,7 +24,6 @@ */ #include "precompiled.hpp" -#include "asm/assembler.hpp" #include "asm/macroAssembler.inline.hpp" #include "code/vtableStubs.hpp" #include "interp_masm_ppc_64.hpp" From b30f24676eb5ce81d12f3b05f4dac064150149f1 Mon Sep 17 00:00:00 2001 From: Vladimir Ivanov Date: Wed, 22 Apr 2015 21:33:55 +0300 Subject: [PATCH 07/13] 8078309: compiler/jsr292/MHInlineTest.java failed with java.lang.RuntimeException: 'MHInlineTest$A::protected_x (3 bytes) virtual call' found in stdout Reviewed-by: jrose --- hotspot/test/compiler/jsr292/MHInlineTest.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hotspot/test/compiler/jsr292/MHInlineTest.java b/hotspot/test/compiler/jsr292/MHInlineTest.java index 0587ab6ad9e..a167671b283 100644 --- a/hotspot/test/compiler/jsr292/MHInlineTest.java +++ b/hotspot/test/compiler/jsr292/MHInlineTest.java @@ -48,15 +48,12 @@ public class MHInlineTest { // The test is applicable only to C2 (present in Server VM). 
if (analyzer.getStderr().contains("Server VM")) { analyzer.shouldContain("MHInlineTest$B::public_x (3 bytes) inline (hot)"); - analyzer.shouldContain( "MHInlineTest$B::protected_x (3 bytes) inline (hot)"); - analyzer.shouldContain( "MHInlineTest$B::package_x (3 bytes) inline (hot)"); + analyzer.shouldContain("MHInlineTest$B::protected_x (3 bytes) inline (hot)"); + analyzer.shouldContain("MHInlineTest$B::package_x (3 bytes) inline (hot)"); analyzer.shouldContain("MHInlineTest$A::package_final_x (3 bytes) inline (hot)"); analyzer.shouldContain("MHInlineTest$B::private_x (3 bytes) inline (hot)"); analyzer.shouldContain("MHInlineTest$B::private_static_x (3 bytes) inline (hot)"); analyzer.shouldContain("MHInlineTest$A::package_static_x (3 bytes) inline (hot)"); - - analyzer.shouldNotContain("MHInlineTest$A::protected_x (3 bytes) virtual call"); - analyzer.shouldNotContain("MHInlineTest$A::package_x (3 bytes) virtual call"); } } @@ -179,6 +176,7 @@ public class MHInlineTest { throw new Error(throwable); } } + static class Launcher { public static void main(String[] args) throws Exception { for (int i = 0; i < 20_000; i++) { From dc67bb0a0e77935b31e250fca52ee42021245873 Mon Sep 17 00:00:00 2001 From: Aleksey Shipilev Date: Wed, 22 Apr 2015 19:10:03 +0300 Subject: [PATCH 08/13] 8076987: C1 should support conditional card marks (UseCondCardMark) Reviewed-by: iveresov, roland --- hotspot/src/share/vm/c1/c1_LIRGenerator.cpp | 23 ++++++++++++++++----- hotspot/src/share/vm/opto/c2_globals.hpp | 3 --- hotspot/src/share/vm/runtime/globals.hpp | 3 +++ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp index d040ccadd6b..5c8ddd01f8e 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp @@ -1606,13 +1606,26 @@ void LIRGenerator::CardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* } else { __ unsigned_shift_right(addr, 
CardTableModRefBS::card_shift, tmp); } + + LIR_Address* card_addr; if (can_inline_as_constant(card_table_base)) { - __ move(LIR_OprFact::intConst(0), - new LIR_Address(tmp, card_table_base->as_jint(), T_BYTE)); + card_addr = new LIR_Address(tmp, card_table_base->as_jint(), T_BYTE); } else { - __ move(LIR_OprFact::intConst(0), - new LIR_Address(tmp, load_constant(card_table_base), - T_BYTE)); + card_addr = new LIR_Address(tmp, load_constant(card_table_base), T_BYTE); + } + + LIR_Opr dirty = LIR_OprFact::intConst(CardTableModRefBS::dirty_card_val()); + if (UseCondCardMark) { + LIR_Opr cur_value = new_register(T_INT); + __ move(card_addr, cur_value); + + LabelObj* L_already_dirty = new LabelObj(); + __ cmp(lir_cond_equal, cur_value, dirty); + __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label()); + __ move(dirty, card_addr); + __ branch_destination(L_already_dirty->label()); + } else { + __ move(dirty, card_addr); } #endif } diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp index db4e30bec3c..59517e40499 100644 --- a/hotspot/src/share/vm/opto/c2_globals.hpp +++ b/hotspot/src/share/vm/opto/c2_globals.hpp @@ -593,9 +593,6 @@ develop(bool, PoisonOSREntry, true, \ "Detect abnormal calls to OSR code") \ \ - product(bool, UseCondCardMark, false, \ - "Check for already marked card before updating card table") \ - \ develop(bool, SoftMatchFailure, trueInProduct, \ "If the DFA fails to match a node, print a message and bail out") \ \ diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 97f956f0cd1..c4815a992fe 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -2235,6 +2235,9 @@ class CommandLineFlags { "When +ReduceInitialCardMarks, explicitly defer any that " \ "may arise from new_pre_store_barrier") \ \ + product(bool, UseCondCardMark, false, \ + "Check for already marked card before updating card table") \ + \ 
diagnostic(bool, VerifyRememberedSets, false, \ "Verify GC remembered sets") \ \ From 879707ab3108a2eb37fa69e10adb5e5440829662 Mon Sep 17 00:00:00 2001 From: Goetz Lindenmaier Date: Thu, 23 Apr 2015 09:27:02 +0200 Subject: [PATCH 09/13] 8078482: ppc: pass thread to throw_AbstractMethodError Also improve check for Safepoints in signal handler. Reviewed-by: kvn, simonis --- hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp | 8 ++++---- hotspot/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp | 10 +++++++--- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp b/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp index be85db68c82..7f301e255b0 100644 --- a/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/interpreter_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -427,7 +427,6 @@ address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type return entry; } - // Call an accessor method (assuming it is resolved, otherwise drop into // vanilla (slow path) entry. address InterpreterGenerator::generate_jump_to_normal_entry(void) { @@ -473,7 +472,8 @@ address InterpreterGenerator::generate_abstract_entry(void) { // This is not a leaf but we have a JavaFrameAnchor now and we will // check (create) exceptions afterward so this is ok. - __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError)); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError), + R16_thread); // Pop the C frame and restore LR. 
__ pop_frame(); diff --git a/hotspot/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp b/hotspot/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp index c742b8a0074..326fd326de4 100644 --- a/hotspot/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp +++ b/hotspot/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. - * Copyright 2012, 2014 SAP AG. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2015 SAP AG. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -298,6 +298,7 @@ JVM_handle_linux_signal(int sig, goto report_and_die; } + CodeBlob *cb = NULL; // Handle signal from NativeJump::patch_verified_entry(). if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) || (!TrapBasedNotEntrantChecks && sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) { @@ -313,7 +314,10 @@ JVM_handle_linux_signal(int sig, // especially when we try to read from the safepoint polling page. So the check // (address)info->si_addr == os::get_standard_polling_page() // doesn't work for us. We use: - ((NativeInstruction*)pc)->is_safepoint_poll()) { + ((NativeInstruction*)pc)->is_safepoint_poll() && + CodeCache::contains((void*) pc) && + ((cb = CodeCache::find_blob(pc)) != NULL) && + cb->is_nmethod()) { if (TraceTraps) { tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc)); } From a452b030ce1c13fd44eb0483173ab599391cfa40 Mon Sep 17 00:00:00 2001 From: Zoltan Majo Date: Mon, 27 Apr 2015 10:49:43 +0200 Subject: [PATCH 10/13] 8068945: Use RBP register as proper frame pointer in JIT compiled code on x86 Introduce the PreserveFramePointer flag to control if RBP is used as the frame pointer or as a general purpose register. 
Reviewed-by: kvn, roland, dlong, enevill, shade --- .../sun/jvm/hotspot/runtime/x86/X86Frame.java | 23 +- .../src/cpu/aarch64/vm/globals_aarch64.hpp | 2 + hotspot/src/cpu/ppc/vm/globals_ppc.hpp | 2 + hotspot/src/cpu/sparc/vm/globals_sparc.hpp | 2 + hotspot/src/cpu/x86/vm/assembler_x86.hpp | 6 +- hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp | 7 +- .../src/cpu/x86/vm/c1_MacroAssembler_x86.cpp | 3 + hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp | 12 +- hotspot/src/cpu/x86/vm/frame_x86.cpp | 30 +- hotspot/src/cpu/x86/vm/frame_x86.hpp | 13 +- hotspot/src/cpu/x86/vm/frame_x86.inline.hpp | 2 +- hotspot/src/cpu/x86/vm/globals_x86.hpp | 2 + hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp | 9 + hotspot/src/cpu/x86/vm/methodHandles_x86.cpp | 2 +- hotspot/src/cpu/x86/vm/runtime_x86_32.cpp | 4 - .../src/cpu/x86/vm/sharedRuntime_x86_64.cpp | 12 +- hotspot/src/cpu/x86/vm/x86.ad | 15 - hotspot/src/cpu/x86/vm/x86_32.ad | 133 ++--- hotspot/src/cpu/x86/vm/x86_64.ad | 482 +++++++++++------- hotspot/src/share/vm/c1/c1_GraphBuilder.cpp | 2 +- hotspot/src/share/vm/c1/c1_LIR.cpp | 5 +- hotspot/src/share/vm/c1/c1_LIR.hpp | 6 +- hotspot/src/share/vm/c1/c1_LIRGenerator.cpp | 15 +- hotspot/src/share/vm/opto/bytecodeInfo.cpp | 8 +- hotspot/src/share/vm/prims/forte.cpp | 112 ++-- hotspot/src/share/vm/runtime/globals.hpp | 6 +- .../src/share/vm/runtime/sharedRuntime.cpp | 2 +- hotspot/src/share/vm/runtime/vframe.hpp | 20 +- 28 files changed, 519 insertions(+), 418 deletions(-) diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java index 0d6d5328967..558aa4a8dbd 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java @@ -314,26 +314,17 @@ public class X86Frame extends Frame { //------------------------------------------------------------------------------ // frame::adjust_unextended_sp 
private void adjustUnextendedSP() { - // If we are returning to a compiled MethodHandle call site, the - // saved_fp will in fact be a saved value of the unextended SP. The - // simplest way to tell whether we are returning to such a call site - // is as follows: + // On x86, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. CodeBlob cb = cb(); NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); if (senderNm != null) { - // If the sender PC is a deoptimization point, get the original - // PC. For MethodHandle call site the unextended_sp is stored in - // saved_fp. - if (senderNm.isDeoptMhEntry(getPC())) { - // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP())); - raw_unextendedSP = getFP(); - } - else if (senderNm.isDeoptEntry(getPC())) { - // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); - } - else if (senderNm.isMethodHandleReturn(getPC())) { - raw_unextendedSP = getFP(); + // If the sender PC is a deoptimization point, get the original PC. 
+ if (senderNm.isDeoptEntry(getPC()) || + senderNm.isDeoptMhEntry(getPC())) { + // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); } } } diff --git a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp index d0baef39889..c073735a7e0 100644 --- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp @@ -68,6 +68,8 @@ define_pd_global(bool, RewriteFrequentPairs, false); define_pd_global(bool, UseMembar, true); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread diff --git a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp index f2391d251eb..705267859a2 100644 --- a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp +++ b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp @@ -55,6 +55,8 @@ define_pd_global(bool, RewriteFrequentPairs, true); define_pd_global(bool, UseMembar, false); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread. 
diff --git a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp index 2873f441f9d..800e6ef5a74 100644 --- a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp @@ -74,6 +74,8 @@ define_pd_global(bool, RewriteFrequentPairs, true); define_pd_global(bool, UseMembar, false); +define_pd_global(bool, PreserveFramePointer, false); + // GC Ergo Flags define_pd_global(size_t, CMSYoungGenPerWorker, 16*M); // default max size of CMS young gen, per GC worker thread diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index b7ab95df487..742d6c5eed9 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -142,8 +142,10 @@ REGISTER_DECLARATION(Register, r15_thread, r15); // callee-saved #endif // _LP64 -// JSR 292 fixed register usages: -REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp); +// JSR 292 +// On x86, the SP does not have to be saved when invoking method handle intrinsics +// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg. +REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg); // Address is an abstraction used to represent a memory location // using any of the amd64 addressing modes with one object. diff --git a/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp b/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp index f75eca72868..bec7eed65e9 100644 --- a/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_FrameMap_x86.cpp @@ -343,14 +343,13 @@ LIR_Opr FrameMap::stack_pointer() { return FrameMap::rsp_opr; } - // JSR 292 +// On x86, there is no need to save the SP, because neither +// method handle intrinsics, nor compiled lambda forms modify it. 
LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { - assert(rbp == rbp_mh_SP_save, "must be same register"); - return rbp_opr; + return LIR_OprFact::illegalOpr; } - bool FrameMap::validate_frame() { return true; } diff --git a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp index 0c8d0767ed2..cf1a59a30b3 100644 --- a/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp @@ -360,6 +360,9 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by generate_stack_overflow_check(bang_size_in_bytes); push(rbp); + if (PreserveFramePointer) { + mov(rbp, rsp); + } #ifdef TIERED // c2 leaves fpu stack dirty. Clean it on entry if (UseSSE < 2 ) { diff --git a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp index fa7e8c3dd0f..152ad2e0189 100644 --- a/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp +++ b/hotspot/src/cpu/x86/vm/c1_Runtime1_x86.cpp @@ -754,14 +754,9 @@ OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP // since we do a leave anyway. - // Pop the return address since we are possibly changing SP (restoring from BP). + // Pop the return address. __ leave(); __ pop(rcx); - - // Restore SP from BP if the exception PC is a method handle call site. - NOT_LP64(__ get_thread(thread);) - __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); __ jmp(rcx); // jump to exception handler break; default: ShouldNotReachHere(); @@ -832,11 +827,6 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // the pop is also necessary to simulate the effect of a ret(0) __ pop(exception_pc); - // Restore SP from BP if the exception PC is a method handle call site. 
- NOT_LP64(__ get_thread(thread);) - __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // continue at exception handler (return address removed) // note: do *not* remove arguments when unwinding the // activation since the caller assumes having diff --git a/hotspot/src/cpu/x86/vm/frame_x86.cpp b/hotspot/src/cpu/x86/vm/frame_x86.cpp index 654f0689ec2..525b13e6844 100644 --- a/hotspot/src/cpu/x86/vm/frame_x86.cpp +++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp @@ -224,7 +224,8 @@ bool frame::safe_for_sender(JavaThread *thread) { if (sender_blob->is_nmethod()) { nmethod* nm = sender_blob->as_nmethod_or_null(); if (nm != NULL) { - if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) { + if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || + nm->method()->is_method_handle_intrinsic()) { return false; } } @@ -391,10 +392,9 @@ frame frame::sender_for_entry_frame(RegisterMap* map) const { // frame::verify_deopt_original_pc // // Verifies the calculated original PC of a deoptimization PC for the -// given unextended SP. The unextended SP might also be the saved SP -// for MethodHandle call sites. +// given unextended SP. 
#ifdef ASSERT -void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) { +void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) { frame fr; // This is ugly but it's better than to change {get,set}_original_pc @@ -404,33 +404,23 @@ void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool address original_pc = nm->get_original_pc(&fr); assert(nm->insts_contains(original_pc), "original PC must be in nmethod"); - assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be"); } #endif //------------------------------------------------------------------------------ // frame::adjust_unextended_sp void frame::adjust_unextended_sp() { - // If we are returning to a compiled MethodHandle call site, the - // saved_fp will in fact be a saved value of the unextended SP. The - // simplest way to tell whether we are returning to such a call site - // is as follows: + // On x86, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null(); if (sender_nm != NULL) { - // If the sender PC is a deoptimization point, get the original - // PC. For MethodHandle call site the unextended_sp is stored in - // saved_fp. - if (sender_nm->is_deopt_mh_entry(_pc)) { - DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp)); - _unextended_sp = _fp; - } - else if (sender_nm->is_deopt_entry(_pc)) { + // If the sender PC is a deoptimization point, get the original PC. 
+ if (sender_nm->is_deopt_entry(_pc) || + sender_nm->is_deopt_mh_entry(_pc)) { DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp)); } - else if (sender_nm->is_method_handle_return(_pc)) { - _unextended_sp = _fp; - } } } diff --git a/hotspot/src/cpu/x86/vm/frame_x86.hpp b/hotspot/src/cpu/x86/vm/frame_x86.hpp index 602b8ff186f..3b7cee89f0e 100644 --- a/hotspot/src/cpu/x86/vm/frame_x86.hpp +++ b/hotspot/src/cpu/x86/vm/frame_x86.hpp @@ -76,11 +76,11 @@ // [locals and parameters ] // <- sender sp -// [1] When the c++ interpreter calls a new method it returns to the frame +// [1] When the C++ interpreter calls a new method it returns to the frame // manager which allocates a new frame on the stack. In that case there // is no real callee of this newly allocated frame. The frame manager is -// aware of the additional frame(s) and will pop them as nested calls -// complete. Howevers tTo make it look good in the debugger the frame +// aware of the additional frame(s) and will pop them as nested calls +// complete. However, to make it look good in the debugger the frame // manager actually installs a dummy pc pointing to RecursiveInterpreterActivation // with a fake interpreter_state* parameter to make it easy to debug // nested calls. @@ -88,7 +88,7 @@ // Note that contrary to the layout for the assembly interpreter the // expression stack allocated for the C++ interpreter is full sized. // However this is not as bad as it seems as the interpreter frame_manager -// will truncate the unused space on succesive method calls. +// will truncate the unused space on successive method calls. 
// // ------------------------------ C++ interpreter ---------------------------------------- @@ -167,10 +167,7 @@ #ifdef ASSERT // Used in frame::sender_for_{interpreter,compiled}_frame - static void verify_deopt_original_pc( nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false); - static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) { - verify_deopt_original_pc(nm, unextended_sp, true); - } + static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp); #endif public: diff --git a/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp b/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp index 0f1839692d5..7f6a99ad2d6 100644 --- a/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp +++ b/hotspot/src/cpu/x86/vm/frame_x86.inline.hpp @@ -94,7 +94,7 @@ inline frame::frame(intptr_t* sp, intptr_t* fp) { // find_blob call. This is also why we can have no asserts on the validity // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler // -> pd_last_frame should use a specialized version of pd_last_frame which could - // call a specilaized frame constructor instead of this one. + // call a specialized frame constructor instead of this one. // Then we could use the assert below. However this assert is of somewhat dubious // value. 
// assert(_pc != NULL, "no pc?"); diff --git a/hotspot/src/cpu/x86/vm/globals_x86.hpp b/hotspot/src/cpu/x86/vm/globals_x86.hpp index a6d0fbbb336..61b168bfbbc 100644 --- a/hotspot/src/cpu/x86/vm/globals_x86.hpp +++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp @@ -82,6 +82,8 @@ define_pd_global(size_t, CMSYoungGenPerWorker, 64*M); // default max size of CM define_pd_global(uintx, TypeProfileLevel, 111); +define_pd_global(bool, PreserveFramePointer, false); + #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ \ develop(bool, IEEEPrecision, true, \ diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp index 9663bd78893..2b59daf59d1 100644 --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -6090,6 +6090,10 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_ // We always push rbp, so that on return to interpreter rbp, will be // restored correctly and we can correct the stack. push(rbp); + // Save caller's stack pointer into RBP if the frame pointer is preserved. + if (PreserveFramePointer) { + mov(rbp, rsp); + } // Remove word for ebp framesize -= wordSize; @@ -6104,6 +6108,11 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_ // Save RBP register now. framesize -= wordSize; movptr(Address(rsp, framesize), rbp); + // Save caller's stack pointer into RBP if the frame pointer is preserved. 
+ if (PreserveFramePointer) { + movptr(rbp, rsp); + addptr(rbp, framesize + wordSize); + } } if (VerifyStackAtCalls) { // Majik cookie to verify stack depth diff --git a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp index b3776ed8c3f..65a2f3bf30b 100644 --- a/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp +++ b/hotspot/src/cpu/x86/vm/methodHandles_x86.cpp @@ -374,7 +374,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, // member_reg - MemberName that was the trailing argument // temp1_recv_klass - klass of stacked receiver, if needed // rsi/r13 - interpreter linkage (if interpreted) - // rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled) + // rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled) Label L_incompatible_class_change_error; switch (iid) { diff --git a/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp b/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp index 1cc10d76622..36457cb8178 100644 --- a/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/runtime_x86_32.cpp @@ -126,10 +126,6 @@ void OptoRuntime::generate_exception_blob() { // rax: exception handler for given - // Restore SP from BP if the exception PC is a MethodHandle call site. - __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // We have a handler in rax, (could be deopt blob) // rdx - throwing pc, deopt blob will need it. diff --git a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp index afc137b69c2..79849142554 100644 --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp @@ -3393,8 +3393,8 @@ void OptoRuntime::generate_exception_blob() { // Save callee-saved registers. See x86_64.ad. - // rbp is an implicitly saved callee saved register (i.e. 
the calling - // convention will save restore it in prolog/epilog) Other than that + // rbp is an implicitly saved callee saved register (i.e., the calling + // convention will save/restore it in the prolog/epilog). Other than that // there are no callee save registers now that adapter frames are gone. __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp); @@ -3436,9 +3436,9 @@ void OptoRuntime::generate_exception_blob() { // Restore callee-saved registers - // rbp is an implicitly saved callee saved register (i.e. the calling + // rbp is an implicitly saved callee-saved register (i.e., the calling // convention will save restore it in prolog/epilog) Other than that - // there are no callee save registers no that adapter frames are gone. + // there are no callee save registers now that adapter frames are gone. __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt)); @@ -3447,10 +3447,6 @@ void OptoRuntime::generate_exception_blob() { // rax: exception handler - // Restore SP from BP if the exception PC is a MethodHandle call site. - __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0); - __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save); - // We have a handler in rax (could be deopt blob). __ mov(r8, rax); diff --git a/hotspot/src/cpu/x86/vm/x86.ad b/hotspot/src/cpu/x86/vm/x86.ad index 68b42b840bd..5c99066206e 100644 --- a/hotspot/src/cpu/x86/vm/x86.ad +++ b/hotspot/src/cpu/x86/vm/x86.ad @@ -930,21 +930,6 @@ static inline jdouble replicate8_imm(int con, int width) { encode %{ - enc_class preserve_SP %{ - debug_only(int off0 = cbuf.insts_size()); - MacroAssembler _masm(&cbuf); - // RBP is preserved across all calls, even compiled calls. - // Use it to preserve RSP in places where the callee might change the SP. 
- __ movptr(rbp_mh_SP_save, rsp); - debug_only(int off1 = cbuf.insts_size()); - assert(off1 - off0 == preserve_SP_size(), "correct size prediction"); - %} - - enc_class restore_SP %{ - MacroAssembler _masm(&cbuf); - __ movptr(rsp, rbp_mh_SP_save); - %} - enc_class call_epilog %{ if (VerifyStackAtCalls) { // Check that stack depth is unchanged: find majik cookie on stack diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad index e8b54389258..709c908c3b4 100644 --- a/hotspot/src/cpu/x86/vm/x86_32.ad +++ b/hotspot/src/cpu/x86/vm/x86_32.ad @@ -123,50 +123,94 @@ alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ ) // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // +// Class for no registers (empty set). +reg_class no_reg(); + // Class for all registers -reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); +reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); +// Class for all registers (excluding EBP) +reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP); +// Dynamic register class that selects at runtime between register classes +// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer). +// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp; +reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %}); + // Class for general registers -reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX); -// Class for general registers which may be used for implicit null checks on win95 -// Also safe for use by tailjump. We don't want to allocate in rbp, -reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX); +reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX); +// Class for general registers (excluding EBP). +// This register class can be used for implicit null checks on win95. 
+// It is also safe for use by tailjumps (we don't want to allocate in ebp). +// Used also if the PreserveFramePointer flag is true. +reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); +// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp. +reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); + // Class of "X" registers reg_class int_x_reg(EBX, ECX, EDX, EAX); + // Class of registers that can appear in an address with no offset. // EBP and ESP require an extra instruction byte for zero offset. // Used in fast-unlock reg_class p_reg(EDX, EDI, ESI, EBX); -// Class for general registers not including ECX -reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX); -// Class for general registers not including EAX + +// Class for general registers excluding ECX +reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); +// Class for general registers excluding ECX (and EBP) +reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); +// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp. +reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); + +// Class for general registers excluding EAX reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); -// Class for general registers not including EAX or EBX. -reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP); + +// Class for general registers excluding EAX and EBX. +reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); +// Class for general registers excluding EAX and EBX (and EBP) +reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); +// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp. 
+reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %}); + // Class of EAX (for multiply and divide operations) reg_class eax_reg(EAX); + // Class of EBX (for atomic add) reg_class ebx_reg(EBX); + // Class of ECX (for shift and JCXZ operations and cmpLTMask) reg_class ecx_reg(ECX); + // Class of EDX (for multiply and divide operations) reg_class edx_reg(EDX); + // Class of EDI (for synchronization) reg_class edi_reg(EDI); + // Class of ESI (for synchronization) reg_class esi_reg(ESI); -// Singleton class for interpreter's stack pointer -reg_class ebp_reg(EBP); + // Singleton class for stack pointer reg_class sp_reg(ESP); + // Singleton class for instruction pointer // reg_class ip_reg(EIP); + // Class of integer register pairs -reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI ); +reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI ); +// Class of integer register pairs (excluding EBP and EDI) +reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX ); +// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp. +reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %}); + // Class of integer register pairs that aligns with calling convention reg_class eadx_reg( EAX,EDX ); reg_class ebcx_reg( ECX,EBX ); + // Not AX or DX, used in divides -reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP ); +reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP); +// Not AX or DX (nor EBP), used in divides +reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI); +// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp. +reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %}); // Floating point registers. Notice FPR0 is not a choice. // FPR0 is not ever allocated; we use clever encodings to fake @@ -240,18 +284,11 @@ static int pre_call_resets_size() { return size; } -static int preserve_SP_size() { - return 2; // op, rm(reg/reg) -} - // !!!!! 
Special hack to get all type of calls to specify the byte offset // from the start of the call to the point where the return address // will point. int MachCallStaticJavaNode::ret_addr_offset() { - int offset = 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points - if (_method_handle_invoke) - offset += preserve_SP_size(); - return offset; + return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points } int MachCallDynamicJavaNode::ret_addr_offset() { @@ -283,15 +320,6 @@ int CallStaticJavaDirectNode::compute_padding(int current_offset) const { return round_to(current_offset, alignment_required()) - current_offset; } -// The address of the call instruction needs to be 4-byte aligned to -// ensure that it does not span a cache line so that it can be patched. -int CallStaticJavaHandleNode::compute_padding(int current_offset) const { - current_offset += pre_call_resets_size(); // skip fldcw, if any - current_offset += preserve_SP_size(); // skip mov rbp, rsp - current_offset += 1; // skip call opcode byte - return round_to(current_offset, alignment_required()) - current_offset; -} - // The address of the call instruction needs to be 4-byte aligned to // ensure that it does not span a cache line so that it can be patched. 
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { @@ -523,6 +551,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("PUSH EBP\t# Save EBP"); + if (PreserveFramePointer) { + st->print("\n\t"); + st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); + } if (framesize) { st->print("\n\t"); st->print("SUB ESP, #%d\t# Create frame",framesize); @@ -532,6 +564,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("\n\t"); framesize -= wordSize; st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); + if (PreserveFramePointer) { + st->print("\n\t"); + st->print("MOV EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize)); + } } if (VerifyStackAtCalls) { @@ -1489,7 +1525,7 @@ RegMask Matcher::modL_proj_mask() { } const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return EBP_REG_mask(); + return NO_REG_mask(); } // Returns true if the high 32 bits of the value is known to be zero. @@ -3735,7 +3771,7 @@ operand eRegP() %{ // On windows95, EBP is not safe to use for implicit null tests. operand eRegP_no_EBP() %{ - constraint(ALLOC_IN_RC(int_reg_no_rbp)); + constraint(ALLOC_IN_RC(int_reg_no_ebp)); match(RegP); match(eAXRegP); match(eBXRegP); @@ -3824,13 +3860,6 @@ operand eDIRegP(eRegP reg) %{ interface(REG_INTER); %} -operand eBPRegP() %{ - constraint(ALLOC_IN_RC(ebp_reg)); - match(RegP); - format %{ "EBP" %} - interface(REG_INTER); -%} - operand eRegL() %{ constraint(ALLOC_IN_RC(long_reg)); match(RegL); @@ -12615,7 +12644,6 @@ instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, // compute_padding() functions will have to be adjusted. instruct CallStaticJavaDirect(method meth) %{ match(CallStaticJava); - predicate(! 
((CallStaticJavaNode*)n)->is_method_handle_invoke()); effect(USE meth); ins_cost(300); @@ -12629,29 +12657,6 @@ instruct CallStaticJavaDirect(method meth) %{ ins_alignment(4); %} -// Call Java Static Instruction (method handle version) -// Note: If this code changes, the corresponding ret_addr_offset() and -// compute_padding() functions will have to be adjusted. -instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{ - match(CallStaticJava); - predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke()); - effect(USE meth); - // EBP is saved by all callees (for interpreter stack correction). - // We use it here for a similar purpose, in {preserve,restore}_SP. - - ins_cost(300); - format %{ "CALL,static/MethodHandle " %} - opcode(0xE8); /* E8 cd */ - ins_encode( pre_call_resets, - preserve_SP, - Java_Static_Call( meth ), - restore_SP, - call_epilog, - post_call_FPU ); - ins_pipe( pipe_slow ); - ins_alignment(4); -%} - // Call Java Dynamic Instruction // Note: If this code changes, the corresponding ret_addr_offset() and // compute_padding() functions will have to be adjusted. diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad index b32ba52b789..9abe58f5371 100644 --- a/hotspot/src/cpu/x86/vm/x86_64.ad +++ b/hotspot/src/cpu/x86/vm/x86_64.ad @@ -166,42 +166,67 @@ alloc_class chunk0(R10, R10_H, // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // -// Class for all pointer registers (including RSP) -reg_class any_reg(RAX, RAX_H, - RDX, RDX_H, - RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - RSP, RSP_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R12, R12_H, - R13, R13_H, - R14, R14_H, - R15, R15_H); +// Empty register class. 
+reg_class no_reg(); -// Class for all pointer registers except RSP -reg_class ptr_reg(RAX, RAX_H, - RDX, RDX_H, - RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); +// Class for all pointer registers (including RSP and RBP) +reg_class any_reg_with_rbp(RAX, RAX_H, + RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + RSP, RSP_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H); -// Class for all pointer registers except RAX and RSP -reg_class ptr_no_rax_reg(RDX, RDX_H, - RBP, RBP_H, +// Class for all pointer registers (including RSP, but excluding RBP) +reg_class any_reg_no_rbp(RAX, RAX_H, + RDX, RDX_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + RSP, RSP_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R12, R12_H, + R13, R13_H, + R14, R14_H, + R15, R15_H); + +// Dynamic register class that selects at runtime between register classes +// any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer). +// Equivalent to: return PreserveFramePointer ? 
any_reg_no_rbp : any_reg_with_rbp; +reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all pointer registers (excluding RSP) +reg_class ptr_reg_with_rbp(RAX, RAX_H, + RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all pointer registers (excluding RSP and RBP) +reg_class ptr_reg_no_rbp(RAX, RAX_H, + RDX, RDX_H, RDI, RDI_H, RSI, RSI_H, RCX, RCX_H, @@ -213,31 +238,66 @@ reg_class ptr_no_rax_reg(RDX, RDX_H, R13, R13_H, R14, R14_H); -reg_class ptr_no_rbp_reg(RDX, RDX_H, - RAX, RAX_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); +// Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp. +reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %}); -// Class for all pointer registers except RAX, RBX and RSP -reg_class ptr_no_rax_rbx_reg(RDX, RDX_H, - RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); +// Class for all pointer registers (excluding RAX and RSP) +reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all pointer registers (excluding RAX, RSP, and RBP) +reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp. 
+reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all pointer registers (excluding RAX, RBX, and RSP) +reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all pointer registers (excluding RAX, RBX, RSP, and RBP) +reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp. +reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %}); // Singleton class for RAX pointer register reg_class ptr_rax_reg(RAX, RAX_H); @@ -251,59 +311,29 @@ reg_class ptr_rsi_reg(RSI, RSI_H); // Singleton class for RDI pointer register reg_class ptr_rdi_reg(RDI, RDI_H); -// Singleton class for RBP pointer register -reg_class ptr_rbp_reg(RBP, RBP_H); - // Singleton class for stack pointer reg_class ptr_rsp_reg(RSP, RSP_H); // Singleton class for TLS pointer reg_class ptr_r15_reg(R15, R15_H); -// Class for all long registers (except RSP) -reg_class long_reg(RAX, RAX_H, - RDX, RDX_H, - RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); +// Class for all long registers (excluding RSP) +reg_class long_reg_with_rbp(RAX, RAX_H, + RDX, RDX_H, + RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); -// Class for all long registers except RAX, RDX (and RSP) -reg_class long_no_rax_rdx_reg(RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RCX, RCX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, 
R13_H, - R14, R14_H); - -// Class for all long registers except RCX (and RSP) -reg_class long_no_rcx_reg(RBP, RBP_H, - RDI, RDI_H, - RSI, RSI_H, - RAX, RAX_H, - RDX, RDX_H, - RBX, RBX_H, - R8, R8_H, - R9, R9_H, - R10, R10_H, - R11, R11_H, - R13, R13_H, - R14, R14_H); - -// Class for all long registers except RAX (and RSP) -reg_class long_no_rax_reg(RBP, RBP_H, +// Class for all long registers (excluding RSP and RBP) +reg_class long_reg_no_rbp(RAX, RAX_H, RDX, RDX_H, RDI, RDI_H, RSI, RSI_H, @@ -316,6 +346,67 @@ reg_class long_no_rax_reg(RBP, RBP_H, R13, R13_H, R14, R14_H); +// Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp. +reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all long registers (excluding RAX, RDX and RSP) +reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all long registers (excluding RAX, RDX, RSP, and RBP) +reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H, + RSI, RSI_H, + RCX, RCX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp. 
+reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all long registers (excluding RCX and RSP) +reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H, + RDI, RDI_H, + RSI, RSI_H, + RAX, RAX_H, + RDX, RDX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Class for all long registers (excluding RCX, RSP, and RBP) +reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H, + RSI, RSI_H, + RAX, RAX_H, + RDX, RDX_H, + RBX, RBX_H, + R8, R8_H, + R9, R9_H, + R10, R10_H, + R11, R11_H, + R13, R13_H, + R14, R14_H); + +// Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp. +reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %}); + // Singleton class for RAX long register reg_class long_rax_reg(RAX, RAX_H); @@ -325,27 +416,27 @@ reg_class long_rcx_reg(RCX, RCX_H); // Singleton class for RDX long register reg_class long_rdx_reg(RDX, RDX_H); -// Class for all int registers (except RSP) -reg_class int_reg(RAX, - RDX, - RBP, - RDI, - RSI, - RCX, - RBX, - R8, - R9, - R10, - R11, - R13, - R14); +// Class for all int registers (excluding RSP) +reg_class int_reg_with_rbp(RAX, + RDX, + RBP, + RDI, + RSI, + RCX, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); -// Class for all int registers except RCX (and RSP) -reg_class int_no_rcx_reg(RAX, +// Class for all int registers (excluding RSP and RBP) +reg_class int_reg_no_rbp(RAX, RDX, - RBP, RDI, RSI, + RCX, RBX, R8, R9, @@ -354,18 +445,66 @@ reg_class int_no_rcx_reg(RAX, R13, R14); -// Class for all int registers except RAX, RDX (and RSP) -reg_class int_no_rax_rdx_reg(RBP, - RDI, - RSI, - RCX, - RBX, - R8, - R9, - R10, - R11, - R13, - R14); +// Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp. 
+reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all int registers (excluding RCX and RSP) +reg_class int_no_rcx_reg_with_rbp(RAX, + RDX, + RBP, + RDI, + RSI, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Class for all int registers (excluding RCX, RSP, and RBP) +reg_class int_no_rcx_reg_no_rbp(RAX, + RDX, + RDI, + RSI, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp. +reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %}); + +// Class for all int registers (excluding RAX, RDX, and RSP) +reg_class int_no_rax_rdx_reg_with_rbp(RBP, + RDI, + RSI, + RCX, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Class for all int registers (excluding RAX, RDX, RSP, and RBP) +reg_class int_no_rax_rdx_reg_no_rbp(RDI, + RSI, + RCX, + RBX, + R8, + R9, + R10, + R11, + R13, + R14); + +// Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp. +reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %}); // Singleton class for RAX int register reg_class int_rax_reg(RAX); @@ -396,9 +535,6 @@ source %{ #define __ _masm. -static int preserve_SP_size() { - return 3; // rex.w, op, rm(reg/reg) -} static int clear_avx_size() { return (Compile::current()->max_vector_size() > 16) ? 
3 : 0; // vzeroupper } @@ -409,9 +545,7 @@ static int clear_avx_size() { int MachCallStaticJavaNode::ret_addr_offset() { int offset = 5; // 5 bytes from start of call to where return address points - offset += clear_avx_size(); - if (_method_handle_invoke) - offset += preserve_SP_size(); + offset += clear_avx_size(); return offset; } @@ -448,16 +582,6 @@ int CallStaticJavaDirectNode::compute_padding(int current_offset) const return round_to(current_offset, alignment_required()) - current_offset; } -// The address of the call instruction needs to be 4-byte aligned to -// ensure that it does not span a cache line so that it can be patched. -int CallStaticJavaHandleNode::compute_padding(int current_offset) const -{ - current_offset += preserve_SP_size(); // skip mov rbp, rsp - current_offset += clear_avx_size(); // skip vzeroupper - current_offset += 1; // skip call opcode byte - return round_to(current_offset, alignment_required()) - current_offset; -} - // The address of the call instruction needs to be 4-byte aligned to // ensure that it does not span a cache line so that it can be patched. 
int CallDynamicJavaDirectNode::compute_padding(int current_offset) const @@ -724,6 +848,10 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("# stack bang (%d bytes)", bangsize); st->print("\n\t"); st->print("pushq rbp\t# Save rbp"); + if (PreserveFramePointer) { + st->print("\n\t"); + st->print("movq rbp, rsp\t# Save the caller's SP into rbp"); + } if (framesize) { st->print("\n\t"); st->print("subq rsp, #%d\t# Create frame",framesize); @@ -732,7 +860,11 @@ void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { st->print("subq rsp, #%d\t# Create frame",framesize); st->print("\n\t"); framesize -= wordSize; - st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize); + st->print("movq [rsp + #%d], rbp\t# Save rbp",framesize); + if (PreserveFramePointer) { + st->print("\n\t"); + st->print("movq rbp, [rsp + #%d]\t# Save the caller's SP into rbp", (framesize + wordSize)); + } } if (VerifyStackAtCalls) { @@ -1598,8 +1730,9 @@ RegMask Matcher::modL_proj_mask() { return LONG_RDX_REG_mask(); } +// Register for saving SP into on method handle invokes. Not used on x86_64. const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return PTR_RBP_REG_mask(); + return NO_REG_mask(); } %} @@ -3202,7 +3335,7 @@ operand no_rax_rdx_RegI() // Pointer Register operand any_RegP() %{ - constraint(ALLOC_IN_RC(any_reg)); + constraint(ALLOC_IN_RC(any_reg)); match(RegP); match(rax_RegP); match(rbx_RegP); @@ -3224,8 +3357,8 @@ operand rRegP() match(rbx_RegP); match(rdi_RegP); match(rsi_RegP); - match(rbp_RegP); - match(r15_RegP); // See Q&A below about r15_RegP. + match(rbp_RegP); // See Q&A below about + match(r15_RegP); // r15_RegP and rbp_RegP. format %{ %} interface(REG_INTER); @@ -3241,11 +3374,14 @@ operand rRegN() %{ // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP? // Answer: Operand match rules govern the DFA as it processes instruction inputs. 
-// It's fine for an instruction input which expects rRegP to match a r15_RegP. +// It's fine for an instruction input that expects rRegP to match a r15_RegP. // The output of an instruction is controlled by the allocator, which respects // register class masks, not match rules. Unless an instruction mentions // r15_RegP or any_RegP explicitly as its output, r15 will not be considered // by the allocator as an input. +// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true, +// the RBP is used as a proper frame pointer and is not included in ptr_reg. As a +// result, RBP is not included in the output of the instruction either. operand no_rax_RegP() %{ @@ -3259,9 +3395,11 @@ operand no_rax_RegP() interface(REG_INTER); %} +// This operand is not allowed to use RBP even if +// RBP is not used to hold the frame pointer. operand no_rbp_RegP() %{ - constraint(ALLOC_IN_RC(ptr_no_rbp_reg)); + constraint(ALLOC_IN_RC(ptr_reg_no_rbp)); match(RegP); match(rbx_RegP); match(rsi_RegP); @@ -3338,16 +3476,6 @@ operand rdi_RegP() interface(REG_INTER); %} -operand rbp_RegP() -%{ - constraint(ALLOC_IN_RC(ptr_rbp_reg)); - match(RegP); - match(rRegP); - - format %{ %} - interface(REG_INTER); -%} - operand r15_RegP() %{ constraint(ALLOC_IN_RC(ptr_r15_reg)); @@ -11410,7 +11538,6 @@ instruct safePoint_poll_far(rFlagsReg cr, rRegP poll) // compute_padding() functions will have to be adjusted. instruct CallStaticJavaDirect(method meth) %{ match(CallStaticJava); - predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke()); effect(USE meth); ins_cost(300); @@ -11421,27 +11548,6 @@ instruct CallStaticJavaDirect(method meth) %{ ins_alignment(4); %} -// Call Java Static Instruction (method handle version) -// Note: If this code changes, the corresponding ret_addr_offset() and -// compute_padding() functions will have to be adjusted. 
-instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{ - match(CallStaticJava); - predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke()); - effect(USE meth); - // RBP is saved by all callees (for interpreter stack correction). - // We use it here for a similar purpose, in {preserve,restore}_SP. - - ins_cost(300); - format %{ "call,static/MethodHandle " %} - opcode(0xE8); /* E8 cd */ - ins_encode(clear_avx, preserve_SP, - Java_Static_Call(meth), - restore_SP, - call_epilog); - ins_pipe(pipe_slow); - ins_alignment(4); -%} - // Call Java Dynamic Instruction // Note: If this code changes, the corresponding ret_addr_offset() and // compute_padding() functions will have to be adjusted. diff --git a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp index 8960feb2226..9a7c291b886 100644 --- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp +++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp @@ -4083,7 +4083,7 @@ bool GraphBuilder::try_method_handle_inline(ciMethod* callee) { ValueType* type = apop()->type(); if (type->is_constant()) { ciMethod* target = type->as_ObjectType()->constant_value()->as_member_name()->get_vmtarget(); - // If the target is another method handle invoke try recursivly to get + // If the target is another method handle invoke, try to recursively get // a better target. if (target->is_method_handle_intrinsic()) { if (try_method_handle_inline(target)) { diff --git a/hotspot/src/share/vm/c1/c1_LIR.cpp b/hotspot/src/share/vm/c1/c1_LIR.cpp index d58e3c85b42..7d2b4f3e883 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.cpp +++ b/hotspot/src/share/vm/c1/c1_LIR.cpp @@ -458,7 +458,7 @@ void LIR_OpRTCall::verify() const { //-------------------visits-------------------------- // complete rework of LIR instruction visitor. 
-// The virtual calls for each instruction type is replaced by a big +// The virtual call for each instruction type is replaced by a big // switch that adds the operands for each instruction void LIR_OpVisitState::visit(LIR_Op* op) { @@ -825,7 +825,8 @@ void LIR_OpVisitState::visit(LIR_Op* op) { } if (opJavaCall->_info) do_info(opJavaCall->_info); - if (opJavaCall->is_method_handle_invoke()) { + if (FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr && + opJavaCall->is_method_handle_invoke()) { opJavaCall->_method_handle_invoke_SP_save_opr = FrameMap::method_handle_invoke_SP_save_opr(); do_temp(opJavaCall->_method_handle_invoke_SP_save_opr); } diff --git a/hotspot/src/share/vm/c1/c1_LIR.hpp b/hotspot/src/share/vm/c1/c1_LIR.hpp index 90a47c8b47e..4affbfb0826 100644 --- a/hotspot/src/share/vm/c1/c1_LIR.hpp +++ b/hotspot/src/share/vm/c1/c1_LIR.hpp @@ -1219,10 +1219,8 @@ class LIR_OpJavaCall: public LIR_OpCall { // JSR 292 support. bool is_invokedynamic() const { return code() == lir_dynamic_call; } bool is_method_handle_invoke() const { - return - method()->is_compiled_lambda_form() // Java-generated adapter - || - method()->is_method_handle_intrinsic(); // JVM-generated MH intrinsic + return method()->is_compiled_lambda_form() || // Java-generated lambda form + method()->is_method_handle_intrinsic(); // JVM-generated MH intrinsic } intptr_t vtable_offset() const { diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp index 5c8ddd01f8e..cc35fc3ae30 100644 --- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp @@ -2875,7 +2875,7 @@ LIRItemList* LIRGenerator::invoke_visit_arguments(Invoke* x) { // g) lock result registers and emit call operation // // Before issuing a call, we must spill-save all values on stack -// that are in caller-save register. "spill-save" moves thos registers +// that are in caller-save register. 
"spill-save" moves those registers // either in a free callee-save register or spills them if no free // callee save register is available. // @@ -2883,7 +2883,7 @@ LIRItemList* LIRGenerator::invoke_visit_arguments(Invoke* x) { // - if invoked between e) and f), we may lock callee save // register in "spill-save" that destroys the receiver register // before f) is executed -// - if we rearange the f) to be earlier, by loading %o0, it +// - if we rearrange f) to be earlier (by loading %o0) it // may destroy a value on the stack that is currently in %o0 // and is waiting to be spilled // - if we keep the receiver locked while doing spill-save, @@ -2916,14 +2916,16 @@ void LIRGenerator::do_Invoke(Invoke* x) { assert(receiver->is_illegal() || receiver->is_equal(LIR_Assembler::receiverOpr()), "must match"); // JSR 292 - // Preserve the SP over MethodHandle call sites. + // Preserve the SP over MethodHandle call sites, if needed. ciMethod* target = x->target(); bool is_method_handle_invoke = (// %%% FIXME: Are both of these relevant? target->is_method_handle_intrinsic() || target->is_compiled_lambda_form()); if (is_method_handle_invoke) { info->set_is_method_handle_invoke(true); - __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr()); + if(FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) { + __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr()); + } } switch (x->code()) { @@ -2963,8 +2965,9 @@ void LIRGenerator::do_Invoke(Invoke* x) { } // JSR 292 - // Restore the SP after MethodHandle call sites. - if (is_method_handle_invoke) { + // Restore the SP after MethodHandle call sites, if needed. 
+ if (is_method_handle_invoke + && FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) { __ move(FrameMap::method_handle_invoke_SP_save_opr(), FrameMap::stack_pointer()); } diff --git a/hotspot/src/share/vm/opto/bytecodeInfo.cpp b/hotspot/src/share/vm/opto/bytecodeInfo.cpp index 757b9717668..fa476d62401 100644 --- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp +++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp @@ -631,11 +631,11 @@ InlineTree *InlineTree::build_inline_tree_for_callee( ciMethod* callee_method, J } int max_inline_level_adjust = 0; if (caller_jvms->method() != NULL) { - if (caller_jvms->method()->is_compiled_lambda_form()) + if (caller_jvms->method()->is_compiled_lambda_form()) { max_inline_level_adjust += 1; // don't count actions in MH or indy adapter frames - else if (callee_method->is_method_handle_intrinsic() || - callee_method->is_compiled_lambda_form()) { - max_inline_level_adjust += 1; // don't count method handle calls from java.lang.invoke implem + } else if (callee_method->is_method_handle_intrinsic() || + callee_method->is_compiled_lambda_form()) { + max_inline_level_adjust += 1; // don't count method handle calls from java.lang.invoke implementation } if (max_inline_level_adjust != 0 && C->print_inlining() && (Verbose || WizardMode)) { CompileTask::print_inline_indent(inline_level()); diff --git a/hotspot/src/share/vm/prims/forte.cpp b/hotspot/src/share/vm/prims/forte.cpp index 2ae0e1bba9c..19d715ce36f 100644 --- a/hotspot/src/share/vm/prims/forte.cpp +++ b/hotspot/src/share/vm/prims/forte.cpp @@ -171,8 +171,27 @@ static bool is_decipherable_compiled_frame(JavaThread* thread, frame* fr, nmetho // Now do we have a useful PcDesc? if (pc_desc == NULL || pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) { - // No debug information available for this pc - // vframeStream would explode if we try and walk the frames. + // No debug information is available for this PC. 
+ // + // vframeStreamCommon::fill_from_frame() will decode the frame depending + // on the state of the thread. + // + // Case #1: If the thread is in Java (state == _thread_in_Java), then + // the vframeStreamCommon object will be filled as if the frame were a native + // compiled frame. Therefore, no debug information is needed. + // + // Case #2: If the thread is in any other state, then two steps will be performed: + // - if asserts are enabled, found_bad_method_frame() will be called and + // the assert in found_bad_method_frame() will be triggered; + // - if asserts are disabled, the vframeStreamCommon object will be filled + // as if it were a native compiled frame. + // + // Case (2) is similar to the way interpreter frames are processed in + // vframeStreamCommon::fill_from_interpreter_frame in case no valid BCI + // was found for an interpreted frame. If asserts are enabled, the assert + // in found_bad_method_frame() will be triggered. If asserts are disabled, + // the vframeStreamCommon object will be filled afterwards as if the + // interpreter were at the point of entering into the method. return false; } @@ -229,9 +248,10 @@ static bool is_decipherable_interpreted_frame(JavaThread* thread, // a valid method. Then again we may have caught an interpreter // frame in the middle of construction and the bci field is // not yet valid. - - *method_p = method; if (!method->is_valid_method()) return false; + *method_p = method; // If the Method* found is invalid, it is + // ignored by forte_fill_call_trace_given_top(). + // So set method_p only if the Method is valid. address bcp = fr->interpreter_frame_bcp(); int bci = method->validate_bci_from_bcp(bcp); @@ -245,18 +265,33 @@ static bool is_decipherable_interpreted_frame(JavaThread* thread, } -// Determine if 'fr' can be used to find an initial Java frame. -// Return false if it can not find a fully decipherable Java frame -// (in other words a frame that isn't safe to use in a vframe stream). 
-// Obviously if it can't even find a Java frame false will also be returned. +// Determine if a Java frame can be found starting with the frame 'fr'. // -// If we find a Java frame decipherable or not then by definition we have -// identified a method and that will be returned to the caller via method_p. -// If we can determine a bci that is returned also. (Hmm is it possible -// to return a method and bci and still return false? ) +// Check the return value of find_initial_Java_frame and the value of +// 'method_p' to decide on how to use the results returned by this method. // -// The initial Java frame we find (if any) is return via initial_frame_p. +// If 'method_p' is not NULL, an initial Java frame has been found and +// the stack can be walked starting from that initial frame. In this case, +// 'method_p' points to the Method that the initial frame belongs to and +// the initial Java frame is returned in initial_frame_p. // +// find_initial_Java_frame() returns true if a Method has been found (i.e., +// 'method_p' is not NULL) and the initial frame that belongs to that Method +// is decipherable. +// +// A frame is considered to be decipherable: +// +// - if the frame is a compiled frame and a PCDesc is available; +// +// - if the frame is an interpreter frame that is valid or the thread is in +// state _thread_in_native, _thread_in_vm, or _thread_blocked. +// +// Note that find_initial_Java_frame() can return false even if an initial +// Java method was found (e.g., there is no PCDesc available for the method). +// +// If 'method_p' is NULL, it was not possible to find a Java frame when +// walking the stack starting from 'fr'. In this case find_initial_Java_frame +// returns false. static bool find_initial_Java_frame(JavaThread* thread, frame* fr, @@ -276,8 +311,6 @@ static bool find_initial_Java_frame(JavaThread* thread, // recognizable to us. This should only happen if we are in a JRT_LEAF // or something called by a JRT_LEAF method. 
- - frame candidate = *fr; // If the starting frame we were given has no codeBlob associated with @@ -332,9 +365,11 @@ static bool find_initial_Java_frame(JavaThread* thread, nmethod* nm = (nmethod*) candidate.cb(); *method_p = nm->method(); - // If the frame isn't fully decipherable then the default - // value for the bci is a signal that we don't have a bci. - // If we have a decipherable frame this bci value will + // If the frame is not decipherable, then the value of -1 + // for the BCI is used to signal that no BCI is available. + // Furthermore, the method returns false in this case. + // + // If a decipherable frame is available, the BCI value will // not be used. *bci_p = -1; @@ -345,9 +380,9 @@ static bool find_initial_Java_frame(JavaThread* thread, if (nm->is_native_method()) return true; - // If it isn't decipherable then we have found a pc that doesn't - // have a PCDesc that can get us a bci however we did find - // a method + // If the frame is not decipherable, then a PC was found + // that does not have a PCDesc from which a BCI can be obtained. + // Nevertheless, a Method was found. 
if (!is_decipherable_compiled_frame(thread, &candidate, nm)) { return false; @@ -356,7 +391,7 @@ static bool find_initial_Java_frame(JavaThread* thread, // is_decipherable_compiled_frame may modify candidate's pc *initial_frame_p = candidate; - assert(nm->pc_desc_at(candidate.pc()) != NULL, "if it's decipherable then pc must be valid"); + assert(nm->pc_desc_at(candidate.pc()) != NULL, "debug information must be available if the frame is decipherable"); return true; } @@ -386,17 +421,17 @@ static void forte_fill_call_trace_given_top(JavaThread* thd, frame initial_Java_frame; Method* method; - int bci; + int bci = -1; // assume BCI is not available for method + // update with correct information if available int count; count = 0; assert(trace->frames != NULL, "trace->frames must be non-NULL"); - bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci); - - // The frame might not be walkable but still recovered a method - // (e.g. an nmethod with no scope info for the pc) + // Walk the stack starting from 'top_frame' and search for an initial Java frame. + find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci); + // Check if a Java Method has been found. if (method == NULL) return; if (!method->is_valid_method()) { @@ -404,29 +439,6 @@ static void forte_fill_call_trace_given_top(JavaThread* thd, return; } - // We got a Java frame however it isn't fully decipherable - // so it won't necessarily be safe to use it for the - // initial frame in the vframe stream. - - if (!fully_decipherable) { - // Take whatever method the top-frame decoder managed to scrape up. - // We look further at the top frame only if non-safepoint - // debugging information is available. 
- count++; - trace->num_frames = count; - trace->frames[0].method_id = method->find_jmethod_id_or_null(); - if (!method->is_native()) { - trace->frames[0].lineno = bci; - } else { - trace->frames[0].lineno = -3; - } - - if (!initial_Java_frame.safe_for_sender(thd)) return; - - RegisterMap map(thd, false); - initial_Java_frame = initial_Java_frame.sender(&map); - } - vframeStreamForte st(thd, initial_Java_frame, false); for (; !st.at_end() && count < depth; st.forte_next(), count++) { diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index c4815a992fe..9f929e99333 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -3918,7 +3918,11 @@ class CommandLineFlags { "Use locked-tracing when doing event-based tracing") \ \ diagnostic(bool, UseUnalignedAccesses, false, \ - "Use unaligned memory accesses in sun.misc.Unsafe") + "Use unaligned memory accesses in sun.misc.Unsafe") \ + \ + product_pd(bool, PreserveFramePointer, \ + "Use the FP register for holding the frame pointer " \ + "and not as a general purpose register.") /* * Macros for factoring of globals diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index 04a23aa23c1..5446391648c 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -1179,7 +1179,7 @@ methodHandle SharedRuntime::resolve_sub_helper(JavaThread *thread, #endif // JSR 292 key invariant: - // If the resolved method is a MethodHandle invoke target the call + // If the resolved method is a MethodHandle invoke target, the call // site must be a MethodHandle call site, because the lambda form might tail-call // leaving the stack in a state unknown to either caller or callee // TODO detune for now but we might need it again diff --git a/hotspot/src/share/vm/runtime/vframe.hpp b/hotspot/src/share/vm/runtime/vframe.hpp index 
17ead61b782..badd129455b 100644 --- a/hotspot/src/share/vm/runtime/vframe.hpp +++ b/hotspot/src/share/vm/runtime/vframe.hpp @@ -389,12 +389,12 @@ inline void vframeStreamCommon::fill_from_compiled_frame(int decode_offset) { decode_offset < 0 || decode_offset >= nm()->scopes_data_size()) { // 6379830 AsyncGetCallTrace sometimes feeds us wild frames. - // If we attempt to read nmethod::scopes_data at serialized_null (== 0), - // or if we read some at other crazy offset, - // we will decode garbage and make wild references into the heap, - // leading to crashes in product mode. - // (This isn't airtight, of course, since there are internal - // offsets which are also crazy.) + // If we read nmethod::scopes_data at serialized_null (== 0) + // or if we read at some other invalid offset, invalid values will be decoded. + // Based on these values, invalid heap locations could be referenced + // that could lead to crashes in product mode. + // Therefore, do not use the decode offset if invalid, but fill the frame + // as if it were a native compiled frame (no Java-level assumptions). #ifdef ASSERT if (WizardMode) { tty->print_cr("Error in fill_from_frame: pc_desc for " @@ -514,9 +514,15 @@ inline void vframeStreamCommon::fill_from_interpreter_frame() { address bcp = _frame.interpreter_frame_bcp(); int bci = method->validate_bci_from_bcp(bcp); // 6379830 AsyncGetCallTrace sometimes feeds us wild frames. + // AsyncGetCallTrace interrupts the VM asynchronously. As a result + // it is possible to access an interpreter frame for which + // no Java-level information is yet available (e.g., because + // the frame was being created when the VM interrupted it). + // In this scenario, pretend that the interpreter is at the point + // of entering the method. 
if (bci < 0) { found_bad_method_frame(); - bci = 0; // pretend it's on the point of entering + bci = 0; } _mode = interpreted_mode; _method = method; From cb19c4b63ab4416ed4ee4c551f245331bec96fec Mon Sep 17 00:00:00 2001 From: Roland Westrelin Date: Thu, 23 Apr 2015 16:38:08 +0200 Subject: [PATCH 11/13] 8078444: compiler/arraycopy/TestArrayCopyNoInitDeopt.java fails with exception 'm2 not deoptimized' Some platform don't have speculative types Reviewed-by: kvn --- .../arraycopy/TestArrayCopyNoInitDeopt.java | 58 ++++++++++--------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java b/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java index c72ff09a4c5..8cef75f9606 100644 --- a/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java +++ b/hotspot/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java @@ -116,44 +116,46 @@ public class TestArrayCopyNoInitDeopt { throw new RuntimeException("m1 deoptimized again"); } - // Same test as above but with speculative types + if (WHITE_BOX.getUintxVMFlag("TypeProfileLevel") == 20) { + // Same test as above but with speculative types - // Warm up & make sure we collect type profiling - for (int i = 0; i < 20000; i++) { - m2(src); - } + // Warm up & make sure we collect type profiling + for (int i = 0; i < 20000; i++) { + m2(src); + } - // And make sure m2 is compiled by C2 - WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); + // And make sure m2 is compiled by C2 + WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); - if (!WHITE_BOX.isMethodCompiled(method_m2)) { - throw new RuntimeException("m2 not compiled"); - } + if (!WHITE_BOX.isMethodCompiled(method_m2)) { + throw new RuntimeException("m2 not compiled"); + } - // should deoptimize for speculative type check - if (!deoptimize(method_m2, src_obj)) { - throw new RuntimeException("m2 not 
deoptimized"); - } + // should deoptimize for speculative type check + if (!deoptimize(method_m2, src_obj)) { + throw new RuntimeException("m2 not deoptimized"); + } - WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); + WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); - if (!WHITE_BOX.isMethodCompiled(method_m2)) { - throw new RuntimeException("m2 not recompiled"); - } + if (!WHITE_BOX.isMethodCompiled(method_m2)) { + throw new RuntimeException("m2 not recompiled"); + } - // should deoptimize for actual type check - if (!deoptimize(method_m2, src_obj)) { - throw new RuntimeException("m2 not deoptimized"); - } + // should deoptimize for actual type check + if (!deoptimize(method_m2, src_obj)) { + throw new RuntimeException("m2 not deoptimized"); + } - WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); + WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION); - if (!WHITE_BOX.isMethodCompiled(method_m2)) { - throw new RuntimeException("m2 not recompiled"); - } + if (!WHITE_BOX.isMethodCompiled(method_m2)) { + throw new RuntimeException("m2 not recompiled"); + } - if (deoptimize(method_m2, src_obj)) { - throw new RuntimeException("m2 deoptimized again"); + if (deoptimize(method_m2, src_obj)) { + throw new RuntimeException("m2 deoptimized again"); + } } } } From add46c476a4db41f101d28ddb9835d17142806ca Mon Sep 17 00:00:00 2001 From: Severin Gehwolf Date: Wed, 29 Apr 2015 12:23:48 -0700 Subject: [PATCH 12/13] 8078666: JVM fastdebug build compiled with GCC 5 asserts with "widen increases" Do the math on the unsigned type where overflows are well defined Reviewed-by: kvn, aph --- hotspot/src/share/vm/opto/type.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/hotspot/src/share/vm/opto/type.cpp b/hotspot/src/share/vm/opto/type.cpp index 
dd4d4e88d2d..c21fd8e158c 100644 --- a/hotspot/src/share/vm/opto/type.cpp +++ b/hotspot/src/share/vm/opto/type.cpp @@ -1158,11 +1158,11 @@ static int normalize_int_widen( jint lo, jint hi, int w ) { // Certain normalizations keep us sane when comparing types. // The 'SMALLINT' covers constants and also CC and its relatives. if (lo <= hi) { - if ((juint)(hi - lo) <= SMALLINT) w = Type::WidenMin; - if ((juint)(hi - lo) >= max_juint) w = Type::WidenMax; // TypeInt::INT + if (((juint)hi - lo) <= SMALLINT) w = Type::WidenMin; + if (((juint)hi - lo) >= max_juint) w = Type::WidenMax; // TypeInt::INT } else { - if ((juint)(lo - hi) <= SMALLINT) w = Type::WidenMin; - if ((juint)(lo - hi) >= max_juint) w = Type::WidenMin; // dual TypeInt::INT + if (((juint)lo - hi) <= SMALLINT) w = Type::WidenMin; + if (((juint)lo - hi) >= max_juint) w = Type::WidenMin; // dual TypeInt::INT } return w; } @@ -1416,11 +1416,11 @@ static int normalize_long_widen( jlong lo, jlong hi, int w ) { // Certain normalizations keep us sane when comparing types. // The 'SMALLINT' covers constants. 
if (lo <= hi) { - if ((julong)(hi - lo) <= SMALLINT) w = Type::WidenMin; - if ((julong)(hi - lo) >= max_julong) w = Type::WidenMax; // TypeLong::LONG + if (((julong)hi - lo) <= SMALLINT) w = Type::WidenMin; + if (((julong)hi - lo) >= max_julong) w = Type::WidenMax; // TypeLong::LONG } else { - if ((julong)(lo - hi) <= SMALLINT) w = Type::WidenMin; - if ((julong)(lo - hi) >= max_julong) w = Type::WidenMin; // dual TypeLong::LONG + if (((julong)lo - hi) <= SMALLINT) w = Type::WidenMin; + if (((julong)lo - hi) >= max_julong) w = Type::WidenMin; // dual TypeLong::LONG } return w; } From 3cdae26a756ea0d73a62ec5ebc2f8ea9222c376b Mon Sep 17 00:00:00 2001 From: Roland Westrelin Date: Wed, 29 Apr 2015 14:43:12 -0700 Subject: [PATCH 13/13] 8078426: mb/jvm/compiler/InterfaceCalls/testAC2 - assert(predicate_proj == 0L) failed: only one predicate entry expected Split if finds predicates on several incoming paths when unswitched's loops are optimized out Reviewed-by: kvn --- hotspot/src/share/vm/opto/ifnode.cpp | 10 ++- hotspot/src/share/vm/opto/loopPredicate.cpp | 2 +- .../TestSplitIfUnswitchedLoopsEliminated.java | 75 +++++++++++++++++++ 3 files changed, 85 insertions(+), 2 deletions(-) create mode 100644 hotspot/test/compiler/loopopts/TestSplitIfUnswitchedLoopsEliminated.java diff --git a/hotspot/src/share/vm/opto/ifnode.cpp b/hotspot/src/share/vm/opto/ifnode.cpp index f9a55edd45a..126b2f48dc7 100644 --- a/hotspot/src/share/vm/opto/ifnode.cpp +++ b/hotspot/src/share/vm/opto/ifnode.cpp @@ -234,16 +234,24 @@ static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) { // Make a region merging constants and a region merging the rest uint req_c = 0; Node* predicate_proj = NULL; + int nb_predicate_proj = 0; for (uint ii = 1; ii < r->req(); ii++) { if (phi->in(ii) == con1) { req_c++; } Node* proj = PhaseIdealLoop::find_predicate(r->in(ii)); if (proj != NULL) { - assert(predicate_proj == NULL, "only one predicate entry expected"); + nb_predicate_proj++; predicate_proj = proj; } } + if 
(nb_predicate_proj > 1) { + // Can happen in case of loop unswitching and when the loop is + // optimized out: it's not a loop anymore so we don't care about + // predicates. + assert(!r->is_Loop(), "this must not be a loop anymore"); + predicate_proj = NULL; + } Node* predicate_c = NULL; Node* predicate_x = NULL; bool counted_loop = r->is_CountedLoop(); diff --git a/hotspot/src/share/vm/opto/loopPredicate.cpp b/hotspot/src/share/vm/opto/loopPredicate.cpp index 2539ae4d402..d5157b79afb 100644 --- a/hotspot/src/share/vm/opto/loopPredicate.cpp +++ b/hotspot/src/share/vm/opto/loopPredicate.cpp @@ -89,7 +89,7 @@ void PhaseIdealLoop::register_control(Node* n, IdealLoopTree *loop, Node* pred) // // We will create a region to guard the uct call if there is no one there. // The true projecttion (if_cont) of the new_iff is returned. -// This code is also used to clone predicates to clonned loops. +// This code is also used to clone predicates to cloned loops. ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry, Deoptimization::DeoptReason reason) { assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!"); diff --git a/hotspot/test/compiler/loopopts/TestSplitIfUnswitchedLoopsEliminated.java b/hotspot/test/compiler/loopopts/TestSplitIfUnswitchedLoopsEliminated.java new file mode 100644 index 00000000000..78cda884119 --- /dev/null +++ b/hotspot/test/compiler/loopopts/TestSplitIfUnswitchedLoopsEliminated.java @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. 
+ * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +/** + * @test + * @bug 8078426 + * @summary split if finds predicates on several incoming paths when unswitched's loops are optimized out + * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:-UseCompressedOops TestSplitIfUnswitchedLoopsEliminated + * + */ + + +public class TestSplitIfUnswitchedLoopsEliminated { + + static class A { + int f; + } + + static A aa = new A(); + static A aaa = new A(); + + static int test_helper(int stop, boolean unswitch) { + A a = null; + for (int i = 3; i < 10; i++) { + if (unswitch) { + a = null; + } else { + a = aa; + int v = a.f; + } + } + if (stop != 4) { + a = aaa; + } + if (a != null) { + return a.f; + } + return 0; + } + + static int test(boolean unswitch) { + int stop = 1; + for (; stop < 3; stop *= 4) { + } + return test_helper(stop, unswitch); + } + + public static void main(String[] args) { + for (int i = 0; i < 20000; i++) { + test_helper(10, i%2 == 0); + test(i%2 == 0); + } + } +}