diff --git a/hotspot/src/share/vm/ci/ciMethod.cpp b/hotspot/src/share/vm/ci/ciMethod.cpp index d32df16bcbb..4769254e9b6 100644 --- a/hotspot/src/share/vm/ci/ciMethod.cpp +++ b/hotspot/src/share/vm/ci/ciMethod.cpp @@ -70,7 +70,8 @@ // Loaded method. ciMethod::ciMethod(methodHandle h_m, ciInstanceKlass* holder) : ciMetadata(h_m()), - _holder(holder) + _holder(holder), + _has_injected_profile(false) { assert(h_m() != NULL, "no null method"); @@ -168,7 +169,8 @@ ciMethod::ciMethod(ciInstanceKlass* holder, _liveness( NULL), _can_be_statically_bound(false), _method_blocks( NULL), - _method_data( NULL) + _method_data( NULL), + _has_injected_profile( false) #if defined(COMPILER2) || defined(SHARK) , _flow( NULL), diff --git a/hotspot/src/share/vm/ci/ciMethod.hpp b/hotspot/src/share/vm/ci/ciMethod.hpp index 5a46fc483c9..76662089bd8 100644 --- a/hotspot/src/share/vm/ci/ciMethod.hpp +++ b/hotspot/src/share/vm/ci/ciMethod.hpp @@ -79,6 +79,7 @@ class ciMethod : public ciMetadata { bool _is_c1_compilable; bool _is_c2_compilable; bool _can_be_statically_bound; + bool _has_injected_profile; // Lazy fields, filled in on demand address _code; @@ -286,6 +287,9 @@ class ciMethod : public ciMetadata { int instructions_size(); int scale_count(int count, float prof_factor = 1.); // make MDO count commensurate with IIC + bool has_injected_profile() const { return _has_injected_profile; } + void set_injected_profile(bool x) { _has_injected_profile = x; } + // Stack walking support bool is_ignored_by_security_stack_walk() const; diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp index 6a5f1508076..94150a96cea 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp @@ -243,7 +243,6 @@ template(returnType_name, "returnType") \ template(signature_name, "signature") \ template(slot_name, "slot") \ - template(selectAlternative_name, "selectAlternative") \ \ /* Support for annotations (JDK 1.5 and above) */ \ \ @@ -295,8 +294,7 @@ template(setTarget_signature, "(Ljava/lang/invoke/MethodHandle;)V") \ NOT_LP64( do_alias(intptr_signature, int_signature) ) \ LP64_ONLY( do_alias(intptr_signature, long_signature) ) \ - template(selectAlternative_signature, "(ZLjava/lang/invoke/MethodHandle;Ljava/lang/invoke/MethodHandle;)Ljava/lang/invoke/MethodHandle;") \ - \ + \ /* common method and field names */ \ template(object_initializer_name, "") \ template(class_initializer_name, "") \ @@ -868,6 +866,12 @@ do_name( fullFence_name, "fullFence") \ do_alias( fullFence_signature, void_method_signature) \ \ + /* Custom branch frequencies profiling support for JSR292 */ \ + do_class(java_lang_invoke_MethodHandleImpl, "java/lang/invoke/MethodHandleImpl") \ + do_intrinsic(_profileBoolean, java_lang_invoke_MethodHandleImpl, profileBoolean_name, profileBoolean_signature, F_S) \ + do_name( profileBoolean_name, "profileBoolean") \ + do_signature(profileBoolean_signature, "(Z[I)Z") \ + \ /* unsafe memory references (there are a lot of them...) */ \ do_signature(getObject_signature, "(Ljava/lang/Object;J)Ljava/lang/Object;") \ do_signature(putObject_signature, "(Ljava/lang/Object;JLjava/lang/Object;)V") \ diff --git a/hotspot/src/share/vm/code/nmethod.cpp b/hotspot/src/share/vm/code/nmethod.cpp index 835b3dc831d..0e60a131ae8 100644 --- a/hotspot/src/share/vm/code/nmethod.cpp +++ b/hotspot/src/share/vm/code/nmethod.cpp @@ -2310,17 +2310,6 @@ void nmethod::preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map #endif // !SHARK } - -oop nmethod::embeddedOop_at(u_char* p) { - RelocIterator iter(this, p, p + 1); - while (iter.next()) - if (iter.type() == relocInfo::oop_type) { - return iter.oop_reloc()->oop_value(); - } - return NULL; -} - - inline bool includes(void* p, void* from, void* to) { return from <= p && p < to; } diff --git a/hotspot/src/share/vm/code/nmethod.hpp b/hotspot/src/share/vm/code/nmethod.hpp index c95679649fd..5d4173738d9 100644 --- a/hotspot/src/share/vm/code/nmethod.hpp +++ b/hotspot/src/share/vm/code/nmethod.hpp @@ -730,11 +730,6 @@ public: int compile_id() const { return _compile_id; } const char* compile_kind() const; - // For debugging - // CompiledIC* IC_at(char* p) const; - // PrimitiveIC* primitiveIC_at(char* p) const; - oop embeddedOop_at(address p); - // tells if any of this method's dependencies have been invalidated // (this is expensive!) static void check_all_dependencies(DepChange& changes); diff --git a/hotspot/src/share/vm/compiler/disassembler.cpp b/hotspot/src/share/vm/compiler/disassembler.cpp index e4f6d3f5040..eb99f050f87 100644 --- a/hotspot/src/share/vm/compiler/disassembler.cpp +++ b/hotspot/src/share/vm/compiler/disassembler.cpp @@ -345,21 +345,6 @@ void decode_env::print_address(address adr) { if (WizardMode) st->print(" " INTPTR_FORMAT, (intptr_t)adr); return; } - - oop obj; - if (_nm != NULL - && (obj = _nm->embeddedOop_at(cur_insn())) != NULL - && (address) obj == adr - && Universe::heap()->is_in(obj) - && Universe::heap()->is_in(obj->klass())) { - julong c = st->count(); - obj->print_value_on(st); - if (st->count() == c) { - // No output. (Can happen in product builds.) - st->print("(a %s)", obj->klass()->external_name()); - } - return; - } } // Fall through to a simple (hexadecimal) numeral. diff --git a/hotspot/src/share/vm/opto/classes.hpp b/hotspot/src/share/vm/opto/classes.hpp index 238e3491862..0638cdee3bb 100644 --- a/hotspot/src/share/vm/opto/classes.hpp +++ b/hotspot/src/share/vm/opto/classes.hpp @@ -200,6 +200,7 @@ macro(NeverBranch) macro(Opaque1) macro(Opaque2) macro(Opaque3) +macro(ProfileBoolean) macro(OrI) macro(OrL) macro(OverflowAddI) diff --git a/hotspot/src/share/vm/opto/compile.cpp b/hotspot/src/share/vm/opto/compile.cpp index 23253651dca..b2a29982e0f 100644 --- a/hotspot/src/share/vm/opto/compile.cpp +++ b/hotspot/src/share/vm/opto/compile.cpp @@ -3105,6 +3105,7 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) { default: assert( !n->is_Call(), "" ); assert( !n->is_Mem(), "" ); + assert( nop != Op_ProfileBoolean, "should be eliminated during IGVN"); break; } @@ -3321,6 +3322,9 @@ bool Compile::final_graph_reshaping() { bool Compile::too_many_traps(ciMethod* method, int bci, Deoptimization::DeoptReason reason) { + if (method->has_injected_profile()) { + return false; + } ciMethodData* md = method->method_data(); if (md->is_empty()) { // Assume the trap has not occurred, or that it occurred only @@ -3370,6 +3374,9 @@ bool Compile::too_many_traps(Deoptimization::DeoptReason reason, bool Compile::too_many_recompiles(ciMethod* method, int bci, Deoptimization::DeoptReason reason) { + if (method->has_injected_profile()) { + return false; + } ciMethodData* md = method->method_data(); if (md->is_empty()) { // Assume the trap has not occurred, or that it occurred only diff --git a/hotspot/src/share/vm/opto/graphKit.cpp b/hotspot/src/share/vm/opto/graphKit.cpp index 20932a5bd51..ccde0152a87 100644 --- a/hotspot/src/share/vm/opto/graphKit.cpp +++ b/hotspot/src/share/vm/opto/graphKit.cpp @@ -1989,6 +1989,11 @@ void GraphKit::uncommon_trap(int trap_request, Deoptimization::trap_request_index(trap_request) < 0 && too_many_recompiles(reason)) { // This BCI is causing too many recompilations. + if (C->log() != NULL) { + C->log()->elem("observe that='trap_action_change' reason='%s' from='%s' to='none'", + Deoptimization::trap_reason_name(reason), + Deoptimization::trap_action_name(action)); + } action = Deoptimization::Action_none; trap_request = Deoptimization::make_trap_request(reason, action); } else { @@ -2763,7 +2768,7 @@ Node* GraphKit::maybe_cast_profiled_receiver(Node* not_null_obj, Deoptimization::DeoptReason reason = Deoptimization::reason_class_check(spec_klass != NULL); // Make sure we haven't already deoptimized from this tactic. - if (too_many_traps(reason)) + if (too_many_traps(reason) || too_many_recompiles(reason)) return NULL; // (No, this isn't a call, but it's enough like a virtual call @@ -2785,8 +2790,7 @@ Node* GraphKit::maybe_cast_profiled_receiver(Node* not_null_obj, &exact_obj); { PreserveJVMState pjvms(this); set_control(slow_ctl); - uncommon_trap(reason, - Deoptimization::Action_maybe_recompile); + uncommon_trap_exact(reason, Deoptimization::Action_maybe_recompile); } if (safe_for_replace) { replace_in_map(not_null_obj, exact_obj); @@ -2815,8 +2819,8 @@ Node* GraphKit::maybe_cast_profiled_obj(Node* obj, if (type != NULL) { Deoptimization::DeoptReason class_reason = Deoptimization::Reason_speculate_class_check; Deoptimization::DeoptReason null_reason = Deoptimization::Reason_speculate_null_check; - if (!too_many_traps(null_reason) && - !too_many_traps(class_reason)) { + if (!too_many_traps(null_reason) && !too_many_recompiles(null_reason) && + !too_many_traps(class_reason) && !too_many_recompiles(class_reason)) { Node* not_null_obj = NULL; // not_null is true if we know the object is not null and // there's no need for a null check @@ -2836,19 +2840,18 @@ Node* GraphKit::maybe_cast_profiled_obj(Node* obj, GraphKit kit(sfpt->jvms()); PreserveJVMState pjvms(&kit); kit.set_control(slow_ctl); - kit.uncommon_trap(class_reason, - Deoptimization::Action_maybe_recompile); + kit.uncommon_trap_exact(class_reason, Deoptimization::Action_maybe_recompile); } else { PreserveJVMState pjvms(this); set_control(slow_ctl); - uncommon_trap(class_reason, - Deoptimization::Action_maybe_recompile); + uncommon_trap_exact(class_reason, Deoptimization::Action_maybe_recompile); } replace_in_map(not_null_obj, exact_obj); obj = exact_obj; } } else { - if (!too_many_traps(Deoptimization::Reason_null_assert)) { + if (!too_many_traps(Deoptimization::Reason_null_assert) && + !too_many_recompiles(Deoptimization::Reason_null_assert)) { Node* exact_obj = null_assert(obj); replace_in_map(obj, exact_obj); obj = exact_obj; diff --git a/hotspot/src/share/vm/opto/graphKit.hpp b/hotspot/src/share/vm/opto/graphKit.hpp index 22699420cbb..d608537f90c 100644 --- a/hotspot/src/share/vm/opto/graphKit.hpp +++ b/hotspot/src/share/vm/opto/graphKit.hpp @@ -714,6 +714,15 @@ class GraphKit : public Phase { klass, reason_string, must_throw, keep_exact_action); } + // Bail out to the interpreter and keep exact action (avoid switching to Action_none). + void uncommon_trap_exact(Deoptimization::DeoptReason reason, + Deoptimization::DeoptAction action, + ciKlass* klass = NULL, const char* reason_string = NULL, + bool must_throw = false) { + uncommon_trap(Deoptimization::make_trap_request(reason, action), + klass, reason_string, must_throw, /*keep_exact_action=*/true); + } + // SP when bytecode needs to be reexecuted. virtual int reexecute_sp() { return sp(); } diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp index 45eeb9672ff..384f8b491c2 100644 --- a/hotspot/src/share/vm/opto/library_call.cpp +++ b/hotspot/src/share/vm/opto/library_call.cpp @@ -41,6 +41,7 @@ #include "opto/movenode.hpp" #include "opto/mulnode.hpp" #include "opto/narrowptrnode.hpp" +#include "opto/opaquenode.hpp" #include "opto/parse.hpp" #include "opto/runtime.hpp" #include "opto/subnode.hpp" @@ -287,6 +288,8 @@ class LibraryCallKit : public GraphKit { bool inline_updateBytesCRC32(); bool inline_updateByteBufferCRC32(); bool inline_multiplyToLen(); + + bool inline_profileBoolean(); }; @@ -900,6 +903,9 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_updateByteBufferCRC32: return inline_updateByteBufferCRC32(); + case vmIntrinsics::_profileBoolean: + return inline_profileBoolean(); + default: // If you get here, it may be that someone has added a new intrinsic // to the list in vmSymbols.hpp without implementing it here. @@ -5867,3 +5873,47 @@ Node* LibraryCallKit::inline_digestBase_implCompressMB_predicate(int predicate) return instof_false; // even if it is NULL } + +bool LibraryCallKit::inline_profileBoolean() { + Node* counts = argument(1); + const TypeAryPtr* ary = NULL; + ciArray* aobj = NULL; + if (counts->is_Con() + && (ary = counts->bottom_type()->isa_aryptr()) != NULL + && (aobj = ary->const_oop()->as_array()) != NULL + && (aobj->length() == 2)) { + // Profile is int[2] where [0] and [1] correspond to false and true value occurrences respectively. + jint false_cnt = aobj->element_value(0).as_int(); + jint true_cnt = aobj->element_value(1).as_int(); + + method()->set_injected_profile(true); + + if (C->log() != NULL) { + C->log()->elem("observe source='profileBoolean' false='%d' true='%d'", + false_cnt, true_cnt); + } + + if (false_cnt + true_cnt == 0) { + // According to profile, never executed. + uncommon_trap_exact(Deoptimization::Reason_intrinsic, + Deoptimization::Action_reinterpret); + return true; + } + // Stop profiling. + // MethodHandleImpl::profileBoolean() has profiling logic in it's bytecode. + // By replacing method's body with profile data (represented as ProfileBooleanNode + // on IR level) we effectively disable profiling. + // It enables full speed execution once optimized code is generated. + Node* profile = _gvn.transform(new ProfileBooleanNode(argument(0), false_cnt, true_cnt)); + C->record_for_igvn(profile); + set_result(profile); + return true; + } else { + // Continue profiling. + // Profile data isn't available at the moment. So, execute method's bytecode version. + // Usually, when GWT LambdaForms are profiled it means that a stand-alone nmethod + // is compiled and counters aren't available since corresponding MethodHandle + // isn't a compile-time constant. + return false; + } +} diff --git a/hotspot/src/share/vm/opto/opaquenode.cpp b/hotspot/src/share/vm/opto/opaquenode.cpp index b2cd073b131..cc0bdebc79e 100644 --- a/hotspot/src/share/vm/opto/opaquenode.cpp +++ b/hotspot/src/share/vm/opto/opaquenode.cpp @@ -60,4 +60,27 @@ uint Opaque2Node::cmp( const Node &n ) const { return (&n == this); // Always fail except on self } +//============================================================================= +uint ProfileBooleanNode::hash() const { return NO_HASH; } +uint ProfileBooleanNode::cmp( const Node &n ) const { + return (&n == this); +} + +Node *ProfileBooleanNode::Ideal(PhaseGVN *phase, bool can_reshape) { + if (can_reshape && _delay_removal) { + _delay_removal = false; + return this; + } else { + return NULL; + } +} + +Node *ProfileBooleanNode::Identity( PhaseTransform *phase ) { + if (_delay_removal) { + return this; + } else { + assert(_consumed, "profile should be consumed before elimination"); + return in(1); + } +} diff --git a/hotspot/src/share/vm/opto/opaquenode.hpp b/hotspot/src/share/vm/opto/opaquenode.hpp index 2f92a4a37aa..ba298864486 100644 --- a/hotspot/src/share/vm/opto/opaquenode.hpp +++ b/hotspot/src/share/vm/opto/opaquenode.hpp @@ -87,5 +87,31 @@ class Opaque3Node : public Opaque2Node { bool rtm_opt() const { return (_opt == RTM_OPT); } }; +//------------------------------ProfileBooleanNode------------------------------- +// A node represents value profile for a boolean during parsing. +// Once parsing is over, the node goes away (during IGVN). +// It is used to override branch frequencies from MDO (see has_injected_profile in parse2.cpp). +class ProfileBooleanNode : public Node { + uint _false_cnt; + uint _true_cnt; + bool _consumed; + bool _delay_removal; + virtual uint hash() const ; // { return NO_HASH; } + virtual uint cmp( const Node &n ) const; + public: + ProfileBooleanNode(Node *n, uint false_cnt, uint true_cnt) : Node(0, n), + _false_cnt(false_cnt), _true_cnt(true_cnt), _delay_removal(true), _consumed(false) {} + + uint false_count() const { return _false_cnt; } + uint true_count() const { return _true_cnt; } + + void consume() { _consumed = true; } + + virtual int Opcode() const; + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); + virtual Node *Identity(PhaseTransform *phase); + virtual const Type *bottom_type() const { return TypeInt::BOOL; } +}; + #endif // SHARE_VM_OPTO_OPAQUENODE_HPP diff --git a/hotspot/src/share/vm/opto/parse.hpp b/hotspot/src/share/vm/opto/parse.hpp index ab05a633ae1..97bfac6d169 100644 --- a/hotspot/src/share/vm/opto/parse.hpp +++ b/hotspot/src/share/vm/opto/parse.hpp @@ -555,8 +555,8 @@ class Parse : public GraphKit { void do_jsr(); void do_ret(); - float dynamic_branch_prediction(float &cnt); - float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci); + float dynamic_branch_prediction(float &cnt, BoolTest::mask btest, Node* test); + float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci, Node* test); bool seems_never_taken(float prob) const; bool path_is_suitable_for_uncommon_trap(float prob) const; bool seems_stable_comparison() const; diff --git a/hotspot/src/share/vm/opto/parse2.cpp b/hotspot/src/share/vm/opto/parse2.cpp index c99ac9659d1..237bf7625ad 100644 --- a/hotspot/src/share/vm/opto/parse2.cpp +++ b/hotspot/src/share/vm/opto/parse2.cpp @@ -37,6 +37,7 @@ #include "opto/matcher.hpp" #include "opto/memnode.hpp" #include "opto/mulnode.hpp" +#include "opto/opaquenode.hpp" #include "opto/parse.hpp" #include "opto/runtime.hpp" #include "runtime/deoptimization.hpp" @@ -763,35 +764,64 @@ void Parse::do_ret() { merge_common(target, pnum); } +static bool has_injected_profile(BoolTest::mask btest, Node* test, int& taken, int& not_taken) { + if (btest != BoolTest::eq && btest != BoolTest::ne) { + // Only ::eq and ::ne are supported for profile injection. + return false; + } + if (test->is_Cmp() && + test->in(1)->Opcode() == Op_ProfileBoolean) { + ProfileBooleanNode* profile = (ProfileBooleanNode*)test->in(1); + int false_cnt = profile->false_count(); + int true_cnt = profile->true_count(); + + // Counts matching depends on the actual test operation (::eq or ::ne). + // No need to scale the counts because profile injection was designed + // to feed exact counts into VM. + taken = (btest == BoolTest::eq) ? false_cnt : true_cnt; + not_taken = (btest == BoolTest::eq) ? true_cnt : false_cnt; + + profile->consume(); + return true; + } + return false; +} //--------------------------dynamic_branch_prediction-------------------------- // Try to gather dynamic branch prediction behavior. Return a probability // of the branch being taken and set the "cnt" field. Returns a -1.0 // if we need to use static prediction for some reason. -float Parse::dynamic_branch_prediction(float &cnt) { +float Parse::dynamic_branch_prediction(float &cnt, BoolTest::mask btest, Node* test) { ResourceMark rm; cnt = COUNT_UNKNOWN; - // Use MethodData information if it is available - // FIXME: free the ProfileData structure - ciMethodData* methodData = method()->method_data(); - if (!methodData->is_mature()) return PROB_UNKNOWN; - ciProfileData* data = methodData->bci_to_data(bci()); - if (!data->is_JumpData()) return PROB_UNKNOWN; - - // get taken and not taken values - int taken = data->as_JumpData()->taken(); + int taken = 0; int not_taken = 0; - if (data->is_BranchData()) { - not_taken = data->as_BranchData()->not_taken(); + + bool use_mdo = !has_injected_profile(btest, test, taken, not_taken); + + if (use_mdo) { + // Use MethodData information if it is available + // FIXME: free the ProfileData structure + ciMethodData* methodData = method()->method_data(); + if (!methodData->is_mature()) return PROB_UNKNOWN; + ciProfileData* data = methodData->bci_to_data(bci()); + if (!data->is_JumpData()) return PROB_UNKNOWN; + + // get taken and not taken values + taken = data->as_JumpData()->taken(); + not_taken = 0; + if (data->is_BranchData()) { + not_taken = data->as_BranchData()->not_taken(); + } + + // scale the counts to be commensurate with invocation counts: + taken = method()->scale_count(taken); + not_taken = method()->scale_count(not_taken); } - // scale the counts to be commensurate with invocation counts: - taken = method()->scale_count(taken); - not_taken = method()->scale_count(not_taken); - // Give up if too few (or too many, in which case the sum will overflow) counts to be meaningful. - // We also check that individual counters are positive first, overwise the sum can become positive. + // We also check that individual counters are positive first, otherwise the sum can become positive. if (taken < 0 || not_taken < 0 || taken + not_taken < 40) { if (C->log() != NULL) { C->log()->elem("branch target_bci='%d' taken='%d' not_taken='%d'", iter().get_dest(), taken, not_taken); @@ -841,8 +871,9 @@ float Parse::dynamic_branch_prediction(float &cnt) { //-----------------------------branch_prediction------------------------------- float Parse::branch_prediction(float& cnt, BoolTest::mask btest, - int target_bci) { - float prob = dynamic_branch_prediction(cnt); + int target_bci, + Node* test) { + float prob = dynamic_branch_prediction(cnt, btest, test); // If prob is unknown, switch to static prediction if (prob != PROB_UNKNOWN) return prob; @@ -932,7 +963,7 @@ void Parse::do_ifnull(BoolTest::mask btest, Node *c) { Block* next_block = successor_for_bci(iter().next_bci()); float cnt; - float prob = branch_prediction(cnt, btest, target_bci); + float prob = branch_prediction(cnt, btest, target_bci, c); if (prob == PROB_UNKNOWN) { // (An earlier version of do_ifnull omitted this trap for OSR methods.) #ifndef PRODUCT @@ -1013,7 +1044,7 @@ void Parse::do_if(BoolTest::mask btest, Node* c) { Block* next_block = successor_for_bci(iter().next_bci()); float cnt; - float prob = branch_prediction(cnt, btest, target_bci); + float prob = branch_prediction(cnt, btest, target_bci, c); float untaken_prob = 1.0 - prob; if (prob == PROB_UNKNOWN) {