8202711: Merge tiered compilation policies

Reviewed-by: neliasso, kvn
2018-05-09 09:39:25 +02:00 · 2018-05-09 09:39:25 +02:00 · 7101b28dc3
commit 7101b28dc3
parent a322e0f0ff
11 changed files with 647 additions and 1054 deletions
--- a/src/hotspot/share/runtime/advancedThresholdPolicy.cpp
+++ b/src/hotspot/share/runtime/advancedThresholdPolicy.cpp
@ -1,667 +0,0 @@
-/*
- * Copyright (c) 2010, 2018, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "code/codeCache.hpp"
-#include "runtime/advancedThresholdPolicy.hpp"
-#include "runtime/handles.inline.hpp"
-#include "runtime/simpleThresholdPolicy.inline.hpp"
-#if INCLUDE_JVMCI
-#include "jvmci/jvmciRuntime.hpp"
-#endif
-
-#ifdef TIERED
-// Print an event.
-void AdvancedThresholdPolicy::print_specific(EventType type, const methodHandle& mh, const methodHandle& imh,
-                                             int bci, CompLevel level) {
-  tty->print(" rate=");
-  if (mh->prev_time() == 0) tty->print("n/a");
-  else tty->print("%f", mh->rate());
-
-  tty->print(" k=%.2lf,%.2lf", threshold_scale(CompLevel_full_profile, Tier3LoadFeedback),
-                               threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback));
-
-}
-
-void AdvancedThresholdPolicy::initialize() {
-  int count = CICompilerCount;
-#ifdef _LP64
-  // Turn on ergonomic compiler count selection
-  if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
-    FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
-  }
-  if (CICompilerCountPerCPU) {
-    // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
-    int log_cpu = log2_intptr(os::active_processor_count());
-    int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
-    count = MAX2(log_cpu * loglog_cpu * 3 / 2, 2);
-    FLAG_SET_ERGO(intx, CICompilerCount, count);
-  }
-#else
-  // On 32-bit systems, the number of compiler threads is limited to 3.
-  // On these systems, the virtual address space available to the JVM
-  // is usually limited to 2-4 GB (the exact value depends on the platform).
-  // As the compilers (especially C2) can consume a large amount of
-  // memory, scaling the number of compiler threads with the number of
-  // available cores can result in the exhaustion of the address space
-  /// available to the VM and thus cause the VM to crash.
-  if (FLAG_IS_DEFAULT(CICompilerCount)) {
-    count = 3;
-    FLAG_SET_ERGO(intx, CICompilerCount, count);
-  }
-#endif
-
-  if (TieredStopAtLevel < CompLevel_full_optimization) {
-    // No C2 compiler thread required
-    set_c1_count(count);
-  } else {
-    set_c1_count(MAX2(count / 3, 1));
-    set_c2_count(MAX2(count - c1_count(), 1));
-  }
-  assert(count == c1_count() + c2_count(), "inconsistent compiler thread count");
-
-  // Some inlining tuning
-#ifdef X86
-  if (FLAG_IS_DEFAULT(InlineSmallCode)) {
-    FLAG_SET_DEFAULT(InlineSmallCode, 2000);
-  }
-#endif
-
-#if defined SPARC || defined AARCH64
-  if (FLAG_IS_DEFAULT(InlineSmallCode)) {
-    FLAG_SET_DEFAULT(InlineSmallCode, 2500);
-  }
-#endif
-
-  set_increase_threshold_at_ratio();
-  set_start_time(os::javaTimeMillis());
-}
-
-// update_rate() is called from select_task() while holding a compile queue lock.
-void AdvancedThresholdPolicy::update_rate(jlong t, Method* m) {
-  // Skip update if counters are absent.
-  // Can't allocate them since we are holding compile queue lock.
-  if (m->method_counters() == NULL)  return;
-
-  if (is_old(m)) {
-    // We don't remove old methods from the queue,
-    // so we can just zero the rate.
-    m->set_rate(0);
-    return;
-  }
-
-  // We don't update the rate if we've just came out of a safepoint.
-  // delta_s is the time since last safepoint in milliseconds.
-  jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint();
-  jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // milliseconds since the last measurement
-  // How many events were there since the last time?
-  int event_count = m->invocation_count() + m->backedge_count();
-  int delta_e = event_count - m->prev_event_count();
-
-  // We should be running for at least 1ms.
-  if (delta_s >= TieredRateUpdateMinTime) {
-    // And we must've taken the previous point at least 1ms before.
-    if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) {
-      m->set_prev_time(t);
-      m->set_prev_event_count(event_count);
-      m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond
-    } else {
-      if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) {
-        // If nothing happened for 25ms, zero the rate. Don't modify prev values.
-        m->set_rate(0);
-      }
-    }
-  }
-}
-
-// Check if this method has been stale from a given number of milliseconds.
-// See select_task().
-bool AdvancedThresholdPolicy::is_stale(jlong t, jlong timeout, Method* m) {
-  jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint();
-  jlong delta_t = t - m->prev_time();
-  if (delta_t > timeout && delta_s > timeout) {
-    int event_count = m->invocation_count() + m->backedge_count();
-    int delta_e = event_count - m->prev_event_count();
-    // Return true if there were no events.
-    return delta_e == 0;
-  }
-  return false;
-}
-
-// We don't remove old methods from the compile queue even if they have
-// very low activity. See select_task().
-bool AdvancedThresholdPolicy::is_old(Method* method) {
-  return method->invocation_count() > 50000 || method->backedge_count() > 500000;
-}
-
-double AdvancedThresholdPolicy::weight(Method* method) {
-  return (double)(method->rate() + 1) *
-    (method->invocation_count() + 1) * (method->backedge_count() + 1);
-}
-
-// Apply heuristics and return true if x should be compiled before y
-bool AdvancedThresholdPolicy::compare_methods(Method* x, Method* y) {
-  if (x->highest_comp_level() > y->highest_comp_level()) {
-    // recompilation after deopt
-    return true;
-  } else
-    if (x->highest_comp_level() == y->highest_comp_level()) {
-      if (weight(x) > weight(y)) {
-        return true;
-      }
-    }
-  return false;
-}
-
-// Is method profiled enough?
-bool AdvancedThresholdPolicy::is_method_profiled(Method* method) {
-  MethodData* mdo = method->method_data();
-  if (mdo != NULL) {
-    int i = mdo->invocation_count_delta();
-    int b = mdo->backedge_count_delta();
-    return call_predicate_helper<CompLevel_full_profile>(i, b, 1, method);
-  }
-  return false;
-}
-
-// Called with the queue locked and with at least one element
-CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
-  CompileTask *max_blocking_task = NULL;
-  CompileTask *max_task = NULL;
-  Method* max_method = NULL;
-  jlong t = os::javaTimeMillis();
-  // Iterate through the queue and find a method with a maximum rate.
-  for (CompileTask* task = compile_queue->first(); task != NULL;) {
-    CompileTask* next_task = task->next();
-    Method* method = task->method();
-    update_rate(t, method);
-    if (max_task == NULL) {
-      max_task = task;
-      max_method = method;
-    } else {
-      // If a method has been stale for some time, remove it from the queue.
-      // Blocking tasks and tasks submitted from whitebox API don't become stale
-      if (task->can_become_stale() && is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
-        if (PrintTieredEvents) {
-          print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level());
-        }
-        compile_queue->remove_and_mark_stale(task);
-        method->clear_queued_for_compilation();
-        task = next_task;
-        continue;
-      }
-
-      // Select a method with a higher rate
-      if (compare_methods(method, max_method)) {
-        max_task = task;
-        max_method = method;
-      }
-    }
-
-    if (task->is_blocking()) {
-      if (max_blocking_task == NULL || compare_methods(method, max_blocking_task->method())) {
-        max_blocking_task = task;
-      }
-    }
-
-    task = next_task;
-  }
-
-  if (max_blocking_task != NULL) {
-    // In blocking compilation mode, the CompileBroker will make
-    // compilations submitted by a JVMCI compiler thread non-blocking. These
-    // compilations should be scheduled after all blocking compilations
-    // to service non-compiler related compilations sooner and reduce the
-    // chance of such compilations timing out.
-    max_task = max_blocking_task;
-    max_method = max_task->method();
-  }
-
-  if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile
-      && is_method_profiled(max_method)) {
-    max_task->set_comp_level(CompLevel_limited_profile);
-    if (PrintTieredEvents) {
-      print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
-    }
-  }
-
-  return max_task;
-}
-
-double AdvancedThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) {
-  double queue_size = CompileBroker::queue_size(level);
-  int comp_count = compiler_count(level);
-  double k = queue_size / (feedback_k * comp_count) + 1;
-
-  // Increase C1 compile threshold when the code cache is filled more
-  // than specified by IncreaseFirstTierCompileThresholdAt percentage.
-  // The main intention is to keep enough free space for C2 compiled code
-  // to achieve peak performance if the code cache is under stress.
-  if ((TieredStopAtLevel == CompLevel_full_optimization) && (level != CompLevel_full_optimization))  {
-    double current_reverse_free_ratio = CodeCache::reverse_free_ratio(CodeCache::get_code_blob_type(level));
-    if (current_reverse_free_ratio > _increase_threshold_at_ratio) {
-      k *= exp(current_reverse_free_ratio - _increase_threshold_at_ratio);
-    }
-  }
-  return k;
-}
-
-// Call and loop predicates determine whether a transition to a higher
-// compilation level should be performed (pointers to predicate functions
-// are passed to common()).
-// Tier?LoadFeedback is basically a coefficient that determines of
-// how many methods per compiler thread can be in the queue before
-// the threshold values double.
-bool AdvancedThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level, Method* method) {
-  switch(cur_level) {
-  case CompLevel_aot: {
-    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
-    return loop_predicate_helper<CompLevel_aot>(i, b, k, method);
-  }
-  case CompLevel_none:
-  case CompLevel_limited_profile: {
-    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
-    return loop_predicate_helper<CompLevel_none>(i, b, k, method);
-  }
-  case CompLevel_full_profile: {
-    double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
-    return loop_predicate_helper<CompLevel_full_profile>(i, b, k, method);
-  }
-  default:
-    return true;
-  }
-}
-
-bool AdvancedThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level, Method* method) {
-  switch(cur_level) {
-  case CompLevel_aot: {
-    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
-    return call_predicate_helper<CompLevel_aot>(i, b, k, method);
-  }
-  case CompLevel_none:
-  case CompLevel_limited_profile: {
-    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
-    return call_predicate_helper<CompLevel_none>(i, b, k, method);
-  }
-  case CompLevel_full_profile: {
-    double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
-    return call_predicate_helper<CompLevel_full_profile>(i, b, k, method);
-  }
-  default:
-    return true;
-  }
-}
-
-// If a method is old enough and is still in the interpreter we would want to
-// start profiling without waiting for the compiled method to arrive.
-// We also take the load on compilers into the account.
-bool AdvancedThresholdPolicy::should_create_mdo(Method* method, CompLevel cur_level) {
-  if (cur_level == CompLevel_none &&
-      CompileBroker::queue_size(CompLevel_full_optimization) <=
-      Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
-    int i = method->invocation_count();
-    int b = method->backedge_count();
-    double k = Tier0ProfilingStartPercentage / 100.0;
-    return call_predicate_helper<CompLevel_none>(i, b, k, method) || loop_predicate_helper<CompLevel_none>(i, b, k, method);
-  }
-  return false;
-}
-
-// Inlining control: if we're compiling a profiled method with C1 and the callee
-// is known to have OSRed in a C2 version, don't inline it.
-bool AdvancedThresholdPolicy::should_not_inline(ciEnv* env, ciMethod* callee) {
-  CompLevel comp_level = (CompLevel)env->comp_level();
-  if (comp_level == CompLevel_full_profile ||
-      comp_level == CompLevel_limited_profile) {
-    return callee->highest_osr_comp_level() == CompLevel_full_optimization;
-  }
-  return false;
-}
-
-// Create MDO if necessary.
-void AdvancedThresholdPolicy::create_mdo(const methodHandle& mh, JavaThread* THREAD) {
-  if (mh->is_native() ||
-      mh->is_abstract() ||
-      mh->is_accessor() ||
-      mh->is_constant_getter()) {
-    return;
-  }
-  if (mh->method_data() == NULL) {
-    Method::build_interpreter_method_data(mh, CHECK_AND_CLEAR);
-  }
-}
-
-
-/*
- * Method states:
- *   0 - interpreter (CompLevel_none)
- *   1 - pure C1 (CompLevel_simple)
- *   2 - C1 with invocation and backedge counting (CompLevel_limited_profile)
- *   3 - C1 with full profiling (CompLevel_full_profile)
- *   4 - C2 (CompLevel_full_optimization)
- *
- * Common state transition patterns:
- * a. 0 -> 3 -> 4.
- *    The most common path. But note that even in this straightforward case
- *    profiling can start at level 0 and finish at level 3.
- *
- * b. 0 -> 2 -> 3 -> 4.
- *    This case occurs when the load on C2 is deemed too high. So, instead of transitioning
- *    into state 3 directly and over-profiling while a method is in the C2 queue we transition to
- *    level 2 and wait until the load on C2 decreases. This path is disabled for OSRs.
- *
- * c. 0 -> (3->2) -> 4.
- *    In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough
- *    to enable the profiling to fully occur at level 0. In this case we change the compilation level
- *    of the method to 2 while the request is still in-queue, because it'll allow it to run much faster
- *    without full profiling while c2 is compiling.
- *
- * d. 0 -> 3 -> 1 or 0 -> 2 -> 1.
- *    After a method was once compiled with C1 it can be identified as trivial and be compiled to
- *    level 1. These transition can also occur if a method can't be compiled with C2 but can with C1.
- *
- * e. 0 -> 4.
- *    This can happen if a method fails C1 compilation (it will still be profiled in the interpreter)
- *    or because of a deopt that didn't require reprofiling (compilation won't happen in this case because
- *    the compiled version already exists).
- *
- * Note that since state 0 can be reached from any other state via deoptimization different loops
- * are possible.
- *
- */
-
-// Common transition function. Given a predicate determines if a method should transition to another level.
-CompLevel AdvancedThresholdPolicy::common(Predicate p, Method* method, CompLevel cur_level, bool disable_feedback) {
-  CompLevel next_level = cur_level;
-  int i = method->invocation_count();
-  int b = method->backedge_count();
-
-  if (is_trivial(method)) {
-    next_level = CompLevel_simple;
-  } else {
-    switch(cur_level) {
-      default: break;
-      case CompLevel_aot: {
-      // If we were at full profile level, would we switch to full opt?
-      if (common(p, method, CompLevel_full_profile, disable_feedback) == CompLevel_full_optimization) {
-        next_level = CompLevel_full_optimization;
-      } else if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
-                               Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
-                               (this->*p)(i, b, cur_level, method))) {
-        next_level = CompLevel_full_profile;
-      }
-    }
-    break;
-    case CompLevel_none:
-      // If we were at full profile level, would we switch to full opt?
-      if (common(p, method, CompLevel_full_profile, disable_feedback) == CompLevel_full_optimization) {
-        next_level = CompLevel_full_optimization;
-      } else if ((this->*p)(i, b, cur_level, method)) {
-#if INCLUDE_JVMCI
-        if (EnableJVMCI && UseJVMCICompiler) {
-          // Since JVMCI takes a while to warm up, its queue inevitably backs up during
-          // early VM execution. As of 2014-06-13, JVMCI's inliner assumes that the root
-          // compilation method and all potential inlinees have mature profiles (which
-          // includes type profiling). If it sees immature profiles, JVMCI's inliner
-          // can perform pathologically bad (e.g., causing OutOfMemoryErrors due to
-          // exploring/inlining too many graphs). Since a rewrite of the inliner is
-          // in progress, we simply disable the dialing back heuristic for now and will
-          // revisit this decision once the new inliner is completed.
-          next_level = CompLevel_full_profile;
-        } else
-#endif
-        {
-          // C1-generated fully profiled code is about 30% slower than the limited profile
-          // code that has only invocation and backedge counters. The observation is that
-          // if C2 queue is large enough we can spend too much time in the fully profiled code
-          // while waiting for C2 to pick the method from the queue. To alleviate this problem
-          // we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long
-          // we choose to compile a limited profiled version and then recompile with full profiling
-          // when the load on C2 goes down.
-          if (!disable_feedback && CompileBroker::queue_size(CompLevel_full_optimization) >
-              Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
-            next_level = CompLevel_limited_profile;
-          } else {
-            next_level = CompLevel_full_profile;
-          }
-        }
-      }
-      break;
-    case CompLevel_limited_profile:
-      if (is_method_profiled(method)) {
-        // Special case: we got here because this method was fully profiled in the interpreter.
-        next_level = CompLevel_full_optimization;
-      } else {
-        MethodData* mdo = method->method_data();
-        if (mdo != NULL) {
-          if (mdo->would_profile()) {
-            if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
-                                     Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
-                                     (this->*p)(i, b, cur_level, method))) {
-              next_level = CompLevel_full_profile;
-            }
-          } else {
-            next_level = CompLevel_full_optimization;
-          }
-        } else {
-          // If there is no MDO we need to profile
-          if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
-                                   Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
-                                   (this->*p)(i, b, cur_level, method))) {
-            next_level = CompLevel_full_profile;
-          }
-        }
-      }
-      break;
-    case CompLevel_full_profile:
-      {
-        MethodData* mdo = method->method_data();
-        if (mdo != NULL) {
-          if (mdo->would_profile()) {
-            int mdo_i = mdo->invocation_count_delta();
-            int mdo_b = mdo->backedge_count_delta();
-            if ((this->*p)(mdo_i, mdo_b, cur_level, method)) {
-              next_level = CompLevel_full_optimization;
-            }
-          } else {
-            next_level = CompLevel_full_optimization;
-          }
-        }
-      }
-      break;
-    }
-  }
-  return MIN2(next_level, (CompLevel)TieredStopAtLevel);
-}
-
-// Determine if a method should be compiled with a normal entry point at a different level.
-CompLevel AdvancedThresholdPolicy::call_event(Method* method, CompLevel cur_level, JavaThread * thread) {
-  CompLevel osr_level = MIN2((CompLevel) method->highest_osr_comp_level(),
-                             common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level, true));
-  CompLevel next_level = common(&AdvancedThresholdPolicy::call_predicate, method, cur_level);
-
-  // If OSR method level is greater than the regular method level, the levels should be
-  // equalized by raising the regular method level in order to avoid OSRs during each
-  // invocation of the method.
-  if (osr_level == CompLevel_full_optimization && cur_level == CompLevel_full_profile) {
-    MethodData* mdo = method->method_data();
-    guarantee(mdo != NULL, "MDO should not be NULL");
-    if (mdo->invocation_count() >= 1) {
-      next_level = CompLevel_full_optimization;
-    }
-  } else {
-    next_level = MAX2(osr_level, next_level);
-  }
-#if INCLUDE_JVMCI
-  if (UseJVMCICompiler) {
-    next_level = JVMCIRuntime::adjust_comp_level(method, false, next_level, thread);
-  }
-#endif
-  return next_level;
-}
-
-// Determine if we should do an OSR compilation of a given method.
-CompLevel AdvancedThresholdPolicy::loop_event(Method* method, CompLevel cur_level, JavaThread * thread) {
-  CompLevel next_level = common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level, true);
-  if (cur_level == CompLevel_none) {
-    // If there is a live OSR method that means that we deopted to the interpreter
-    // for the transition.
-    CompLevel osr_level = MIN2((CompLevel)method->highest_osr_comp_level(), next_level);
-    if (osr_level > CompLevel_none) {
-      return osr_level;
-    }
-  }
-#if INCLUDE_JVMCI
-  if (UseJVMCICompiler) {
-    next_level = JVMCIRuntime::adjust_comp_level(method, true, next_level, thread);
-  }
-#endif
-  return next_level;
-}
-
-// Update the rate and submit compile
-void AdvancedThresholdPolicy::submit_compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread) {
-  int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
-  update_rate(os::javaTimeMillis(), mh());
-  CompileBroker::compile_method(mh, bci, level, mh, hot_count, CompileTask::Reason_Tiered, thread);
-}
-
-bool AdvancedThresholdPolicy::maybe_switch_to_aot(const methodHandle& mh, CompLevel cur_level, CompLevel next_level, JavaThread* thread) {
-  if (UseAOT && !delay_compilation_during_startup()) {
-    if (cur_level == CompLevel_full_profile || cur_level == CompLevel_none) {
-      // If the current level is full profile or interpreter and we're switching to any other level,
-      // activate the AOT code back first so that we won't waste time overprofiling.
-      compile(mh, InvocationEntryBci, CompLevel_aot, thread);
-      // Fall through for JIT compilation.
-    }
-    if (next_level == CompLevel_limited_profile && cur_level != CompLevel_aot && mh->has_aot_code()) {
-      // If the next level is limited profile, use the aot code (if there is any),
-      // since it's essentially the same thing.
-      compile(mh, InvocationEntryBci, CompLevel_aot, thread);
-      // Not need to JIT, we're done.
-      return true;
-    }
-  }
-  return false;
-}
-
-
-// Handle the invocation event.
-void AdvancedThresholdPolicy::method_invocation_event(const methodHandle& mh, const methodHandle& imh,
-                                                      CompLevel level, CompiledMethod* nm, JavaThread* thread) {
-  if (should_create_mdo(mh(), level)) {
-    create_mdo(mh, thread);
-  }
-  CompLevel next_level = call_event(mh(), level, thread);
-  if (next_level != level) {
-    if (maybe_switch_to_aot(mh, level, next_level, thread)) {
-      // No JITting necessary
-      return;
-    }
-    if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh)) {
-      compile(mh, InvocationEntryBci, next_level, thread);
-    }
-  }
-}
-
-// Handle the back branch event. Notice that we can compile the method
-// with a regular entry from here.
-void AdvancedThresholdPolicy::method_back_branch_event(const methodHandle& mh, const methodHandle& imh,
-                                                       int bci, CompLevel level, CompiledMethod* nm, JavaThread* thread) {
-  if (should_create_mdo(mh(), level)) {
-    create_mdo(mh, thread);
-  }
-  // Check if MDO should be created for the inlined method
-  if (should_create_mdo(imh(), level)) {
-    create_mdo(imh, thread);
-  }
-
-  if (is_compilation_enabled()) {
-    CompLevel next_osr_level = loop_event(imh(), level, thread);
-    CompLevel max_osr_level = (CompLevel)imh->highest_osr_comp_level();
-    // At the very least compile the OSR version
-    if (!CompileBroker::compilation_is_in_queue(imh) && (next_osr_level != level)) {
-      compile(imh, bci, next_osr_level, thread);
-    }
-
-    // Use loop event as an opportunity to also check if there's been
-    // enough calls.
-    CompLevel cur_level, next_level;
-    if (mh() != imh()) { // If there is an enclosing method
-      if (level == CompLevel_aot) {
-        // Recompile the enclosing method to prevent infinite OSRs. Stay at AOT level while it's compiling.
-        if (max_osr_level != CompLevel_none && !CompileBroker::compilation_is_in_queue(mh)) {
-          compile(mh, InvocationEntryBci, MIN2((CompLevel)TieredStopAtLevel, CompLevel_full_profile), thread);
-        }
-      } else {
-        // Current loop event level is not AOT
-        guarantee(nm != NULL, "Should have nmethod here");
-        cur_level = comp_level(mh());
-        next_level = call_event(mh(), cur_level, thread);
-
-        if (max_osr_level == CompLevel_full_optimization) {
-          // The inlinee OSRed to full opt, we need to modify the enclosing method to avoid deopts
-          bool make_not_entrant = false;
-          if (nm->is_osr_method()) {
-            // This is an osr method, just make it not entrant and recompile later if needed
-            make_not_entrant = true;
-          } else {
-            if (next_level != CompLevel_full_optimization) {
-              // next_level is not full opt, so we need to recompile the
-              // enclosing method without the inlinee
-              cur_level = CompLevel_none;
-              make_not_entrant = true;
-            }
-          }
-          if (make_not_entrant) {
-            if (PrintTieredEvents) {
-              int osr_bci = nm->is_osr_method() ? nm->osr_entry_bci() : InvocationEntryBci;
-              print_event(MAKE_NOT_ENTRANT, mh(), mh(), osr_bci, level);
-            }
-            nm->make_not_entrant();
-          }
-        }
-        // Fix up next_level if necessary to avoid deopts
-        if (next_level == CompLevel_limited_profile && max_osr_level == CompLevel_full_profile) {
-          next_level = CompLevel_full_profile;
-        }
-        if (cur_level != next_level) {
-          if (!maybe_switch_to_aot(mh, cur_level, next_level, thread) && !CompileBroker::compilation_is_in_queue(mh)) {
-            compile(mh, InvocationEntryBci, next_level, thread);
-          }
-        }
-      }
-    } else {
-      cur_level = comp_level(mh());
-      next_level = call_event(mh(), cur_level, thread);
-      if (next_level != cur_level) {
-        if (!maybe_switch_to_aot(mh, cur_level, next_level, thread) && !CompileBroker::compilation_is_in_queue(mh)) {
-          compile(mh, InvocationEntryBci, next_level, thread);
-        }
-      }
-    }
-  }
-}
-
-#endif // TIERED
--- a/src/hotspot/share/runtime/advancedThresholdPolicy.hpp
+++ b/src/hotspot/share/runtime/advancedThresholdPolicy.hpp
@ -1,235 +0,0 @@
-/*
- * Copyright (c) 2010, 2017, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
-#define SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
-
-#include "runtime/simpleThresholdPolicy.hpp"
-
-#ifdef TIERED
-class CompileTask;
-class CompileQueue;
-
-/*
- *  The system supports 5 execution levels:
- *  * level 0 - interpreter
- *  * level 1 - C1 with full optimization (no profiling)
- *  * level 2 - C1 with invocation and backedge counters
- *  * level 3 - C1 with full profiling (level 2 + MDO)
- *  * level 4 - C2
- *
- * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters
- * (invocation counters and backedge counters). The frequency of these notifications is
- * different at each level. These notifications are used by the policy to decide what transition
- * to make.
- *
- * Execution starts at level 0 (interpreter), then the policy can decide either to compile the
- * method at level 3 or level 2. The decision is based on the following factors:
- *    1. The length of the C2 queue determines the next level. The observation is that level 2
- * is generally faster than level 3 by about 30%, therefore we would want to minimize the time
- * a method spends at level 3. We should only spend the time at level 3 that is necessary to get
- * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to
- * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile
- * request makes its way through the long queue. When the load on C2 recedes we are going to
- * recompile at level 3 and start gathering profiling information.
- *    2. The length of C1 queue is used to dynamically adjust the thresholds, so as to introduce
- * additional filtering if the compiler is overloaded. The rationale is that by the time a
- * method gets compiled it can become unused, so it doesn't make sense to put too much onto the
- * queue.
- *
- * After profiling is completed at level 3 the transition is made to level 4. Again, the length
- * of the C2 queue is used as a feedback to adjust the thresholds.
- *
- * After the first C1 compile some basic information is determined about the code like the number
- * of the blocks and the number of the loops. Based on that it can be decided that a method
- * is trivial and compiling it with C1 will yield the same code. In this case the method is
- * compiled at level 1 instead of 4.
- *
- * We also support profiling at level 0. If C1 is slow enough to produce the level 3 version of
- * the code and the C2 queue is sufficiently small we can decide to start profiling in the
- * interpreter (and continue profiling in the compiled code once the level 3 version arrives).
- * If the profiling at level 0 is fully completed before level 3 version is produced, a level 2
- * version is compiled instead in order to run faster waiting for a level 4 version.
- *
- * Compile queues are implemented as priority queues - for each method in the queue we compute
- * the event rate (the number of invocation and backedge counter increments per unit of time).
- * When getting an element off the queue we pick the one with the largest rate. Maintaining the
- * rate also allows us to remove stale methods (the ones that got on the queue but stopped
- * being used shortly after that).
-*/
-
-/* Command line options:
- * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method
- *   invocation and backedge notifications. Basically every n-th invocation or backedge a mutator thread
- *   makes a call into the runtime.
- *
- * - Tier?InvocationThreshold, Tier?CompileThreshold, Tier?BackEdgeThreshold, Tier?MinInvocationThreshold control
- *   compilation thresholds.
- *   Level 2 thresholds are not used and are provided for option-compatibility and potential future use.
- *   Other thresholds work as follows:
- *
- *   Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when
- *   the following predicate is true (X is the level):
- *
- *   i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s  && i + b > TierXCompileThreshold * s),
- *
- *   where $i$ is the number of method invocations, $b$ number of backedges and $s$ is the scaling
- *   coefficient that will be discussed further.
- *   The intuition is to equalize the time that is spend profiling each method.
- *   The same predicate is used to control the transition from level 3 to level 4 (C2). It should be
- *   noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come
- *   from Method* and for 3->4 transition they come from MDO (since profiled invocations are
- *   counted separately). Finally, if a method does not contain anything worth profiling, a transition
- *   from level 3 to level 4 occurs without considering thresholds (e.g., with fewer invocations than
- *   what is specified by Tier4InvocationThreshold).
- *
- *   OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates.
- *
- * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending
- *   on the compiler load. The scaling coefficients are computed as follows:
- *
- *   s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1,
- *
- *   where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X
- *   is the number of level X compiler threads.
- *
- *   Basically these parameters describe how many methods should be in the compile queue
- *   per compiler thread before the scaling coefficient increases by one.
- *
- *   This feedback provides the mechanism to automatically control the flow of compilation requests
- *   depending on the machine speed, mutator load and other external factors.
- *
- * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop.
- *   Consider the following observation: a method compiled with full profiling (level 3)
- *   is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO).
- *   Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue
- *   gets congested and the 3->4 transition is delayed. While the method is the C2 queue it continues
- *   executing at level 3 for much longer time than is required by the predicate and at suboptimal speed.
- *   The idea is to dynamically change the behavior of the system in such a way that if a substantial
- *   load on C2 is detected we would first do the 0->2 transition allowing a method to run faster.
- *   And then when the load decreases to allow 2->3 transitions.
- *
- *   Tier3Delay* parameters control this switching mechanism.
- *   Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy
- *   no longer does 0->3 transitions but does 0->2 transitions instead.
- *   Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue
- *   per compiler thread falls below the specified amount.
- *   The hysteresis is necessary to avoid jitter.
- *
- * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue.
- *   Basically, since we use the event rate d(i + b)/dt as a value of priority when selecting a method to
- *   compile from the compile queue, we also can detect stale methods for which the rate has been
- *   0 for some time in the same iteration. Stale methods can appear in the queue when an application
- *   abruptly changes its behavior.
- *
- * - TieredStopAtLevel, is used mostly for testing. It allows to bypass the policy logic and stick
- *   to a given level. For example it's useful to set TieredStopAtLevel = 1 in order to compile everything
- *   with pure c1.
- *
- * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the
- *   0->3 predicate are already exceeded by the given percentage but the level 3 version of the
- *   method is still not ready. We can even go directly from level 0 to 4 if c1 doesn't produce a compiled
- *   version in time. This reduces the overall transition to level 4 and decreases the startup time.
- *   Note that this behavior is also guarded by the Tier3Delay mechanism: when the c2 queue is too long
- *   these is not reason to start profiling prematurely.
- *
- * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation.
- *   Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered
- *   to be zero if no events occurred in TieredRateUpdateMaxTime.
- */
-
-
-class AdvancedThresholdPolicy : public SimpleThresholdPolicy {
-  jlong _start_time;
-
-  // Call and loop predicates determine whether a transition to a higher compilation
-  // level should be performed (pointers to predicate functions are passed to common().
-  // Predicates also take compiler load into account.
-  typedef bool (AdvancedThresholdPolicy::*Predicate)(int i, int b, CompLevel cur_level, Method* method);
-  bool call_predicate(int i, int b, CompLevel cur_level, Method* method);
-  bool loop_predicate(int i, int b, CompLevel cur_level, Method* method);
-  // Common transition function. Given a predicate determines if a method should transition to another level.
-  CompLevel common(Predicate p, Method* method, CompLevel cur_level, bool disable_feedback = false);
-  // Transition functions.
-  // call_event determines if a method should be compiled at a different
-  // level with a regular invocation entry.
-  CompLevel call_event(Method* method, CompLevel cur_level, JavaThread * thread);
-  // loop_event checks if a method should be OSR compiled at a different
-  // level.
-  CompLevel loop_event(Method* method, CompLevel cur_level, JavaThread * thread);
-  // Has a method been long around?
-  // We don't remove old methods from the compile queue even if they have
-  // very low activity (see select_task()).
-  inline bool is_old(Method* method);
-  // Was a given method inactive for a given number of milliseconds.
-  // If it is, we would remove it from the queue (see select_task()).
-  inline bool is_stale(jlong t, jlong timeout, Method* m);
-  // Compute the weight of the method for the compilation scheduling
-  inline double weight(Method* method);
-  // Apply heuristics and return true if x should be compiled before y
-  inline bool compare_methods(Method* x, Method* y);
-  // Compute event rate for a given method. The rate is the number of event (invocations + backedges)
-  // per millisecond.
-  inline void update_rate(jlong t, Method* m);
-  // Compute threshold scaling coefficient
-  inline double threshold_scale(CompLevel level, int feedback_k);
-  // If a method is old enough and is still in the interpreter we would want to
-  // start profiling without waiting for the compiled method to arrive. This function
-  // determines whether we should do that.
-  inline bool should_create_mdo(Method* method, CompLevel cur_level);
-  // Create MDO if necessary.
-  void create_mdo(const methodHandle& mh, JavaThread* thread);
-  // Is method profiled enough?
-  bool is_method_profiled(Method* method);
-
-  double _increase_threshold_at_ratio;
-
-  bool maybe_switch_to_aot(const methodHandle& mh, CompLevel cur_level, CompLevel next_level, JavaThread* thread);
-
-protected:
-  void print_specific(EventType type, const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level);
-
-  void set_increase_threshold_at_ratio() { _increase_threshold_at_ratio = 100 / (100 - (double)IncreaseFirstTierCompileThresholdAt); }
-  void set_start_time(jlong t) { _start_time = t;    }
-  jlong start_time() const     { return _start_time; }
-
-  // Submit a given method for compilation (and update the rate).
-  virtual void submit_compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread);
-  // event() from SimpleThresholdPolicy would call these.
-  virtual void method_invocation_event(const methodHandle& method, const methodHandle& inlinee,
-                                       CompLevel level, CompiledMethod* nm, JavaThread* thread);
-  virtual void method_back_branch_event(const methodHandle& method, const methodHandle& inlinee,
-                                        int bci, CompLevel level, CompiledMethod* nm, JavaThread* thread);
-public:
-  AdvancedThresholdPolicy() : _start_time(0) { }
-  // Select task is called by CompileBroker. We should return a task or NULL.
-  virtual CompileTask* select_task(CompileQueue* compile_queue);
-  virtual void initialize();
-  virtual bool should_not_inline(ciEnv* env, ciMethod* callee);
-
-};
-
-#endif // TIERED
-
-#endif // SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
--- a/src/hotspot/share/runtime/arguments.cpp
+++ b/src/hotspot/share/runtime/arguments.cpp
@ -1603,9 +1603,9 @@ intx Arguments::scaled_freq_log(intx freq_log, double scale) {
 }

 void Arguments::set_tiered_flags() {
-  // With tiered, set default policy to AdvancedThresholdPolicy, which is 3.
+  // With tiered, set default policy to SimpleThresholdPolicy, which is 2.
  if (FLAG_IS_DEFAULT(CompilationPolicyChoice)) {
-    FLAG_SET_DEFAULT(CompilationPolicyChoice, 3);
+    FLAG_SET_DEFAULT(CompilationPolicyChoice, 2);
  }
  if (CompilationPolicyChoice < 2) {
    vm_exit_during_initialization(
--- a/src/hotspot/share/runtime/compilationPolicy.cpp
+++ b/src/hotspot/share/runtime/compilationPolicy.cpp
@ -33,7 +33,6 @@
 #include "oops/method.inline.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/nativeLookup.hpp"
-#include "runtime/advancedThresholdPolicy.hpp"
 #include "runtime/compilationPolicy.hpp"
 #include "runtime/frame.hpp"
 #include "runtime/handles.inline.hpp"
@ -72,17 +71,10 @@ void compilationPolicy_init() {
    CompilationPolicy::set_policy(new SimpleThresholdPolicy());
 #else
    Unimplemented();
-#endif
-    break;
-  case 3:
-#ifdef TIERED
-    CompilationPolicy::set_policy(new AdvancedThresholdPolicy());
-#else
-    Unimplemented();
 #endif
    break;
  default:
-    fatal("CompilationPolicyChoice must be in the range: [0-3]");
+    fatal("CompilationPolicyChoice must be in the range: [0-2]");
  }
  CompilationPolicy::policy()->initialize();
 }
--- a/src/hotspot/share/runtime/globals.hpp
+++ b/src/hotspot/share/runtime/globals.hpp
@ -1158,8 +1158,8 @@ define_pd_global(uint64_t,MaxRAM,                    1ULL*G);
          "UseDynamicNumberOfCompilerThreads")                              \
                                                                            \
  product(intx, CompilationPolicyChoice, 0,                                 \
-          "which compilation policy (0-3)")                                 \
-          range(0, 3)                                                       \
+          "which compilation policy (0-2)")                                 \
+          range(0, 2)                                                       \
                                                                            \
  develop(bool, UseStackBanging, true,                                      \
          "use stack banging for stack overflow checks (required for "      \
--- a/src/hotspot/share/runtime/simpleThresholdPolicy.cpp
+++ b/src/hotspot/share/runtime/simpleThresholdPolicy.cpp
@ -140,20 +140,33 @@ void SimpleThresholdPolicy::print_event(EventType type, const methodHandle& mh,
 }

 void SimpleThresholdPolicy::initialize() {
-  if (FLAG_IS_DEFAULT(CICompilerCount)) {
-    FLAG_SET_DEFAULT(CICompilerCount, 3);
-  }
  int count = CICompilerCount;
 #ifdef _LP64
-  // On 64-bit systems, scale the number of compiler threads with
-  // the number of cores available on the system. Scaling is not
-  // performed on 32-bit systems because it can lead to exhaustion
-  // of the virtual memory address space available to the JVM.
+  // Turn on ergonomic compiler count selection
+  if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
+    FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
+  }
  if (CICompilerCountPerCPU) {
-    count = MAX2(log2_intptr(os::active_processor_count()) * 3 / 2, 2);
+    // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
+    int log_cpu = log2_intptr(os::active_processor_count());
+    int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
+    count = MAX2(log_cpu * loglog_cpu * 3 / 2, 2);
+    FLAG_SET_ERGO(intx, CICompilerCount, count);
+  }
+#else
+  // On 32-bit systems, the number of compiler threads is limited to 3.
+  // On these systems, the virtual address space available to the JVM
+  // is usually limited to 2-4 GB (the exact value depends on the platform).
+  // As the compilers (especially C2) can consume a large amount of
+  // memory, scaling the number of compiler threads with the number of
+  // available cores can result in the exhaustion of the address space
+  /// available to the VM and thus cause the VM to crash.
+  if (FLAG_IS_DEFAULT(CICompilerCount)) {
+    count = 3;
    FLAG_SET_ERGO(intx, CICompilerCount, count);
  }
 #endif
+
  if (TieredStopAtLevel < CompLevel_full_optimization) {
    // No C2 compiler thread required
    set_c1_count(count);
@ -162,6 +175,22 @@ void SimpleThresholdPolicy::initialize() {
    set_c2_count(MAX2(count - c1_count(), 1));
  }
  assert(count == c1_count() + c2_count(), "inconsistent compiler thread count");
+
+  // Some inlining tuning
+#ifdef X86
+  if (FLAG_IS_DEFAULT(InlineSmallCode)) {
+    FLAG_SET_DEFAULT(InlineSmallCode, 2000);
+  }
+#endif
+
+#if defined SPARC || defined AARCH64
+  if (FLAG_IS_DEFAULT(InlineSmallCode)) {
+    FLAG_SET_DEFAULT(InlineSmallCode, 2500);
+  }
+#endif
+
+  set_increase_threshold_at_ratio();
+  set_start_time(os::javaTimeMillis());
 }

 void SimpleThresholdPolicy::set_carry_if_necessary(InvocationCounter *counter) {
@ -186,7 +215,66 @@ void SimpleThresholdPolicy::handle_counter_overflow(Method* method) {

 // Called with the queue locked and with at least one element
 CompileTask* SimpleThresholdPolicy::select_task(CompileQueue* compile_queue) {
-  return select_task_helper(compile_queue);
+  CompileTask *max_blocking_task = NULL;
+  CompileTask *max_task = NULL;
+  Method* max_method = NULL;
+  jlong t = os::javaTimeMillis();
+  // Iterate through the queue and find a method with a maximum rate.
+  for (CompileTask* task = compile_queue->first(); task != NULL;) {
+    CompileTask* next_task = task->next();
+    Method* method = task->method();
+    update_rate(t, method);
+    if (max_task == NULL) {
+      max_task = task;
+      max_method = method;
+    } else {
+      // If a method has been stale for some time, remove it from the queue.
+      // Blocking tasks and tasks submitted from whitebox API don't become stale
+      if (task->can_become_stale() && is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
+        if (PrintTieredEvents) {
+          print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level());
+        }
+        compile_queue->remove_and_mark_stale(task);
+        method->clear_queued_for_compilation();
+        task = next_task;
+        continue;
+      }
+
+      // Select a method with a higher rate
+      if (compare_methods(method, max_method)) {
+        max_task = task;
+        max_method = method;
+      }
+    }
+
+    if (task->is_blocking()) {
+      if (max_blocking_task == NULL || compare_methods(method, max_blocking_task->method())) {
+        max_blocking_task = task;
+      }
+    }
+
+    task = next_task;
+  }
+
+  if (max_blocking_task != NULL) {
+    // In blocking compilation mode, the CompileBroker will make
+    // compilations submitted by a JVMCI compiler thread non-blocking. These
+    // compilations should be scheduled after all blocking compilations
+    // to service non-compiler related compilations sooner and reduce the
+    // chance of such compilations timing out.
+    max_task = max_blocking_task;
+    max_method = max_task->method();
+  }
+
+  if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile
+      && is_method_profiled(max_method)) {
+    max_task->set_comp_level(CompLevel_limited_profile);
+    if (PrintTieredEvents) {
+      print_event(UPDATE_IN_QUEUE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
+    }
+  }
+
+  return max_task;
 }

 void SimpleThresholdPolicy::reprofile(ScopeDesc* trap_scope, bool is_osr) {
@ -284,26 +372,150 @@ void SimpleThresholdPolicy::compile(const methodHandle& mh, int bci, CompLevel l
  }
 }

-// Tell the broker to compile the method
+// Update the rate and submit compile
 void SimpleThresholdPolicy::submit_compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread) {
  int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
+  update_rate(os::javaTimeMillis(), mh());
  CompileBroker::compile_method(mh, bci, level, mh, hot_count, CompileTask::Reason_Tiered, thread);
 }

+// Print an event.
+void SimpleThresholdPolicy::print_specific(EventType type, const methodHandle& mh, const methodHandle& imh,
+                                             int bci, CompLevel level) {
+  tty->print(" rate=");
+  if (mh->prev_time() == 0) tty->print("n/a");
+  else tty->print("%f", mh->rate());
+
+  tty->print(" k=%.2lf,%.2lf", threshold_scale(CompLevel_full_profile, Tier3LoadFeedback),
+                               threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback));
+
+}
+
+// update_rate() is called from select_task() while holding a compile queue lock.
+void SimpleThresholdPolicy::update_rate(jlong t, Method* m) {
+  // Skip update if counters are absent.
+  // Can't allocate them since we are holding compile queue lock.
+  if (m->method_counters() == NULL)  return;
+
+  if (is_old(m)) {
+    // We don't remove old methods from the queue,
+    // so we can just zero the rate.
+    m->set_rate(0);
+    return;
+  }
+
+  // We don't update the rate if we've just came out of a safepoint.
+  // delta_s is the time since last safepoint in milliseconds.
+  jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint();
+  jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // milliseconds since the last measurement
+  // How many events were there since the last time?
+  int event_count = m->invocation_count() + m->backedge_count();
+  int delta_e = event_count - m->prev_event_count();
+
+  // We should be running for at least 1ms.
+  if (delta_s >= TieredRateUpdateMinTime) {
+    // And we must've taken the previous point at least 1ms before.
+    if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) {
+      m->set_prev_time(t);
+      m->set_prev_event_count(event_count);
+      m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond
+    } else {
+      if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) {
+        // If nothing happened for 25ms, zero the rate. Don't modify prev values.
+        m->set_rate(0);
+      }
+    }
+  }
+}
+
+// Check if this method has been stale from a given number of milliseconds.
+// See select_task().
+bool SimpleThresholdPolicy::is_stale(jlong t, jlong timeout, Method* m) {
+  jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint();
+  jlong delta_t = t - m->prev_time();
+  if (delta_t > timeout && delta_s > timeout) {
+    int event_count = m->invocation_count() + m->backedge_count();
+    int delta_e = event_count - m->prev_event_count();
+    // Return true if there were no events.
+    return delta_e == 0;
+  }
+  return false;
+}
+
+// We don't remove old methods from the compile queue even if they have
+// very low activity. See select_task().
+bool SimpleThresholdPolicy::is_old(Method* method) {
+  return method->invocation_count() > 50000 || method->backedge_count() > 500000;
+}
+
+double SimpleThresholdPolicy::weight(Method* method) {
+  return (double)(method->rate() + 1) *
+    (method->invocation_count() + 1) * (method->backedge_count() + 1);
+}
+
+// Apply heuristics and return true if x should be compiled before y
+bool SimpleThresholdPolicy::compare_methods(Method* x, Method* y) {
+  if (x->highest_comp_level() > y->highest_comp_level()) {
+    // recompilation after deopt
+    return true;
+  } else
+    if (x->highest_comp_level() == y->highest_comp_level()) {
+      if (weight(x) > weight(y)) {
+        return true;
+      }
+    }
+  return false;
+}
+
+// Is method profiled enough?
+bool SimpleThresholdPolicy::is_method_profiled(Method* method) {
+  MethodData* mdo = method->method_data();
+  if (mdo != NULL) {
+    int i = mdo->invocation_count_delta();
+    int b = mdo->backedge_count_delta();
+    return call_predicate_helper<CompLevel_full_profile>(i, b, 1, method);
+  }
+  return false;
+}
+
+double SimpleThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) {
+  double queue_size = CompileBroker::queue_size(level);
+  int comp_count = compiler_count(level);
+  double k = queue_size / (feedback_k * comp_count) + 1;
+
+  // Increase C1 compile threshold when the code cache is filled more
+  // than specified by IncreaseFirstTierCompileThresholdAt percentage.
+  // The main intention is to keep enough free space for C2 compiled code
+  // to achieve peak performance if the code cache is under stress.
+  if ((TieredStopAtLevel == CompLevel_full_optimization) && (level != CompLevel_full_optimization))  {
+    double current_reverse_free_ratio = CodeCache::reverse_free_ratio(CodeCache::get_code_blob_type(level));
+    if (current_reverse_free_ratio > _increase_threshold_at_ratio) {
+      k *= exp(current_reverse_free_ratio - _increase_threshold_at_ratio);
+    }
+  }
+  return k;
+}
+
 // Call and loop predicates determine whether a transition to a higher
 // compilation level should be performed (pointers to predicate functions
-// are passed to common() transition function).
+// are passed to common()).
+// Tier?LoadFeedback is basically a coefficient that determines of
+// how many methods per compiler thread can be in the queue before
+// the threshold values double.
 bool SimpleThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level, Method* method) {
  switch(cur_level) {
  case CompLevel_aot: {
-    return loop_predicate_helper<CompLevel_aot>(i, b, 1.0, method);
+    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
+    return loop_predicate_helper<CompLevel_aot>(i, b, k, method);
  }
  case CompLevel_none:
  case CompLevel_limited_profile: {
-    return loop_predicate_helper<CompLevel_none>(i, b, 1.0, method);
+    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
+    return loop_predicate_helper<CompLevel_none>(i, b, k, method);
  }
  case CompLevel_full_profile: {
-    return loop_predicate_helper<CompLevel_full_profile>(i, b, 1.0, method);
+    double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
+    return loop_predicate_helper<CompLevel_full_profile>(i, b, k, method);
  }
  default:
    return true;
@ -313,14 +525,17 @@ bool SimpleThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level, Me
 bool SimpleThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level, Method* method) {
  switch(cur_level) {
  case CompLevel_aot: {
-    return call_predicate_helper<CompLevel_aot>(i, b, 1.0, method);
+    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
+    return call_predicate_helper<CompLevel_aot>(i, b, k, method);
  }
  case CompLevel_none:
  case CompLevel_limited_profile: {
-    return call_predicate_helper<CompLevel_none>(i, b, 1.0, method);
+    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
+    return call_predicate_helper<CompLevel_none>(i, b, k, method);
  }
  case CompLevel_full_profile: {
-    return call_predicate_helper<CompLevel_full_profile>(i, b, 1.0, method);
+    double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
+    return call_predicate_helper<CompLevel_full_profile>(i, b, k, method);
  }
  default:
    return true;
@ -341,31 +556,167 @@ bool SimpleThresholdPolicy::is_mature(Method* method) {
  return false;
 }

+// If a method is old enough and is still in the interpreter we would want to
+// start profiling without waiting for the compiled method to arrive.
+// We also take the load on compilers into the account.
+bool SimpleThresholdPolicy::should_create_mdo(Method* method, CompLevel cur_level) {
+  if (cur_level == CompLevel_none &&
+      CompileBroker::queue_size(CompLevel_full_optimization) <=
+      Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
+    int i = method->invocation_count();
+    int b = method->backedge_count();
+    double k = Tier0ProfilingStartPercentage / 100.0;
+    return call_predicate_helper<CompLevel_none>(i, b, k, method) || loop_predicate_helper<CompLevel_none>(i, b, k, method);
+  }
+  return false;
+}
+
+// Inlining control: if we're compiling a profiled method with C1 and the callee
+// is known to have OSRed in a C2 version, don't inline it.
+bool SimpleThresholdPolicy::should_not_inline(ciEnv* env, ciMethod* callee) {
+  CompLevel comp_level = (CompLevel)env->comp_level();
+  if (comp_level == CompLevel_full_profile ||
+      comp_level == CompLevel_limited_profile) {
+    return callee->highest_osr_comp_level() == CompLevel_full_optimization;
+  }
+  return false;
+}
+
+// Create MDO if necessary.
+void SimpleThresholdPolicy::create_mdo(const methodHandle& mh, JavaThread* THREAD) {
+  if (mh->is_native() ||
+      mh->is_abstract() ||
+      mh->is_accessor() ||
+      mh->is_constant_getter()) {
+    return;
+  }
+  if (mh->method_data() == NULL) {
+    Method::build_interpreter_method_data(mh, CHECK_AND_CLEAR);
+  }
+}
+
+
+/*
+ * Method states:
+ *   0 - interpreter (CompLevel_none)
+ *   1 - pure C1 (CompLevel_simple)
+ *   2 - C1 with invocation and backedge counting (CompLevel_limited_profile)
+ *   3 - C1 with full profiling (CompLevel_full_profile)
+ *   4 - C2 (CompLevel_full_optimization)
+ *
+ * Common state transition patterns:
+ * a. 0 -> 3 -> 4.
+ *    The most common path. But note that even in this straightforward case
+ *    profiling can start at level 0 and finish at level 3.
+ *
+ * b. 0 -> 2 -> 3 -> 4.
+ *    This case occurs when the load on C2 is deemed too high. So, instead of transitioning
+ *    into state 3 directly and over-profiling while a method is in the C2 queue we transition to
+ *    level 2 and wait until the load on C2 decreases. This path is disabled for OSRs.
+ *
+ * c. 0 -> (3->2) -> 4.
+ *    In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough
+ *    to enable the profiling to fully occur at level 0. In this case we change the compilation level
+ *    of the method to 2 while the request is still in-queue, because it'll allow it to run much faster
+ *    without full profiling while c2 is compiling.
+ *
+ * d. 0 -> 3 -> 1 or 0 -> 2 -> 1.
+ *    After a method was once compiled with C1 it can be identified as trivial and be compiled to
+ *    level 1. These transition can also occur if a method can't be compiled with C2 but can with C1.
+ *
+ * e. 0 -> 4.
+ *    This can happen if a method fails C1 compilation (it will still be profiled in the interpreter)
+ *    or because of a deopt that didn't require reprofiling (compilation won't happen in this case because
+ *    the compiled version already exists).
+ *
+ * Note that since state 0 can be reached from any other state via deoptimization different loops
+ * are possible.
+ *
+ */
+
 // Common transition function. Given a predicate determines if a method should transition to another level.
-CompLevel SimpleThresholdPolicy::common(Predicate p, Method* method, CompLevel cur_level) {
+CompLevel SimpleThresholdPolicy::common(Predicate p, Method* method, CompLevel cur_level, bool disable_feedback) {
  CompLevel next_level = cur_level;
  int i = method->invocation_count();
  int b = method->backedge_count();

-  if (is_trivial(method) && cur_level != CompLevel_aot) {
+  if (is_trivial(method)) {
    next_level = CompLevel_simple;
  } else {
    switch(cur_level) {
-    case CompLevel_aot: {
-      if ((this->*p)(i, b, cur_level, method)) {
+      default: break;
+      case CompLevel_aot: {
+      // If we were at full profile level, would we switch to full opt?
+      if (common(p, method, CompLevel_full_profile, disable_feedback) == CompLevel_full_optimization) {
+        next_level = CompLevel_full_optimization;
+      } else if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
+                               Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
+                               (this->*p)(i, b, cur_level, method))) {
        next_level = CompLevel_full_profile;
      }
    }
    break;
    case CompLevel_none:
      // If we were at full profile level, would we switch to full opt?
-      if (common(p, method, CompLevel_full_profile) == CompLevel_full_optimization) {
+      if (common(p, method, CompLevel_full_profile, disable_feedback) == CompLevel_full_optimization) {
        next_level = CompLevel_full_optimization;
      } else if ((this->*p)(i, b, cur_level, method)) {
-        next_level = CompLevel_full_profile;
+#if INCLUDE_JVMCI
+        if (EnableJVMCI && UseJVMCICompiler) {
+          // Since JVMCI takes a while to warm up, its queue inevitably backs up during
+          // early VM execution. As of 2014-06-13, JVMCI's inliner assumes that the root
+          // compilation method and all potential inlinees have mature profiles (which
+          // includes type profiling). If it sees immature profiles, JVMCI's inliner
+          // can perform pathologically bad (e.g., causing OutOfMemoryErrors due to
+          // exploring/inlining too many graphs). Since a rewrite of the inliner is
+          // in progress, we simply disable the dialing back heuristic for now and will
+          // revisit this decision once the new inliner is completed.
+          next_level = CompLevel_full_profile;
+        } else
+#endif
+        {
+          // C1-generated fully profiled code is about 30% slower than the limited profile
+          // code that has only invocation and backedge counters. The observation is that
+          // if C2 queue is large enough we can spend too much time in the fully profiled code
+          // while waiting for C2 to pick the method from the queue. To alleviate this problem
+          // we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long
+          // we choose to compile a limited profiled version and then recompile with full profiling
+          // when the load on C2 goes down.
+          if (!disable_feedback && CompileBroker::queue_size(CompLevel_full_optimization) >
+              Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
+            next_level = CompLevel_limited_profile;
+          } else {
+            next_level = CompLevel_full_profile;
+          }
+        }
      }
      break;
    case CompLevel_limited_profile:
+      if (is_method_profiled(method)) {
+        // Special case: we got here because this method was fully profiled in the interpreter.
+        next_level = CompLevel_full_optimization;
+      } else {
+        MethodData* mdo = method->method_data();
+        if (mdo != NULL) {
+          if (mdo->would_profile()) {
+            if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
+                                     Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
+                                     (this->*p)(i, b, cur_level, method))) {
+              next_level = CompLevel_full_profile;
+            }
+          } else {
+            next_level = CompLevel_full_optimization;
+          }
+        } else {
+          // If there is no MDO we need to profile
+          if (disable_feedback || (CompileBroker::queue_size(CompLevel_full_optimization) <=
+                                   Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
+                                   (this->*p)(i, b, cur_level, method))) {
+            next_level = CompLevel_full_profile;
+          }
+        }
+      }
+      break;
    case CompLevel_full_profile:
      {
        MethodData* mdo = method->method_data();
@ -382,17 +733,15 @@ CompLevel SimpleThresholdPolicy::common(Predicate p, Method* method, CompLevel c
        }
      }
      break;
-    default:
-      break;
    }
  }
  return MIN2(next_level, (CompLevel)TieredStopAtLevel);
 }

 // Determine if a method should be compiled with a normal entry point at a different level.
-CompLevel SimpleThresholdPolicy::call_event(Method* method,  CompLevel cur_level, JavaThread* thread) {
+CompLevel SimpleThresholdPolicy::call_event(Method* method, CompLevel cur_level, JavaThread * thread) {
  CompLevel osr_level = MIN2((CompLevel) method->highest_osr_comp_level(),
-                             common(&SimpleThresholdPolicy::loop_predicate, method, cur_level));
+                             common(&SimpleThresholdPolicy::loop_predicate, method, cur_level, true));
  CompLevel next_level = common(&SimpleThresholdPolicy::call_predicate, method, cur_level);

  // If OSR method level is greater than the regular method level, the levels should be
@ -417,7 +766,7 @@ CompLevel SimpleThresholdPolicy::call_event(Method* method,  CompLevel cur_level

 // Determine if we should do an OSR compilation of a given method.
 CompLevel SimpleThresholdPolicy::loop_event(Method* method, CompLevel cur_level, JavaThread* thread) {
-  CompLevel next_level = common(&SimpleThresholdPolicy::loop_predicate, method, cur_level);
+  CompLevel next_level = common(&SimpleThresholdPolicy::loop_predicate, method, cur_level, true);
  if (cur_level == CompLevel_none) {
    // If there is a live OSR method that means that we deopted to the interpreter
    // for the transition.
@ -434,13 +783,39 @@ CompLevel SimpleThresholdPolicy::loop_event(Method* method, CompLevel cur_level,
  return next_level;
 }

+bool SimpleThresholdPolicy::maybe_switch_to_aot(const methodHandle& mh, CompLevel cur_level, CompLevel next_level, JavaThread* thread) {
+  if (UseAOT && !delay_compilation_during_startup()) {
+    if (cur_level == CompLevel_full_profile || cur_level == CompLevel_none) {
+      // If the current level is full profile or interpreter and we're switching to any other level,
+      // activate the AOT code back first so that we won't waste time overprofiling.
+      compile(mh, InvocationEntryBci, CompLevel_aot, thread);
+      // Fall through for JIT compilation.
+    }
+    if (next_level == CompLevel_limited_profile && cur_level != CompLevel_aot && mh->has_aot_code()) {
+      // If the next level is limited profile, use the aot code (if there is any),
+      // since it's essentially the same thing.
+      compile(mh, InvocationEntryBci, CompLevel_aot, thread);
+      // Not need to JIT, we're done.
+      return true;
+    }
+  }
+  return false;
+}
+

 // Handle the invocation event.
 void SimpleThresholdPolicy::method_invocation_event(const methodHandle& mh, const methodHandle& imh,
-                                              CompLevel level, CompiledMethod* nm, JavaThread* thread) {
-  if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh)) {
-    CompLevel next_level = call_event(mh(), level, thread);
-    if (next_level != level) {
+                                                      CompLevel level, CompiledMethod* nm, JavaThread* thread) {
+  if (should_create_mdo(mh(), level)) {
+    create_mdo(mh, thread);
+  }
+  CompLevel next_level = call_event(mh(), level, thread);
+  if (next_level != level) {
+    if (maybe_switch_to_aot(mh, level, next_level, thread)) {
+      // No JITting necessary
+      return;
+    }
+    if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh)) {
      compile(mh, InvocationEntryBci, next_level, thread);
    }
  }
@ -450,25 +825,77 @@ void SimpleThresholdPolicy::method_invocation_event(const methodHandle& mh, cons
 // with a regular entry from here.
 void SimpleThresholdPolicy::method_back_branch_event(const methodHandle& mh, const methodHandle& imh,
                                                     int bci, CompLevel level, CompiledMethod* nm, JavaThread* thread) {
-  // If the method is already compiling, quickly bail out.
-  if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh)) {
-    // Use loop event as an opportunity to also check there's been
-    // enough calls.
-    CompLevel cur_level = comp_level(mh());
-    CompLevel next_level = call_event(mh(), cur_level, thread);
-    CompLevel next_osr_level = loop_event(mh(), level, thread);
+  if (should_create_mdo(mh(), level)) {
+    create_mdo(mh, thread);
+  }
+  // Check if MDO should be created for the inlined method
+  if (should_create_mdo(imh(), level)) {
+    create_mdo(imh, thread);
+  }

-    next_level = MAX2(next_level,
-                      next_osr_level < CompLevel_full_optimization ? next_osr_level : cur_level);
-    bool is_compiling = false;
-    if (next_level != cur_level) {
-      compile(mh, InvocationEntryBci, next_level, thread);
-      is_compiling = true;
+  if (is_compilation_enabled()) {
+    CompLevel next_osr_level = loop_event(imh(), level, thread);
+    CompLevel max_osr_level = (CompLevel)imh->highest_osr_comp_level();
+    // At the very least compile the OSR version
+    if (!CompileBroker::compilation_is_in_queue(imh) && (next_osr_level != level)) {
+      compile(imh, bci, next_osr_level, thread);
    }

-    // Do the OSR version
-    if (!is_compiling && next_osr_level != level) {
-      compile(mh, bci, next_osr_level, thread);
+    // Use loop event as an opportunity to also check if there's been
+    // enough calls.
+    CompLevel cur_level, next_level;
+    if (mh() != imh()) { // If there is an enclosing method
+      if (level == CompLevel_aot) {
+        // Recompile the enclosing method to prevent infinite OSRs. Stay at AOT level while it's compiling.
+        if (max_osr_level != CompLevel_none && !CompileBroker::compilation_is_in_queue(mh)) {
+          compile(mh, InvocationEntryBci, MIN2((CompLevel)TieredStopAtLevel, CompLevel_full_profile), thread);
+        }
+      } else {
+        // Current loop event level is not AOT
+        guarantee(nm != NULL, "Should have nmethod here");
+        cur_level = comp_level(mh());
+        next_level = call_event(mh(), cur_level, thread);
+
+        if (max_osr_level == CompLevel_full_optimization) {
+          // The inlinee OSRed to full opt, we need to modify the enclosing method to avoid deopts
+          bool make_not_entrant = false;
+          if (nm->is_osr_method()) {
+            // This is an osr method, just make it not entrant and recompile later if needed
+            make_not_entrant = true;
+          } else {
+            if (next_level != CompLevel_full_optimization) {
+              // next_level is not full opt, so we need to recompile the
+              // enclosing method without the inlinee
+              cur_level = CompLevel_none;
+              make_not_entrant = true;
+            }
+          }
+          if (make_not_entrant) {
+            if (PrintTieredEvents) {
+              int osr_bci = nm->is_osr_method() ? nm->osr_entry_bci() : InvocationEntryBci;
+              print_event(MAKE_NOT_ENTRANT, mh(), mh(), osr_bci, level);
+            }
+            nm->make_not_entrant();
+          }
+        }
+        // Fix up next_level if necessary to avoid deopts
+        if (next_level == CompLevel_limited_profile && max_osr_level == CompLevel_full_profile) {
+          next_level = CompLevel_full_profile;
+        }
+        if (cur_level != next_level) {
+          if (!maybe_switch_to_aot(mh, cur_level, next_level, thread) && !CompileBroker::compilation_is_in_queue(mh)) {
+            compile(mh, InvocationEntryBci, next_level, thread);
+          }
+        }
+      }
+    } else {
+      cur_level = comp_level(mh());
+      next_level = call_event(mh(), cur_level, thread);
+      if (next_level != cur_level) {
+        if (!maybe_switch_to_aot(mh, cur_level, next_level, thread) && !CompileBroker::compilation_is_in_queue(mh)) {
+          compile(mh, InvocationEntryBci, next_level, thread);
+        }
+      }
    }
  }
 }
--- a/src/hotspot/share/runtime/simpleThresholdPolicy.hpp
+++ b/src/hotspot/share/runtime/simpleThresholdPolicy.hpp
@ -34,8 +34,136 @@

 class CompileTask;
 class CompileQueue;
+/*
+ *  The system supports 5 execution levels:
+ *  * level 0 - interpreter
+ *  * level 1 - C1 with full optimization (no profiling)
+ *  * level 2 - C1 with invocation and backedge counters
+ *  * level 3 - C1 with full profiling (level 2 + MDO)
+ *  * level 4 - C2
+ *
+ * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters
+ * (invocation counters and backedge counters). The frequency of these notifications is
+ * different at each level. These notifications are used by the policy to decide what transition
+ * to make.
+ *
+ * Execution starts at level 0 (interpreter), then the policy can decide either to compile the
+ * method at level 3 or level 2. The decision is based on the following factors:
+ *    1. The length of the C2 queue determines the next level. The observation is that level 2
+ * is generally faster than level 3 by about 30%, therefore we would want to minimize the time
+ * a method spends at level 3. We should only spend the time at level 3 that is necessary to get
+ * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to
+ * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile
+ * request makes its way through the long queue. When the load on C2 recedes we are going to
+ * recompile at level 3 and start gathering profiling information.
+ *    2. The length of C1 queue is used to dynamically adjust the thresholds, so as to introduce
+ * additional filtering if the compiler is overloaded. The rationale is that by the time a
+ * method gets compiled it can become unused, so it doesn't make sense to put too much onto the
+ * queue.
+ *
+ * After profiling is completed at level 3 the transition is made to level 4. Again, the length
+ * of the C2 queue is used as a feedback to adjust the thresholds.
+ *
+ * After the first C1 compile some basic information is determined about the code like the number
+ * of the blocks and the number of the loops. Based on that it can be decided that a method
+ * is trivial and compiling it with C1 will yield the same code. In this case the method is
+ * compiled at level 1 instead of 4.
+ *
+ * We also support profiling at level 0. If C1 is slow enough to produce the level 3 version of
+ * the code and the C2 queue is sufficiently small we can decide to start profiling in the
+ * interpreter (and continue profiling in the compiled code once the level 3 version arrives).
+ * If the profiling at level 0 is fully completed before level 3 version is produced, a level 2
+ * version is compiled instead in order to run faster waiting for a level 4 version.
+ *
+ * Compile queues are implemented as priority queues - for each method in the queue we compute
+ * the event rate (the number of invocation and backedge counter increments per unit of time).
+ * When getting an element off the queue we pick the one with the largest rate. Maintaining the
+ * rate also allows us to remove stale methods (the ones that got on the queue but stopped
+ * being used shortly after that).
+*/
+
+/* Command line options:
+ * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method
+ *   invocation and backedge notifications. Basically every n-th invocation or backedge a mutator thread
+ *   makes a call into the runtime.
+ *
+ * - Tier?InvocationThreshold, Tier?CompileThreshold, Tier?BackEdgeThreshold, Tier?MinInvocationThreshold control
+ *   compilation thresholds.
+ *   Level 2 thresholds are not used and are provided for option-compatibility and potential future use.
+ *   Other thresholds work as follows:
+ *
+ *   Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when
+ *   the following predicate is true (X is the level):
+ *
+ *   i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s  && i + b > TierXCompileThreshold * s),
+ *
+ *   where $i$ is the number of method invocations, $b$ number of backedges and $s$ is the scaling
+ *   coefficient that will be discussed further.
+ *   The intuition is to equalize the time that is spend profiling each method.
+ *   The same predicate is used to control the transition from level 3 to level 4 (C2). It should be
+ *   noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come
+ *   from Method* and for 3->4 transition they come from MDO (since profiled invocations are
+ *   counted separately). Finally, if a method does not contain anything worth profiling, a transition
+ *   from level 3 to level 4 occurs without considering thresholds (e.g., with fewer invocations than
+ *   what is specified by Tier4InvocationThreshold).
+ *
+ *   OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates.
+ *
+ * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending
+ *   on the compiler load. The scaling coefficients are computed as follows:
+ *
+ *   s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1,
+ *
+ *   where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X
+ *   is the number of level X compiler threads.
+ *
+ *   Basically these parameters describe how many methods should be in the compile queue
+ *   per compiler thread before the scaling coefficient increases by one.
+ *
+ *   This feedback provides the mechanism to automatically control the flow of compilation requests
+ *   depending on the machine speed, mutator load and other external factors.
+ *
+ * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop.
+ *   Consider the following observation: a method compiled with full profiling (level 3)
+ *   is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO).
+ *   Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue
+ *   gets congested and the 3->4 transition is delayed. While the method is the C2 queue it continues
+ *   executing at level 3 for much longer time than is required by the predicate and at suboptimal speed.
+ *   The idea is to dynamically change the behavior of the system in such a way that if a substantial
+ *   load on C2 is detected we would first do the 0->2 transition allowing a method to run faster.
+ *   And then when the load decreases to allow 2->3 transitions.
+ *
+ *   Tier3Delay* parameters control this switching mechanism.
+ *   Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy
+ *   no longer does 0->3 transitions but does 0->2 transitions instead.
+ *   Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue
+ *   per compiler thread falls below the specified amount.
+ *   The hysteresis is necessary to avoid jitter.
+ *
+ * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue.
+ *   Basically, since we use the event rate d(i + b)/dt as a value of priority when selecting a method to
+ *   compile from the compile queue, we also can detect stale methods for which the rate has been
+ *   0 for some time in the same iteration. Stale methods can appear in the queue when an application
+ *   abruptly changes its behavior.
+ *
+ * - TieredStopAtLevel, is used mostly for testing. It allows to bypass the policy logic and stick
+ *   to a given level. For example it's useful to set TieredStopAtLevel = 1 in order to compile everything
+ *   with pure c1.
+ *
+ * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the
+ *   0->3 predicate are already exceeded by the given percentage but the level 3 version of the
+ *   method is still not ready. We can even go directly from level 0 to 4 if c1 doesn't produce a compiled
+ *   version in time. This reduces the overall transition to level 4 and decreases the startup time.
+ *   Note that this behavior is also guarded by the Tier3Delay mechanism: when the c2 queue is too long
+ *   these is not reason to start profiling prematurely.
+ *
+ * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation.
+ *   Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered
+ *   to be zero if no events occurred in TieredRateUpdateMaxTime.
+ */

 class SimpleThresholdPolicy : public CompilationPolicy {
+  jlong _start_time;
  int _c1_count, _c2_count;

  // Check if the counter is big enough and set carry (effectively infinity).
@ -49,7 +177,7 @@ class SimpleThresholdPolicy : public CompilationPolicy {
  bool call_predicate(int i, int b, CompLevel cur_level, Method* method);
  bool loop_predicate(int i, int b, CompLevel cur_level, Method* method);
  // Common transition function. Given a predicate determines if a method should transition to another level.
-  CompLevel common(Predicate p, Method* method, CompLevel cur_level);
+  CompLevel common(Predicate p, Method* method, CompLevel cur_level, bool disable_feedback = false);
  // Transition functions.
  // call_event determines if a method should be compiled at a different
  // level with a regular invocation entry.
@ -58,6 +186,35 @@ class SimpleThresholdPolicy : public CompilationPolicy {
  // level.
  CompLevel loop_event(Method* method, CompLevel cur_level, JavaThread* thread);
  void print_counters(const char* prefix, const methodHandle& mh);
+  // Has a method been long around?
+  // We don't remove old methods from the compile queue even if they have
+  // very low activity (see select_task()).
+  inline bool is_old(Method* method);
+  // Was a given method inactive for a given number of milliseconds.
+  // If it is, we would remove it from the queue (see select_task()).
+  inline bool is_stale(jlong t, jlong timeout, Method* m);
+  // Compute the weight of the method for the compilation scheduling
+  inline double weight(Method* method);
+  // Apply heuristics and return true if x should be compiled before y
+  inline bool compare_methods(Method* x, Method* y);
+  // Compute event rate for a given method. The rate is the number of event (invocations + backedges)
+  // per millisecond.
+  inline void update_rate(jlong t, Method* m);
+  // Compute threshold scaling coefficient
+  inline double threshold_scale(CompLevel level, int feedback_k);
+  // If a method is old enough and is still in the interpreter we would want to
+  // start profiling without waiting for the compiled method to arrive. This function
+  // determines whether we should do that.
+  inline bool should_create_mdo(Method* method, CompLevel cur_level);
+  // Create MDO if necessary.
+  void create_mdo(const methodHandle& mh, JavaThread* thread);
+  // Is method profiled enough?
+  bool is_method_profiled(Method* method);
+
+  double _increase_threshold_at_ratio;
+
+  bool maybe_switch_to_aot(const methodHandle& mh, CompLevel cur_level, CompLevel next_level, JavaThread* thread);
+
 protected:
  int c1_count() const     { return _c1_count; }
  int c2_count() const     { return _c2_count; }
@ -67,7 +224,7 @@ protected:
  enum EventType { CALL, LOOP, COMPILE, REMOVE_FROM_QUEUE, UPDATE_IN_QUEUE, REPROFILE, MAKE_NOT_ENTRANT };
  void print_event(EventType type, const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level);
  // Print policy-specific information if necessary
-  virtual void print_specific(EventType type, const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level) { }
+  virtual void print_specific(EventType type, const methodHandle& mh, const methodHandle& imh, int bci, CompLevel level);
  // Check if the method can be compiled, change level if necessary
  void compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread);
  // Submit a given method for compilation
@ -87,8 +244,13 @@ protected:
                                       CompLevel level, CompiledMethod* nm, JavaThread* thread);
  virtual void method_back_branch_event(const methodHandle& method, const methodHandle& inlinee,
                                        int bci, CompLevel level, CompiledMethod* nm, JavaThread* thread);
+
+  void set_increase_threshold_at_ratio() { _increase_threshold_at_ratio = 100 / (100 - (double)IncreaseFirstTierCompileThresholdAt); }
+  void set_start_time(jlong t) { _start_time = t;    }
+  jlong start_time() const     { return _start_time; }
+
 public:
-  SimpleThresholdPolicy() : _c1_count(0), _c2_count(0) { }
+  SimpleThresholdPolicy() : _start_time(0), _c1_count(0), _c2_count(0) { }
  virtual int compiler_count(CompLevel comp_level) {
    if (is_c1_compile(comp_level)) return c1_count();
    if (is_c2_compile(comp_level)) return c2_count();
@ -107,11 +269,7 @@ public:
  virtual bool is_mature(Method* method);
  // Initialize: set compiler thread count
  virtual void initialize();
-  virtual bool should_not_inline(ciEnv* env, ciMethod* callee) {
-    return (env->comp_level() == CompLevel_limited_profile ||
-            env->comp_level() == CompLevel_full_profile) &&
-            callee->has_loops();
-  }
+  virtual bool should_not_inline(ciEnv* env, ciMethod* callee);
 };

 #endif // TIERED
--- a/test/hotspot/jtreg/compiler/aot/RecompilationTest.java
+++ b/test/hotspot/jtreg/compiler/aot/RecompilationTest.java
@ -37,26 +37,12 @@
 *     -extraopt -XX:+UnlockDiagnosticVMOptions -extraopt -XX:+WhiteBoxAPI -extraopt -Xbootclasspath/a:.
 *     -extraopt -XX:-UseCompressedOops
 *     -extraopt -XX:CompileCommand=dontinline,compiler.whitebox.SimpleTestCaseHelper::*
- * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:+TieredCompilation -XX:CompilationPolicyChoice=2
- *     -XX:-UseCounterDecay -XX:-UseCompressedOops
- *     -XX:-Inline
- *     -XX:AOTLibrary=./libRecompilationTest1.so -Xbootclasspath/a:.
- *     -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
- *     -Dcompiler.aot.RecompilationTest.check_level=1
- *     compiler.aot.RecompilationTest
 * @run driver compiler.aot.AotCompiler -libname libRecompilationTest2.so
 *     -class compiler.whitebox.SimpleTestCaseHelper
 *     -extraopt -Dgraal.TieredAOT=false
 *     -extraopt -XX:+UnlockDiagnosticVMOptions -extraopt -XX:+WhiteBoxAPI -extraopt -Xbootclasspath/a:.
 *     -extraopt -XX:-UseCompressedOops
 *     -extraopt -XX:CompileCommand=dontinline,compiler.whitebox.SimpleTestCaseHelper::*
- * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:+TieredCompilation -XX:CompilationPolicyChoice=2
- *     -XX:-UseCounterDecay -XX:-UseCompressedOops
- *     -XX:-Inline
- *     -XX:AOTLibrary=./libRecompilationTest2.so -Xbootclasspath/a:.
- *     -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI
- *     -Dcompiler.aot.RecompilationTest.check_level=-1
- *     compiler.aot.RecompilationTest
 * @run main/othervm -Xmixed -Xbatch -XX:+UseAOT -XX:-TieredCompilation
 *     -XX:-UseCounterDecay -XX:-UseCompressedOops
 *     -XX:-Inline
--- a/test/hotspot/jtreg/compiler/tiered/ConstantGettersTransitionsTest.java
+++ b/test/hotspot/jtreg/compiler/tiered/ConstantGettersTransitionsTest.java
@ -34,7 +34,6 @@
 * @run main/othervm/timeout=240 -Xmixed -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions
 *      -XX:+WhiteBoxAPI -XX:+TieredCompilation -XX:-UseCounterDecay
 *      -XX:CompileCommand=compileonly,compiler.tiered.ConstantGettersTransitionsTest$ConstantGettersTestCase$TrivialMethods::*
- *      compiler.tiered.TransitionsTestExecutor
 *      compiler.tiered.ConstantGettersTransitionsTest
 */

@ -200,4 +199,4 @@ public class ConstantGettersTransitionsTest extends LevelTransitionTest {
            }
        }
    }
-}
+}
--- a/test/hotspot/jtreg/compiler/tiered/LevelTransitionTest.java
+++ b/test/hotspot/jtreg/compiler/tiered/LevelTransitionTest.java
@ -36,7 +36,6 @@
 *                   -XX:+WhiteBoxAPI -XX:+TieredCompilation -XX:-UseCounterDecay
 *                   -XX:CompileCommand=compileonly,compiler.whitebox.SimpleTestCaseHelper::*
 *                   -XX:CompileCommand=compileonly,compiler.tiered.LevelTransitionTest$ExtendedTestCase$CompileMethodHolder::*
- *                   compiler.tiered.TransitionsTestExecutor
 *                   compiler.tiered.LevelTransitionTest
 */

--- a/test/hotspot/jtreg/compiler/tiered/TransitionsTestExecutor.java
+++ b/test/hotspot/jtreg/compiler/tiered/TransitionsTestExecutor.java
@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-package compiler.tiered;
-
-import compiler.whitebox.CompilerWhiteBoxTest;
-import jdk.test.lib.process.OutputAnalyzer;
-import jdk.test.lib.process.ProcessTools;
-
-import java.lang.management.ManagementFactory;
-import java.lang.management.RuntimeMXBean;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * Executes given test in a separate VM with enabled Tiered Compilation for
- * CompilationPolicyChoice 2 and 3
- */
-public class TransitionsTestExecutor {
-    public static void main(String[] args) throws Throwable {
-        if (CompilerWhiteBoxTest.skipOnTieredCompilation(false)) {
-            return;
-        }
-        if (args.length != 1) {
-            throw new Error("TESTBUG: Test name should be specified");
-        }
-        executeTestFor(2, args[0]);
-        executeTestFor(3, args[0]);
-    }
-
-    private static void executeTestFor(int compilationPolicy, String testName) throws Throwable {
-        String policy = "-XX:CompilationPolicyChoice=" + compilationPolicy;
-
-        // Get runtime arguments including VM options given to this executor
-        RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
-        List<String> vmArgs = runtime.getInputArguments();
-
-        // Construct execution command with compilation policy choice and test name
-        List<String> args = new ArrayList<>(vmArgs);
-        Collections.addAll(args, policy, testName);
-
-        OutputAnalyzer out = ProcessTools.executeTestJvm(args.toArray(new String[args.size()]));
-        out.shouldHaveExitValue(0);
-    }
-}