7020403: Add AdvancedCompilationPolicy for tiered

author     iveresov
date       Fri, 04 Mar 2011 15:14:16 -0800
changeset  2630:5d8f5a6dced7
parent     2607:8c9c9ee30d71
child      2632:8ec5e1f45ea1

7020403: Add AdvancedCompilationPolicy for tiered
Summary: This implements an adaptive tiered compilation policy.
Reviewed-by: kvn, never

src/share/vm/oops/methodKlass.cpp
src/share/vm/oops/methodOop.hpp
src/share/vm/runtime/advancedThresholdPolicy.cpp
src/share/vm/runtime/advancedThresholdPolicy.hpp
src/share/vm/runtime/arguments.cpp
src/share/vm/runtime/compilationPolicy.cpp
     1.1 --- a/src/share/vm/oops/methodKlass.cpp	Thu Mar 03 23:31:45 2011 -0800
     1.2 +++ b/src/share/vm/oops/methodKlass.cpp	Fri Mar 04 15:14:16 2011 -0800
     1.3 @@ -103,6 +103,12 @@
     1.4    m->backedge_counter()->init();
     1.5    m->clear_number_of_breakpoints();
     1.6  
     1.7 +#ifdef TIERED
     1.8 +  m->set_rate(0);
     1.9 +  m->set_prev_event_count(0);
    1.10 +  m->set_prev_time(0);
    1.11 +#endif
    1.12 +
    1.13    assert(m->is_parsable(), "must be parsable here.");
    1.14    assert(m->size() == size, "wrong size for object");
     1.15   // We should not publish an unparsable object's reference
     2.1 --- a/src/share/vm/oops/methodOop.hpp	Thu Mar 03 23:31:45 2011 -0800
     2.2 +++ b/src/share/vm/oops/methodOop.hpp	Fri Mar 04 15:14:16 2011 -0800
     2.3 @@ -84,6 +84,11 @@
     2.4  // | invocation_counter                                   |
     2.5  // | backedge_counter                                     |
     2.6  // |------------------------------------------------------|
     2.7 +// |           prev_time (tiered only, 64 bit wide)       |
     2.8 +// |                                                      |
     2.9 +// |------------------------------------------------------|
    2.10 +// |                  rate (tiered)                       |
    2.11 +// |------------------------------------------------------|
    2.12  // | code                           (pointer)             |
    2.13  // | i2i                            (pointer)             |
    2.14  // | adapter                        (pointer)             |
    2.15 @@ -124,6 +129,11 @@
    2.16    InvocationCounter _invocation_counter;         // Incremented before each activation of the method - used to trigger frequency-based optimizations
     2.17    InvocationCounter _backedge_counter;           // Incremented before each backedge taken - used to trigger frequency-based optimizations
    2.18  
    2.19 +#ifdef TIERED
    2.20 +  jlong             _prev_time;                   // Previous time the rate was acquired
    2.21 +  float             _rate;                        // Events (invocation and backedge counter increments) per millisecond
    2.22 +#endif
    2.23 +
    2.24  #ifndef PRODUCT
    2.25    int               _compiled_invocation_count;  // Number of nmethod invocations so far (for perf. debugging)
    2.26  #endif
    2.27 @@ -304,6 +314,17 @@
    2.28    InvocationCounter* invocation_counter() { return &_invocation_counter; }
    2.29    InvocationCounter* backedge_counter()   { return &_backedge_counter; }
    2.30  
    2.31 +#ifdef TIERED
    2.32 +  // We are reusing interpreter_invocation_count as a holder for the previous event count!
    2.33 +  // We can do that since interpreter_invocation_count is not used in tiered.
    2.34 +  int prev_event_count() const                   { return _interpreter_invocation_count;  }
    2.35 +  void set_prev_event_count(int count)           { _interpreter_invocation_count = count; }
    2.36 +  jlong prev_time() const                        { return _prev_time; }
    2.37 +  void set_prev_time(jlong time)                 { _prev_time = time; }
    2.38 +  float rate() const                             { return _rate; }
    2.39 +  void set_rate(float rate)                      { _rate = rate; }
    2.40 +#endif
    2.41 +
    2.42    int invocation_count();
    2.43    int backedge_count();
    2.44  
     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp	Fri Mar 04 15:14:16 2011 -0800
     3.3 @@ -0,0 +1,450 @@
     3.4 +/*
     3.5 +* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
     3.6 +* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
     3.7 +*/
     3.8 +
     3.9 +#include "precompiled.hpp"
    3.10 +#include "runtime/advancedThresholdPolicy.hpp"
    3.11 +#include "runtime/simpleThresholdPolicy.inline.hpp"
    3.12 +
    3.13 +#ifdef TIERED
    3.14 +// Print an event.
    3.15 +void AdvancedThresholdPolicy::print_specific(EventType type, methodHandle mh, methodHandle imh,
    3.16 +                                             int bci, CompLevel level) {
    3.17 +  tty->print(" rate: ");
    3.18 +  if (mh->prev_time() == 0) tty->print("n/a");
    3.19 +  else tty->print("%f", mh->rate());
    3.20 +
    3.21 +  tty->print(" k: %.2lf,%.2lf", threshold_scale(CompLevel_full_profile, Tier3LoadFeedback),
    3.22 +                                threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback));
    3.23 +
    3.24 +}
    3.25 +
    3.26 +void AdvancedThresholdPolicy::initialize() {
    3.27 +  // Turn on ergonomic compiler count selection
    3.28 +  if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
    3.29 +    FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
    3.30 +  }
    3.31 +  int count = CICompilerCount;
    3.32 +  if (CICompilerCountPerCPU) {
    3.33 +    // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
    3.34 +    int log_cpu = log2_intptr(os::active_processor_count());
    3.35 +    int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
    3.36 +    count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2;
    3.37 +  }
    3.38 +
    3.39 +  set_c1_count(MAX2(count / 3, 1));
    3.40 +  set_c2_count(MAX2(count - count / 3, 1));
    3.41 +
    3.42 +  // Some inlining tuning
    3.43 +#ifdef X86
    3.44 +  if (FLAG_IS_DEFAULT(InlineSmallCode)) {
    3.45 +    FLAG_SET_DEFAULT(InlineSmallCode, 2000);
    3.46 +  }
    3.47 +#endif
    3.48 +
    3.49 +#ifdef SPARC
    3.50 +  if (FLAG_IS_DEFAULT(InlineSmallCode)) {
    3.51 +    FLAG_SET_DEFAULT(InlineSmallCode, 2500);
    3.52 +  }
    3.53 +#endif
    3.54 +
    3.55 +
    3.56 +  set_start_time(os::javaTimeMillis());
    3.57 +}
    3.58 +
    3.59 +// update_rate() is called from select_task() while holding a compile queue lock.
    3.60 +void AdvancedThresholdPolicy::update_rate(jlong t, methodOop m) {
    3.61 +  if (is_old(m)) {
    3.62 +    // We don't remove old methods from the queue,
    3.63 +    // so we can just zero the rate.
    3.64 +    m->set_rate(0);
    3.65 +    return;
    3.66 +  }
    3.67 +
     3.68 +  // We don't update the rate if we've just come out of a safepoint.
    3.69 +  // delta_s is the time since last safepoint in milliseconds.
    3.70 +  jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint();
    3.71 +  jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // milliseconds since the last measurement
    3.72 +  // How many events were there since the last time?
    3.73 +  int event_count = m->invocation_count() + m->backedge_count();
    3.74 +  int delta_e = event_count - m->prev_event_count();
    3.75 +
    3.76 +  // We should be running for at least 1ms.
    3.77 +  if (delta_s >= TieredRateUpdateMinTime) {
    3.78 +    // And we must've taken the previous point at least 1ms before.
    3.79 +    if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) {
    3.80 +      m->set_prev_time(t);
    3.81 +      m->set_prev_event_count(event_count);
    3.82 +      m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond
    3.83 +    } else
    3.84 +      if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) {
    3.85 +        // If nothing happened for 25ms, zero the rate. Don't modify prev values.
    3.86 +        m->set_rate(0);
    3.87 +      }
    3.88 +  }
    3.89 +}
    3.90 +
     3.91 +// Check if this method has been stale for a given number of milliseconds.
    3.92 +// See select_task().
    3.93 +bool AdvancedThresholdPolicy::is_stale(jlong t, jlong timeout, methodOop m) {
    3.94 +  jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint();
    3.95 +  jlong delta_t = t - m->prev_time();
    3.96 +  if (delta_t > timeout && delta_s > timeout) {
    3.97 +    int event_count = m->invocation_count() + m->backedge_count();
    3.98 +    int delta_e = event_count - m->prev_event_count();
    3.99 +    // Return true if there were no events.
   3.100 +    return delta_e == 0;
   3.101 +  }
   3.102 +  return false;
   3.103 +}
   3.104 +
   3.105 +// We don't remove old methods from the compile queue even if they have
   3.106 +// very low activity. See select_task().
   3.107 +bool AdvancedThresholdPolicy::is_old(methodOop method) {
   3.108 +  return method->invocation_count() > 50000 || method->backedge_count() > 500000;
   3.109 +}
   3.110 +
   3.111 +double AdvancedThresholdPolicy::weight(methodOop method) {
   3.112 +  return (method->rate() + 1) * ((method->invocation_count() + 1) *  (method->backedge_count() + 1));
   3.113 +}
   3.114 +
   3.115 +// Apply heuristics and return true if x should be compiled before y
   3.116 +bool AdvancedThresholdPolicy::compare_methods(methodOop x, methodOop y) {
   3.117 +  if (x->highest_comp_level() > y->highest_comp_level()) {
   3.118 +    // recompilation after deopt
   3.119 +    return true;
   3.120 +  } else
   3.121 +    if (x->highest_comp_level() == y->highest_comp_level()) {
   3.122 +      if (weight(x) > weight(y)) {
   3.123 +        return true;
   3.124 +      }
   3.125 +    }
   3.126 +  return false;
   3.127 +}
   3.128 +
   3.129 +// Is method profiled enough?
   3.130 +bool AdvancedThresholdPolicy::is_method_profiled(methodOop method) {
   3.131 +  methodDataOop mdo = method->method_data();
   3.132 +  if (mdo != NULL) {
   3.133 +    int i = mdo->invocation_count_delta();
   3.134 +    int b = mdo->backedge_count_delta();
   3.135 +    return call_predicate_helper<CompLevel_full_profile>(i, b, 1);
   3.136 +  }
   3.137 +  return false;
   3.138 +}
   3.139 +
   3.140 +// Called with the queue locked and with at least one element
   3.141 +CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
   3.142 +  CompileTask *max_task = NULL;
   3.143 +  methodOop max_method;
   3.144 +  jlong t = os::javaTimeMillis();
   3.145 +  // Iterate through the queue and find a method with a maximum rate.
   3.146 +  for (CompileTask* task = compile_queue->first(); task != NULL;) {
   3.147 +    CompileTask* next_task = task->next();
   3.148 +    methodOop method = (methodOop)JNIHandles::resolve(task->method_handle());
   3.149 +    methodDataOop mdo = method->method_data();
   3.150 +    update_rate(t, method);
   3.151 +    if (max_task == NULL) {
   3.152 +      max_task = task;
   3.153 +      max_method = method;
   3.154 +    } else {
   3.155 +      // If a method has been stale for some time, remove it from the queue.
   3.156 +      if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
   3.157 +        if (PrintTieredEvents) {
   3.158 +          print_event(KILL, method, method, task->osr_bci(), (CompLevel)task->comp_level());
   3.159 +        }
   3.160 +        CompileTaskWrapper ctw(task); // Frees the task
   3.161 +        compile_queue->remove(task);
   3.162 +        method->clear_queued_for_compilation();
   3.163 +        task = next_task;
   3.164 +        continue;
   3.165 +      }
   3.166 +
   3.167 +      // Select a method with a higher rate
   3.168 +      if (compare_methods(method, max_method)) {
   3.169 +        max_task = task;
   3.170 +        max_method = method;
   3.171 +      }
   3.172 +    }
   3.173 +    task = next_task;
   3.174 +  }
   3.175 +
   3.176 +  if (max_task->comp_level() == CompLevel_full_profile && is_method_profiled(max_method)) {
   3.177 +    max_task->set_comp_level(CompLevel_limited_profile);
   3.178 +    if (PrintTieredEvents) {
   3.179 +      print_event(UPDATE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level());
   3.180 +    }
   3.181 +  }
   3.182 +
   3.183 +  return max_task;
   3.184 +}
   3.185 +
   3.186 +double AdvancedThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) {
   3.187 +  double queue_size = CompileBroker::queue_size(level);
   3.188 +  int comp_count = compiler_count(level);
   3.189 +  double k = queue_size / (feedback_k * comp_count) + 1;
   3.190 +  return k;
   3.191 +}
   3.192 +
   3.193 +// Call and loop predicates determine whether a transition to a higher
   3.194 +// compilation level should be performed (pointers to predicate functions
   3.195 +// are passed to common()).
    3.196 +// Tier?LoadFeedback is basically a coefficient that determines
   3.197 +// how many methods per compiler thread can be in the queue before
   3.198 +// the threshold values double.
   3.199 +bool AdvancedThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level) {
   3.200 +  switch(cur_level) {
   3.201 +  case CompLevel_none:
   3.202 +  case CompLevel_limited_profile: {
   3.203 +    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
   3.204 +    return loop_predicate_helper<CompLevel_none>(i, b, k);
   3.205 +  }
   3.206 +  case CompLevel_full_profile: {
   3.207 +    double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
   3.208 +    return loop_predicate_helper<CompLevel_full_profile>(i, b, k);
   3.209 +  }
   3.210 +  default:
   3.211 +    return true;
   3.212 +  }
   3.213 +}
   3.214 +
   3.215 +bool AdvancedThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level) {
   3.216 +  switch(cur_level) {
   3.217 +  case CompLevel_none:
   3.218 +  case CompLevel_limited_profile: {
   3.219 +    double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback);
   3.220 +    return call_predicate_helper<CompLevel_none>(i, b, k);
   3.221 +  }
   3.222 +  case CompLevel_full_profile: {
   3.223 +    double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback);
   3.224 +    return call_predicate_helper<CompLevel_full_profile>(i, b, k);
   3.225 +  }
   3.226 +  default:
   3.227 +    return true;
   3.228 +  }
   3.229 +}
   3.230 +
   3.231 +// If a method is old enough and is still in the interpreter we would want to
   3.232 +// start profiling without waiting for the compiled method to arrive.
    3.233 +// We also take the load on compilers into account.
   3.234 +bool AdvancedThresholdPolicy::should_create_mdo(methodOop method, CompLevel cur_level) {
   3.235 +  if (cur_level == CompLevel_none &&
   3.236 +      CompileBroker::queue_size(CompLevel_full_optimization) <=
   3.237 +      Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
   3.238 +    int i = method->invocation_count();
   3.239 +    int b = method->backedge_count();
   3.240 +    double k = Tier0ProfilingStartPercentage / 100.0;
   3.241 +    return call_predicate_helper<CompLevel_none>(i, b, k) || loop_predicate_helper<CompLevel_none>(i, b, k);
   3.242 +  }
   3.243 +  return false;
   3.244 +}
   3.245 +
   3.246 +// Create MDO if necessary.
   3.247 +void AdvancedThresholdPolicy::create_mdo(methodHandle mh, TRAPS) {
   3.248 +  if (mh->is_native() || mh->is_abstract() || mh->is_accessor()) return;
   3.249 +  if (mh->method_data() == NULL) {
   3.250 +    methodOopDesc::build_interpreter_method_data(mh, THREAD);
   3.251 +    if (HAS_PENDING_EXCEPTION) {
   3.252 +      CLEAR_PENDING_EXCEPTION;
   3.253 +    }
   3.254 +  }
   3.255 +}
   3.256 +
   3.257 +
   3.258 +/*
   3.259 + * Method states:
   3.260 + *   0 - interpreter (CompLevel_none)
   3.261 + *   1 - pure C1 (CompLevel_simple)
   3.262 + *   2 - C1 with invocation and backedge counting (CompLevel_limited_profile)
   3.263 + *   3 - C1 with full profiling (CompLevel_full_profile)
   3.264 + *   4 - C2 (CompLevel_full_optimization)
   3.265 + *
   3.266 + * Common state transition patterns:
   3.267 + * a. 0 -> 3 -> 4.
   3.268 + *    The most common path. But note that even in this straightforward case
   3.269 + *    profiling can start at level 0 and finish at level 3.
   3.270 + *
   3.271 + * b. 0 -> 2 -> 3 -> 4.
    3.272 + *    This case occurs when the load on C2 is deemed too high. So, instead of transitioning
    3.273 + *    into state 3 directly and over-profiling while a method is in the C2 queue, we transition to
   3.274 + *    level 2 and wait until the load on C2 decreases. This path is disabled for OSRs.
   3.275 + *
   3.276 + * c. 0 -> (3->2) -> 4.
   3.277 + *    In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough
    3.278 + *    to enable the profiling to fully occur at level 0. We then change the compilation level
    3.279 + *    of the method to 2, because that allows it to run much faster without full profiling while C2
    3.280 + *    is compiling.
   3.281 + *
   3.282 + * d. 0 -> 3 -> 1 or 0 -> 2 -> 1.
    3.283 + *    After a method has been compiled once with C1 it can be identified as trivial and compiled at
    3.284 + *    level 1. These transitions can also occur if a method can't be compiled with C2 but can with C1.
   3.285 + *
   3.286 + * e. 0 -> 4.
   3.287 + *    This can happen if a method fails C1 compilation (it will still be profiled in the interpreter)
   3.288 + *    or because of a deopt that didn't require reprofiling (compilation won't happen in this case because
   3.289 + *    the compiled version already exists).
   3.290 + *
    3.291 + * Note that since state 0 can be reached from any other state via deoptimization, different loops
   3.292 + * are possible.
   3.293 + *
   3.294 + */
   3.295 +
    3.296 +// Common transition function. Given a predicate, determines if a method should transition to another level.
   3.297 +CompLevel AdvancedThresholdPolicy::common(Predicate p, methodOop method, CompLevel cur_level) {
   3.298 +  if (is_trivial(method)) return CompLevel_simple;
   3.299 +
   3.300 +  CompLevel next_level = cur_level;
   3.301 +  int i = method->invocation_count();
   3.302 +  int b = method->backedge_count();
   3.303 +
   3.304 +  switch(cur_level) {
   3.305 +  case CompLevel_none:
   3.306 +    // If we were at full profile level, would we switch to full opt?
   3.307 +    if (common(p, method, CompLevel_full_profile) == CompLevel_full_optimization) {
   3.308 +      next_level = CompLevel_full_optimization;
   3.309 +    } else if ((this->*p)(i, b, cur_level)) {
   3.310 +      // C1-generated fully profiled code is about 30% slower than the limited profile
   3.311 +      // code that has only invocation and backedge counters. The observation is that
    3.312 +      // if the C2 queue is large enough we can spend too much time in the fully profiled code
   3.313 +      // while waiting for C2 to pick the method from the queue. To alleviate this problem
   3.314 +      // we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long
   3.315 +      // we choose to compile a limited profiled version and then recompile with full profiling
   3.316 +      // when the load on C2 goes down.
   3.317 +      if (CompileBroker::queue_size(CompLevel_full_optimization) >
   3.318 +          Tier3DelayOn * compiler_count(CompLevel_full_optimization)) {
   3.319 +        next_level = CompLevel_limited_profile;
   3.320 +      } else {
   3.321 +        next_level = CompLevel_full_profile;
   3.322 +      }
   3.323 +    }
   3.324 +    break;
   3.325 +  case CompLevel_limited_profile:
   3.326 +    if (is_method_profiled(method)) {
   3.327 +      // Special case: we got here because this method was fully profiled in the interpreter.
   3.328 +      next_level = CompLevel_full_optimization;
   3.329 +    } else {
   3.330 +      methodDataOop mdo = method->method_data();
   3.331 +      if (mdo != NULL) {
   3.332 +        if (mdo->would_profile()) {
   3.333 +          if (CompileBroker::queue_size(CompLevel_full_optimization) <=
   3.334 +              Tier3DelayOff * compiler_count(CompLevel_full_optimization) &&
   3.335 +              (this->*p)(i, b, cur_level)) {
   3.336 +            next_level = CompLevel_full_profile;
   3.337 +          }
   3.338 +        } else {
   3.339 +          next_level = CompLevel_full_optimization;
   3.340 +        }
   3.341 +      }
   3.342 +    }
   3.343 +    break;
   3.344 +  case CompLevel_full_profile:
   3.345 +    {
   3.346 +      methodDataOop mdo = method->method_data();
   3.347 +      if (mdo != NULL) {
   3.348 +        if (mdo->would_profile()) {
   3.349 +          int mdo_i = mdo->invocation_count_delta();
   3.350 +          int mdo_b = mdo->backedge_count_delta();
   3.351 +          if ((this->*p)(mdo_i, mdo_b, cur_level)) {
   3.352 +            next_level = CompLevel_full_optimization;
   3.353 +          }
   3.354 +        } else {
   3.355 +          next_level = CompLevel_full_optimization;
   3.356 +        }
   3.357 +      }
   3.358 +    }
   3.359 +    break;
   3.360 +  }
   3.361 +  return next_level;
   3.362 +}
   3.363 +
   3.364 +// Determine if a method should be compiled with a normal entry point at a different level.
   3.365 +CompLevel AdvancedThresholdPolicy::call_event(methodOop method,  CompLevel cur_level) {
   3.366 +  CompLevel osr_level = (CompLevel) method->highest_osr_comp_level();
   3.367 +  CompLevel next_level = common(&AdvancedThresholdPolicy::call_predicate, method, cur_level);
   3.368 +
   3.369 +  // If OSR method level is greater than the regular method level, the levels should be
   3.370 +  // equalized by raising the regular method level in order to avoid OSRs during each
   3.371 +  // invocation of the method.
   3.372 +  if (osr_level == CompLevel_full_optimization && cur_level == CompLevel_full_profile) {
   3.373 +    methodDataOop mdo = method->method_data();
   3.374 +    guarantee(mdo != NULL, "MDO should not be NULL");
   3.375 +    if (mdo->invocation_count() >= 1) {
   3.376 +      next_level = CompLevel_full_optimization;
   3.377 +    }
   3.378 +  } else {
   3.379 +    next_level = MAX2(osr_level, next_level);
   3.380 +  }
   3.381 +
   3.382 +  return next_level;
   3.383 +}
   3.384 +
   3.385 +// Determine if we should do an OSR compilation of a given method.
   3.386 +CompLevel AdvancedThresholdPolicy::loop_event(methodOop method, CompLevel cur_level) {
   3.387 +  if (cur_level == CompLevel_none) {
   3.388 +    // If there is a live OSR method that means that we deopted to the interpreter
   3.389 +    // for the transition.
   3.390 +    CompLevel osr_level = (CompLevel)method->highest_osr_comp_level();
   3.391 +    if (osr_level > CompLevel_none) {
   3.392 +      return osr_level;
   3.393 +    }
   3.394 +  }
   3.395 +  return common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level);
   3.396 +}
   3.397 +
   3.398 +// Update the rate and submit compile
   3.399 +void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) {
   3.400 +  int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
   3.401 +  update_rate(os::javaTimeMillis(), mh());
   3.402 +  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD);
   3.403 +}
   3.404 +
   3.405 +
   3.406 +// Handle the invocation event.
   3.407 +void AdvancedThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh,
   3.408 +                                                      CompLevel level, TRAPS) {
   3.409 +  if (should_create_mdo(mh(), level)) {
   3.410 +    create_mdo(mh, THREAD);
   3.411 +  }
   3.412 +  if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) {
   3.413 +    CompLevel next_level = call_event(mh(), level);
   3.414 +    if (next_level != level) {
   3.415 +      compile(mh, InvocationEntryBci, next_level, THREAD);
   3.416 +    }
   3.417 +  }
   3.418 +}
   3.419 +
   3.420 +// Handle the back branch event. Notice that we can compile the method
   3.421 +// with a regular entry from here.
   3.422 +void AdvancedThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh,
   3.423 +                                                       int bci, CompLevel level, TRAPS) {
   3.424 +  if (should_create_mdo(mh(), level)) {
   3.425 +    create_mdo(mh, THREAD);
   3.426 +  }
   3.427 +
   3.428 +  // If the method is already compiling, quickly bail out.
   3.429 +  if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, bci)) {
    3.430 +    // Use the loop event as an opportunity to also check that there have been
    3.431 +    // enough calls.
   3.432 +    CompLevel cur_level = comp_level(mh());
   3.433 +    CompLevel next_level = call_event(mh(), cur_level);
   3.434 +    CompLevel next_osr_level = loop_event(mh(), level);
   3.435 +    if (next_osr_level  == CompLevel_limited_profile) {
   3.436 +      next_osr_level = CompLevel_full_profile; // OSRs are supposed to be for very hot methods.
   3.437 +    }
   3.438 +    next_level = MAX2(next_level,
   3.439 +                      next_osr_level < CompLevel_full_optimization ? next_osr_level : cur_level);
   3.440 +    bool is_compiling = false;
   3.441 +    if (next_level != cur_level) {
   3.442 +      compile(mh, InvocationEntryBci, next_level, THREAD);
   3.443 +      is_compiling = true;
   3.444 +    }
   3.445 +
   3.446 +    // Do the OSR version
   3.447 +    if (!is_compiling && next_osr_level != level) {
   3.448 +      compile(mh, bci, next_osr_level, THREAD);
   3.449 +    }
   3.450 +  }
   3.451 +}
   3.452 +
   3.453 +#endif // TIERED
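
To make the queue-prioritization logic in this file easier to follow in isolation, here is a minimal standalone sketch of the rate/weight heuristic that update_rate(), weight() and compare_methods() implement. It is not HotSpot code: the struct and function names are hypothetical, the safepoint check (delta_s) is omitted, and the 1 ms / 25 ms constants are simply the defaults that the comments above quote for TieredRateUpdateMinTime and TieredRateUpdateMaxTime.

#include <cstdint>

// Hypothetical stand-in for the per-method fields added to methodOop in this changeset.
struct MethodStats {
  int64_t prev_time_ms = 0;  // last time the rate was sampled
  int     prev_events  = 0;  // invocations + backedges at that time
  float   rate         = 0;  // events per millisecond
};

// Sketch of update_rate(): refresh the rate only if enough time has passed and
// something happened; zero it after a long silence (prev values are kept).
void update_rate_sketch(MethodStats& m, int64_t now_ms, int events,
                        int64_t min_ms = 1, int64_t max_ms = 25) {
  int64_t delta_t = now_ms - m.prev_time_ms;
  int     delta_e = events - m.prev_events;
  if (delta_t >= min_ms && delta_e > 0) {
    m.prev_time_ms = now_ms;
    m.prev_events  = events;
    m.rate = (float)delta_e / (float)delta_t;  // events per millisecond
  } else if (delta_t > max_ms && delta_e == 0) {
    m.rate = 0;                                // stale; don't touch prev values
  }
}

// Sketch of weight(): both a high recent rate and large absolute counts
// push a method towards the front of the compile queue.
double weight_sketch(const MethodStats& m, int invocations, int backedges) {
  return (m.rate + 1) * ((invocations + 1) * (backedges + 1));
}

select_task() above then walks the queue, refreshes each entry's rate, drops entries that have been stale longer than TieredCompileTaskTimeout (unless the method is "old"), and returns the task whose method has the largest weight.
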
     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/src/share/vm/runtime/advancedThresholdPolicy.hpp	Fri Mar 04 15:14:16 2011 -0800
     4.3 @@ -0,0 +1,207 @@
     4.4 +/*
     4.5 +* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved.
     4.6 +* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
     4.7 +*/
     4.8 +
     4.9 +#ifndef SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
    4.10 +#define SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
    4.11 +
    4.12 +#include "runtime/simpleThresholdPolicy.hpp"
    4.13 +
    4.14 +#ifdef TIERED
    4.15 +class CompileTask;
    4.16 +class CompileQueue;
    4.17 +
    4.18 +/*
    4.19 + *  The system supports 5 execution levels:
    4.20 + *  * level 0 - interpreter
    4.21 + *  * level 1 - C1 with full optimization (no profiling)
    4.22 + *  * level 2 - C1 with invocation and backedge counters
    4.23 + *  * level 3 - C1 with full profiling (level 2 + MDO)
    4.24 + *  * level 4 - C2
    4.25 + *
    4.26 + * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters
    4.27 + * (invocation counters and backedge counters). The frequency of these notifications is
    4.28 + * different at each level. These notifications are used by the policy to decide what transition
    4.29 + * to make.
    4.30 + *
    4.31 + * Execution starts at level 0 (interpreter), then the policy can decide either to compile the
    4.32 + * method at level 3 or level 2. The decision is based on the following factors:
    4.33 + *    1. The length of the C2 queue determines the next level. The observation is that level 2
    4.34 + * is generally faster than level 3 by about 30%, therefore we would want to minimize the time
    4.35 + * a method spends at level 3. We should only spend the time at level 3 that is necessary to get
    4.36 + * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to
    4.37 + * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile
    4.38 + * request makes its way through the long queue. When the load on C2 recedes we are going to
    4.39 + * recompile at level 3 and start gathering profiling information.
     4.40 + *    2. The length of the C1 queue is used to dynamically adjust the thresholds, so as to introduce
     4.41 + * additional filtering if the compiler is overloaded. The rationale is that by the time a
     4.42 + * method gets compiled it can become unused, so it doesn't make sense to put too many methods on the
     4.43 + * queue.
    4.44 + *
    4.45 + * After profiling is completed at level 3 the transition is made to level 4. Again, the length
    4.46 + * of the C2 queue is used as a feedback to adjust the thresholds.
    4.47 + *
     4.48 + * After the first C1 compile some basic information is determined about the code, such as the number
     4.49 + * of blocks and the number of loops. Based on that it can be decided that a method is trivial,
     4.50 + * i.e. compiling it with C2 would yield the same code as C1. In this case the method is
     4.51 + * compiled at level 1 instead of 4.
    4.52 + *
    4.53 + * We also support profiling at level 0. If C1 is slow enough to produce the level 3 version of
     4.54 + * the code and the C2 queue is sufficiently small, we can decide to start profiling in the
    4.55 + * interpreter (and continue profiling in the compiled code once the level 3 version arrives).
     4.56 + * If the profiling at level 0 is fully completed before the level 3 version is produced, a level 2
     4.57 + * version is compiled instead so that the method runs faster while waiting for the level 4 version.
    4.58 + *
    4.59 + * Compile queues are implemented as priority queues - for each method in the queue we compute
    4.60 + * the event rate (the number of invocation and backedge counter increments per unit of time).
    4.61 + * When getting an element off the queue we pick the one with the largest rate. Maintaining the
    4.62 + * rate also allows us to remove stale methods (the ones that got on the queue but stopped
    4.63 + * being used shortly after that).
    4.64 +*/
    4.65 +
    4.66 +/* Command line options:
    4.67 + * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method
     4.68 + *   invocation and backedge notifications. Basically, every n-th invocation or backedge, a mutator thread
    4.69 + *   makes a call into the runtime.
    4.70 + *
    4.71 + * - Tier?CompileThreshold, Tier?BackEdgeThreshold, Tier?MinInvocationThreshold control
    4.72 + *   compilation thresholds.
    4.73 + *   Level 2 thresholds are not used and are provided for option-compatibility and potential future use.
    4.74 + *   Other thresholds work as follows:
    4.75 + *
    4.76 + *   Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when
    4.77 + *   the following predicate is true (X is the level):
    4.78 + *
    4.79 + *   i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s  && i + b > TierXCompileThreshold * s),
    4.80 + *
     4.81 + *   where $i$ is the number of method invocations, $b$ is the number of backedges and $s$ is the scaling
     4.82 + *   coefficient that is discussed further below.
     4.83 + *   The intuition is to equalize the time that is spent profiling each method.
    4.84 + *   The same predicate is used to control the transition from level 3 to level 4 (C2). It should be
    4.85 + *   noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come
     4.86 + *   from the methodOop and for the 3->4 transition they come from the MDO (since profiled invocations are
    4.87 + *   counted separately).
    4.88 + *
    4.89 + *   OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates.
    4.90 + *
    4.91 + * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending
    4.92 + *   on the compiler load. The scaling coefficients are computed as follows:
    4.93 + *
    4.94 + *   s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1,
    4.95 + *
    4.96 + *   where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X
    4.97 + *   is the number of level X compiler threads.
    4.98 + *
    4.99 + *   Basically these parameters describe how many methods should be in the compile queue
   4.100 + *   per compiler thread before the scaling coefficient increases by one.
   4.101 + *
   4.102 + *   This feedback provides the mechanism to automatically control the flow of compilation requests
   4.103 + *   depending on the machine speed, mutator load and other external factors.
   4.104 + *
   4.105 + * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop.
   4.106 + *   Consider the following observation: a method compiled with full profiling (level 3)
   4.107 + *   is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO).
   4.108 + *   Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue
    4.109 + *   gets congested and the 3->4 transition is delayed. While the method sits in the C2 queue it continues
    4.110 + *   executing at level 3 for a much longer time than the predicate requires, and at suboptimal speed.
    4.111 + *   The idea is to dynamically change the behavior of the system so that, if a substantial
    4.112 + *   load on C2 is detected, we first do the 0->2 transition, allowing a method to run faster,
    4.113 + *   and then allow 2->3 transitions once the load decreases.
   4.114 + *
   4.115 + *   Tier3Delay* parameters control this switching mechanism.
   4.116 + *   Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy
   4.117 + *   no longer does 0->3 transitions but does 0->2 transitions instead.
   4.118 + *   Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue
   4.119 + *   per compiler thread falls below the specified amount.
   4.120 + *   The hysteresis is necessary to avoid jitter.
   4.121 + *
   4.122 + * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue.
    4.123 + *   Basically, since we use the event rate d(i + b)/dt as the priority value when selecting a method to
    4.124 + *   compile from the compile queue, we can also detect, in the same pass over the queue, stale methods
    4.125 + *   for which the rate has been 0 for some time. Stale methods can appear in the queue when an application
   4.126 + *   abruptly changes its behavior.
   4.127 + *
    4.128 + * - TieredStopAtLevel is used mostly for testing. It allows bypassing the policy logic and sticking
    4.129 + *   to a given level. For example, it's useful to set TieredStopAtLevel = 1 in order to compile everything
    4.130 + *   with pure C1.
   4.131 + *
   4.132 + * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the
   4.133 + *   0->3 predicate are already exceeded by the given percentage but the level 3 version of the
    4.134 + *   method is still not ready. We can even go directly from level 0 to 4 if C1 doesn't produce a compiled
    4.135 + *   version in time. This shortens the overall transition to level 4 and decreases the startup time.
    4.136 + *   Note that this behavior is also guarded by the Tier3Delay mechanism: when the C2 queue is too long
    4.137 + *   there is no reason to start profiling prematurely.
   4.138 + *
   4.139 + * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation.
   4.140 + *   Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered
   4.141 + *   to be zero if no events occurred in TieredRateUpdateMaxTime.
   4.142 + */
   4.143 +
   4.144 +
   4.145 +class AdvancedThresholdPolicy : public SimpleThresholdPolicy {
   4.146 +  jlong _start_time;
   4.147 +
   4.148 +  // Call and loop predicates determine whether a transition to a higher compilation
    4.149 +  // level should be performed (pointers to predicate functions are passed to common()).
   4.150 +  // Predicates also take compiler load into account.
   4.151 +  typedef bool (AdvancedThresholdPolicy::*Predicate)(int i, int b, CompLevel cur_level);
   4.152 +  bool call_predicate(int i, int b, CompLevel cur_level);
   4.153 +  bool loop_predicate(int i, int b, CompLevel cur_level);
    4.154 +  // Common transition function. Given a predicate, determines if a method should transition to another level.
   4.155 +  CompLevel common(Predicate p, methodOop method, CompLevel cur_level);
   4.156 +  // Transition functions.
   4.157 +  // call_event determines if a method should be compiled at a different
   4.158 +  // level with a regular invocation entry.
   4.159 +  CompLevel call_event(methodOop method, CompLevel cur_level);
   4.160 +  // loop_event checks if a method should be OSR compiled at a different
   4.161 +  // level.
   4.162 +  CompLevel loop_event(methodOop method, CompLevel cur_level);
    4.163 +  // Has a method been around for a long time?
   4.164 +  // We don't remove old methods from the compile queue even if they have
   4.165 +  // very low activity (see select_task()).
   4.166 +  inline bool is_old(methodOop method);
    4.167 +  // Was a given method inactive for a given number of milliseconds?
    4.168 +  // If it was, we remove it from the queue (see select_task()).
   4.169 +  inline bool is_stale(jlong t, jlong timeout, methodOop m);
   4.170 +  // Compute the weight of the method for the compilation scheduling
   4.171 +  inline double weight(methodOop method);
   4.172 +  // Apply heuristics and return true if x should be compiled before y
   4.173 +  inline bool compare_methods(methodOop x, methodOop y);
    4.174 +  // Compute the event rate for a given method. The rate is the number of events (invocations + backedges)
   4.175 +  // per millisecond.
   4.176 +  inline void update_rate(jlong t, methodOop m);
   4.177 +  // Compute threshold scaling coefficient
   4.178 +  inline double threshold_scale(CompLevel level, int feedback_k);
   4.179 +  // If a method is old enough and is still in the interpreter we would want to
   4.180 +  // start profiling without waiting for the compiled method to arrive. This function
   4.181 +  // determines whether we should do that.
   4.182 +  inline bool should_create_mdo(methodOop method, CompLevel cur_level);
   4.183 +  // Create MDO if necessary.
   4.184 +  void create_mdo(methodHandle mh, TRAPS);
   4.185 +  // Is method profiled enough?
   4.186 +  bool is_method_profiled(methodOop method);
   4.187 +
   4.188 +protected:
   4.189 +  void print_specific(EventType type, methodHandle mh, methodHandle imh, int bci, CompLevel level);
   4.190 +
   4.191 +  void set_start_time(jlong t) { _start_time = t;    }
   4.192 +  jlong start_time() const     { return _start_time; }
   4.193 +
   4.194 +  // Submit a given method for compilation (and update the rate).
   4.195 +  virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS);
   4.196 +  // event() from SimpleThresholdPolicy would call these.
   4.197 +  virtual void method_invocation_event(methodHandle method, methodHandle inlinee,
   4.198 +                                       CompLevel level, TRAPS);
   4.199 +  virtual void method_back_branch_event(methodHandle method, methodHandle inlinee,
   4.200 +                                        int bci, CompLevel level, TRAPS);
   4.201 +public:
   4.202 +  AdvancedThresholdPolicy() : _start_time(0) { }
   4.203 +  // Select task is called by CompileBroker. We should return a task or NULL.
   4.204 +  virtual CompileTask* select_task(CompileQueue* compile_queue);
   4.205 +  virtual void initialize();
   4.206 +};
   4.207 +
   4.208 +#endif // TIERED
   4.209 +
   4.210 +#endif // SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
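
As a worked example of the load feedback described in the comment block above, the sketch below plugs hypothetical numbers into s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1 and into the 0->3 predicate. The threshold and feedback values are assumptions chosen for illustration (placeholders for Tier3InvocationThreshold, Tier3MinInvocationThreshold, Tier3CompileThreshold and Tier3LoadFeedback), not necessarily the product defaults.

#include <cstdio>

// s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1
double scale(int queue_size, int load_feedback, int compiler_count) {
  return (double)queue_size / (load_feedback * compiler_count) + 1;
}

// The 0->3 call predicate from the comment above, with placeholder thresholds
// standing in for the Tier3* flags.
bool call_predicate_0_to_3(int i, int b, double s,
                           int invocation_threshold     = 200,    // assumed
                           int min_invocation_threshold = 100,    // assumed
                           int compile_threshold        = 2000) { // assumed
  return i > invocation_threshold * s ||
         (i > min_invocation_threshold * s && i + b > compile_threshold * s);
}

int main() {
  // With an empty C1 queue the coefficient is 1; with 15 queued methods,
  // a load feedback of 5 and a single C1 thread it becomes 4, so a method
  // must be roughly 4x hotter before it is sent to level 3.
  double s_idle = scale(0, 5, 1);   // 1.0
  double s_busy = scale(15, 5, 1);  // 4.0
  printf("idle: %d  busy: %d\n",
         call_predicate_0_to_3(300, 0, s_idle),   // 1: 300 > 200*1
         call_predicate_0_to_3(300, 0, s_busy));  // 0: 300 <= 200*4
  return 0;
}
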
     5.1 --- a/src/share/vm/runtime/arguments.cpp	Thu Mar 03 23:31:45 2011 -0800
     5.2 +++ b/src/share/vm/runtime/arguments.cpp	Fri Mar 04 15:14:16 2011 -0800
     5.3 @@ -1026,8 +1026,9 @@
     5.4  }
     5.5  
     5.6  void Arguments::set_tiered_flags() {
     5.7 +  // With tiered, set default policy to AdvancedThresholdPolicy, which is 3.
     5.8    if (FLAG_IS_DEFAULT(CompilationPolicyChoice)) {
     5.9 -    FLAG_SET_DEFAULT(CompilationPolicyChoice, 2);
    5.10 +    FLAG_SET_DEFAULT(CompilationPolicyChoice, 3);
    5.11    }
    5.12    if (CompilationPolicyChoice < 2) {
    5.13      vm_exit_during_initialization(
     6.1 --- a/src/share/vm/runtime/compilationPolicy.cpp	Thu Mar 03 23:31:45 2011 -0800
     6.2 +++ b/src/share/vm/runtime/compilationPolicy.cpp	Fri Mar 04 15:14:16 2011 -0800
     6.3 @@ -1,5 +1,5 @@
     6.4  /*
     6.5 - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
     6.6 + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
     6.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
     6.8   *
     6.9   * This code is free software; you can redistribute it and/or modify it
    6.10 @@ -32,6 +32,7 @@
    6.11  #include "oops/methodOop.hpp"
    6.12  #include "oops/oop.inline.hpp"
    6.13  #include "prims/nativeLookup.hpp"
    6.14 +#include "runtime/advancedThresholdPolicy.hpp"
    6.15  #include "runtime/compilationPolicy.hpp"
    6.16  #include "runtime/frame.hpp"
    6.17  #include "runtime/handles.inline.hpp"
    6.18 @@ -72,8 +73,15 @@
    6.19      Unimplemented();
    6.20  #endif
    6.21      break;
    6.22 +  case 3:
    6.23 +#ifdef TIERED
    6.24 +    CompilationPolicy::set_policy(new AdvancedThresholdPolicy());
    6.25 +#else
    6.26 +    Unimplemented();
    6.27 +#endif
    6.28 +    break;
    6.29    default:
    6.30 -    fatal("CompilationPolicyChoice must be in the range: [0-2]");
    6.31 +    fatal("CompilationPolicyChoice must be in the range: [0-3]");
    6.32    }
    6.33    CompilationPolicy::policy()->initialize();
    6.34  }
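
Taken together, the arguments.cpp and compilationPolicy.cpp hunks mean that running with -XX:+TieredCompilation now defaults CompilationPolicyChoice to 3 and instantiates AdvancedThresholdPolicy, while the previous tiered policy stays reachable by passing -XX:CompilationPolicyChoice=2 explicitly. As print_specific() above suggests, the PrintTieredEvents flag can be used to trace the policy's transition decisions (whether it is available in a given build depends on how that flag is declared there).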
