Fri, 04 Mar 2011 15:14:16 -0800
7020403: Add AdvancedCompilationPolicy for tiered
Summary: This implements adaptive tiered compilation policy.
Reviewed-by: kvn, never
1.1 --- a/src/share/vm/oops/methodKlass.cpp Thu Mar 03 23:31:45 2011 -0800 1.2 +++ b/src/share/vm/oops/methodKlass.cpp Fri Mar 04 15:14:16 2011 -0800 1.3 @@ -103,6 +103,12 @@ 1.4 m->backedge_counter()->init(); 1.5 m->clear_number_of_breakpoints(); 1.6 1.7 +#ifdef TIERED 1.8 + m->set_rate(0); 1.9 + m->set_prev_event_count(0); 1.10 + m->set_prev_time(0); 1.11 +#endif 1.12 + 1.13 assert(m->is_parsable(), "must be parsable here."); 1.14 assert(m->size() == size, "wrong size for object"); 1.15 // We should not publish an uprasable object's reference
2.1 --- a/src/share/vm/oops/methodOop.hpp Thu Mar 03 23:31:45 2011 -0800 2.2 +++ b/src/share/vm/oops/methodOop.hpp Fri Mar 04 15:14:16 2011 -0800 2.3 @@ -84,6 +84,11 @@ 2.4 // | invocation_counter | 2.5 // | backedge_counter | 2.6 // |------------------------------------------------------| 2.7 +// | prev_time (tiered only, 64 bit wide) | 2.8 +// | | 2.9 +// |------------------------------------------------------| 2.10 +// | rate (tiered) | 2.11 +// |------------------------------------------------------| 2.12 // | code (pointer) | 2.13 // | i2i (pointer) | 2.14 // | adapter (pointer) | 2.15 @@ -124,6 +129,11 @@ 2.16 InvocationCounter _invocation_counter; // Incremented before each activation of the method - used to trigger frequency-based optimizations 2.17 InvocationCounter _backedge_counter; // Incremented before each backedge taken - used to trigger frequencey-based optimizations 2.18 2.19 +#ifdef TIERED 2.20 + jlong _prev_time; // Previous time the rate was acquired 2.21 + float _rate; // Events (invocation and backedge counter increments) per millisecond 2.22 +#endif 2.23 + 2.24 #ifndef PRODUCT 2.25 int _compiled_invocation_count; // Number of nmethod invocations so far (for perf. debugging) 2.26 #endif 2.27 @@ -304,6 +314,17 @@ 2.28 InvocationCounter* invocation_counter() { return &_invocation_counter; } 2.29 InvocationCounter* backedge_counter() { return &_backedge_counter; } 2.30 2.31 +#ifdef TIERED 2.32 + // We are reusing interpreter_invocation_count as a holder for the previous event count! 2.33 + // We can do that since interpreter_invocation_count is not used in tiered. 
2.34 + int prev_event_count() const { return _interpreter_invocation_count; } 2.35 + void set_prev_event_count(int count) { _interpreter_invocation_count = count; } 2.36 + jlong prev_time() const { return _prev_time; } 2.37 + void set_prev_time(jlong time) { _prev_time = time; } 2.38 + float rate() const { return _rate; } 2.39 + void set_rate(float rate) { _rate = rate; } 2.40 +#endif 2.41 + 2.42 int invocation_count(); 2.43 int backedge_count(); 2.44
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 3.2 +++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp Fri Mar 04 15:14:16 2011 -0800 3.3 @@ -0,0 +1,450 @@ 3.4 +/* 3.5 +* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 3.6 +* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. 3.7 +*/ 3.8 + 3.9 +#include "precompiled.hpp" 3.10 +#include "runtime/advancedThresholdPolicy.hpp" 3.11 +#include "runtime/simpleThresholdPolicy.inline.hpp" 3.12 + 3.13 +#ifdef TIERED 3.14 +// Print an event. 3.15 +void AdvancedThresholdPolicy::print_specific(EventType type, methodHandle mh, methodHandle imh, 3.16 + int bci, CompLevel level) { 3.17 + tty->print(" rate: "); 3.18 + if (mh->prev_time() == 0) tty->print("n/a"); 3.19 + else tty->print("%f", mh->rate()); 3.20 + 3.21 + tty->print(" k: %.2lf,%.2lf", threshold_scale(CompLevel_full_profile, Tier3LoadFeedback), 3.22 + threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback)); 3.23 + 3.24 +} 3.25 + 3.26 +void AdvancedThresholdPolicy::initialize() { 3.27 + // Turn on ergonomic compiler count selection 3.28 + if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) { 3.29 + FLAG_SET_DEFAULT(CICompilerCountPerCPU, true); 3.30 + } 3.31 + int count = CICompilerCount; 3.32 + if (CICompilerCountPerCPU) { 3.33 + // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n 3.34 + int log_cpu = log2_intptr(os::active_processor_count()); 3.35 + int loglog_cpu = log2_intptr(MAX2(log_cpu, 1)); 3.36 + count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2; 3.37 + } 3.38 + 3.39 + set_c1_count(MAX2(count / 3, 1)); 3.40 + set_c2_count(MAX2(count - count / 3, 1)); 3.41 + 3.42 + // Some inlining tuning 3.43 +#ifdef X86 3.44 + if (FLAG_IS_DEFAULT(InlineSmallCode)) { 3.45 + FLAG_SET_DEFAULT(InlineSmallCode, 2000); 3.46 + } 3.47 +#endif 3.48 + 3.49 +#ifdef SPARC 3.50 + if (FLAG_IS_DEFAULT(InlineSmallCode)) { 3.51 + FLAG_SET_DEFAULT(InlineSmallCode, 2500); 3.52 
+ } 3.53 +#endif 3.54 + 3.55 + 3.56 + set_start_time(os::javaTimeMillis()); 3.57 +} 3.58 + 3.59 +// update_rate() is called from select_task() while holding a compile queue lock. 3.60 +void AdvancedThresholdPolicy::update_rate(jlong t, methodOop m) { 3.61 + if (is_old(m)) { 3.62 + // We don't remove old methods from the queue, 3.63 + // so we can just zero the rate. 3.64 + m->set_rate(0); 3.65 + return; 3.66 + } 3.67 + 3.68 + // We don't update the rate if we've just come out of a safepoint. 3.69 + // delta_s is the time since last safepoint in milliseconds. 3.70 + jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); 3.71 + jlong delta_t = t - (m->prev_time() != 0 ? m->prev_time() : start_time()); // milliseconds since the last measurement 3.72 + // How many events were there since the last time? 3.73 + int event_count = m->invocation_count() + m->backedge_count(); 3.74 + int delta_e = event_count - m->prev_event_count(); 3.75 + 3.76 + // We should be running for at least 1ms. 3.77 + if (delta_s >= TieredRateUpdateMinTime) { 3.78 + // And we must've taken the previous point at least 1ms before. 3.79 + if (delta_t >= TieredRateUpdateMinTime && delta_e > 0) { 3.80 + m->set_prev_time(t); 3.81 + m->set_prev_event_count(event_count); 3.82 + m->set_rate((float)delta_e / (float)delta_t); // Rate is events per millisecond 3.83 + } else 3.84 + if (delta_t > TieredRateUpdateMaxTime && delta_e == 0) { 3.85 + // If nothing happened for 25ms, zero the rate. Don't modify prev values. 3.86 + m->set_rate(0); 3.87 + } 3.88 + } 3.89 +} 3.90 + 3.91 +// Check if this method has been stale for a given number of milliseconds. 3.92 +// See select_task(). 
3.93 +bool AdvancedThresholdPolicy::is_stale(jlong t, jlong timeout, methodOop m) { 3.94 + jlong delta_s = t - SafepointSynchronize::end_of_last_safepoint(); 3.95 + jlong delta_t = t - m->prev_time(); 3.96 + if (delta_t > timeout && delta_s > timeout) { 3.97 + int event_count = m->invocation_count() + m->backedge_count(); 3.98 + int delta_e = event_count - m->prev_event_count(); 3.99 + // Return true if there were no events. 3.100 + return delta_e == 0; 3.101 + } 3.102 + return false; 3.103 +} 3.104 + 3.105 +// We don't remove old methods from the compile queue even if they have 3.106 +// very low activity. See select_task(). 3.107 +bool AdvancedThresholdPolicy::is_old(methodOop method) { 3.108 + return method->invocation_count() > 50000 || method->backedge_count() > 500000; 3.109 +} 3.110 + 3.111 +double AdvancedThresholdPolicy::weight(methodOop method) { 3.112 + return (method->rate() + 1) * ((method->invocation_count() + 1) * (method->backedge_count() + 1)); 3.113 +} 3.114 + 3.115 +// Apply heuristics and return true if x should be compiled before y 3.116 +bool AdvancedThresholdPolicy::compare_methods(methodOop x, methodOop y) { 3.117 + if (x->highest_comp_level() > y->highest_comp_level()) { 3.118 + // recompilation after deopt 3.119 + return true; 3.120 + } else 3.121 + if (x->highest_comp_level() == y->highest_comp_level()) { 3.122 + if (weight(x) > weight(y)) { 3.123 + return true; 3.124 + } 3.125 + } 3.126 + return false; 3.127 +} 3.128 + 3.129 +// Is method profiled enough? 
3.130 +bool AdvancedThresholdPolicy::is_method_profiled(methodOop method) { 3.131 + methodDataOop mdo = method->method_data(); 3.132 + if (mdo != NULL) { 3.133 + int i = mdo->invocation_count_delta(); 3.134 + int b = mdo->backedge_count_delta(); 3.135 + return call_predicate_helper<CompLevel_full_profile>(i, b, 1); 3.136 + } 3.137 + return false; 3.138 +} 3.139 + 3.140 +// Called with the queue locked and with at least one element 3.141 +CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) { 3.142 + CompileTask *max_task = NULL; 3.143 + methodOop max_method; 3.144 + jlong t = os::javaTimeMillis(); 3.145 + // Iterate through the queue and find a method with a maximum rate. 3.146 + for (CompileTask* task = compile_queue->first(); task != NULL;) { 3.147 + CompileTask* next_task = task->next(); 3.148 + methodOop method = (methodOop)JNIHandles::resolve(task->method_handle()); 3.149 + methodDataOop mdo = method->method_data(); 3.150 + update_rate(t, method); 3.151 + if (max_task == NULL) { 3.152 + max_task = task; 3.153 + max_method = method; 3.154 + } else { 3.155 + // If a method has been stale for some time, remove it from the queue. 
3.156 + if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) { 3.157 + if (PrintTieredEvents) { 3.158 + print_event(KILL, method, method, task->osr_bci(), (CompLevel)task->comp_level()); 3.159 + } 3.160 + CompileTaskWrapper ctw(task); // Frees the task 3.161 + compile_queue->remove(task); 3.162 + method->clear_queued_for_compilation(); 3.163 + task = next_task; 3.164 + continue; 3.165 + } 3.166 + 3.167 + // Select a method with a higher rate 3.168 + if (compare_methods(method, max_method)) { 3.169 + max_task = task; 3.170 + max_method = method; 3.171 + } 3.172 + } 3.173 + task = next_task; 3.174 + } 3.175 + 3.176 + if (max_task->comp_level() == CompLevel_full_profile && is_method_profiled(max_method)) { 3.177 + max_task->set_comp_level(CompLevel_limited_profile); 3.178 + if (PrintTieredEvents) { 3.179 + print_event(UPDATE, max_method, max_method, max_task->osr_bci(), (CompLevel)max_task->comp_level()); 3.180 + } 3.181 + } 3.182 + 3.183 + return max_task; 3.184 +} 3.185 + 3.186 +double AdvancedThresholdPolicy::threshold_scale(CompLevel level, int feedback_k) { 3.187 + double queue_size = CompileBroker::queue_size(level); 3.188 + int comp_count = compiler_count(level); 3.189 + double k = queue_size / (feedback_k * comp_count) + 1; 3.190 + return k; 3.191 +} 3.192 + 3.193 +// Call and loop predicates determine whether a transition to a higher 3.194 +// compilation level should be performed (pointers to predicate functions 3.195 +// are passed to common()). 3.196 +// Tier?LoadFeedback is basically a coefficient that determines of 3.197 +// how many methods per compiler thread can be in the queue before 3.198 +// the threshold values double. 
3.199 +bool AdvancedThresholdPolicy::loop_predicate(int i, int b, CompLevel cur_level) { 3.200 + switch(cur_level) { 3.201 + case CompLevel_none: 3.202 + case CompLevel_limited_profile: { 3.203 + double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); 3.204 + return loop_predicate_helper<CompLevel_none>(i, b, k); 3.205 + } 3.206 + case CompLevel_full_profile: { 3.207 + double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); 3.208 + return loop_predicate_helper<CompLevel_full_profile>(i, b, k); 3.209 + } 3.210 + default: 3.211 + return true; 3.212 + } 3.213 +} 3.214 + 3.215 +bool AdvancedThresholdPolicy::call_predicate(int i, int b, CompLevel cur_level) { 3.216 + switch(cur_level) { 3.217 + case CompLevel_none: 3.218 + case CompLevel_limited_profile: { 3.219 + double k = threshold_scale(CompLevel_full_profile, Tier3LoadFeedback); 3.220 + return call_predicate_helper<CompLevel_none>(i, b, k); 3.221 + } 3.222 + case CompLevel_full_profile: { 3.223 + double k = threshold_scale(CompLevel_full_optimization, Tier4LoadFeedback); 3.224 + return call_predicate_helper<CompLevel_full_profile>(i, b, k); 3.225 + } 3.226 + default: 3.227 + return true; 3.228 + } 3.229 +} 3.230 + 3.231 +// If a method is old enough and is still in the interpreter we would want to 3.232 +// start profiling without waiting for the compiled method to arrive. 3.233 +// We also take the load on compilers into the account. 
3.234 +bool AdvancedThresholdPolicy::should_create_mdo(methodOop method, CompLevel cur_level) { 3.235 + if (cur_level == CompLevel_none && 3.236 + CompileBroker::queue_size(CompLevel_full_optimization) <= 3.237 + Tier3DelayOn * compiler_count(CompLevel_full_optimization)) { 3.238 + int i = method->invocation_count(); 3.239 + int b = method->backedge_count(); 3.240 + double k = Tier0ProfilingStartPercentage / 100.0; 3.241 + return call_predicate_helper<CompLevel_none>(i, b, k) || loop_predicate_helper<CompLevel_none>(i, b, k); 3.242 + } 3.243 + return false; 3.244 +} 3.245 + 3.246 +// Create MDO if necessary. 3.247 +void AdvancedThresholdPolicy::create_mdo(methodHandle mh, TRAPS) { 3.248 + if (mh->is_native() || mh->is_abstract() || mh->is_accessor()) return; 3.249 + if (mh->method_data() == NULL) { 3.250 + methodOopDesc::build_interpreter_method_data(mh, THREAD); 3.251 + if (HAS_PENDING_EXCEPTION) { 3.252 + CLEAR_PENDING_EXCEPTION; 3.253 + } 3.254 + } 3.255 +} 3.256 + 3.257 + 3.258 +/* 3.259 + * Method states: 3.260 + * 0 - interpreter (CompLevel_none) 3.261 + * 1 - pure C1 (CompLevel_simple) 3.262 + * 2 - C1 with invocation and backedge counting (CompLevel_limited_profile) 3.263 + * 3 - C1 with full profiling (CompLevel_full_profile) 3.264 + * 4 - C2 (CompLevel_full_optimization) 3.265 + * 3.266 + * Common state transition patterns: 3.267 + * a. 0 -> 3 -> 4. 3.268 + * The most common path. But note that even in this straightforward case 3.269 + * profiling can start at level 0 and finish at level 3. 3.270 + * 3.271 + * b. 0 -> 2 -> 3 -> 4. 3.272 + * This case occurs when the load on C2 is deemed too high. So, instead of transitioning 3.273 + * into state 3 directly and over-profiling while a method is in the C2 queue we transition to 3.274 + * level 2 and wait until the load on C2 decreases. This path is disabled for OSRs. 3.275 + * 3.276 + * c. 0 -> (3->2) -> 4. 
3.277 + * In this case we enqueue a method for compilation at level 3, but the C1 queue is long enough 3.278 + * to enable the profiling to fully occur at level 0. In this case we change the compilation level 3.279 + * of the method to 2, because it'll allow it to run much faster without full profiling while c2 3.280 + * is compiling. 3.281 + * 3.282 + * d. 0 -> 3 -> 1 or 0 -> 2 -> 1. 3.283 + * After a method was once compiled with C1 it can be identified as trivial and be compiled to 3.284 + * level 1. These transition can also occur if a method can't be compiled with C2 but can with C1. 3.285 + * 3.286 + * e. 0 -> 4. 3.287 + * This can happen if a method fails C1 compilation (it will still be profiled in the interpreter) 3.288 + * or because of a deopt that didn't require reprofiling (compilation won't happen in this case because 3.289 + * the compiled version already exists). 3.290 + * 3.291 + * Note that since state 0 can be reached from any other state via deoptimization different loops 3.292 + * are possible. 3.293 + * 3.294 + */ 3.295 + 3.296 +// Common transition function. Given a predicate determines if a method should transition to another level. 3.297 +CompLevel AdvancedThresholdPolicy::common(Predicate p, methodOop method, CompLevel cur_level) { 3.298 + if (is_trivial(method)) return CompLevel_simple; 3.299 + 3.300 + CompLevel next_level = cur_level; 3.301 + int i = method->invocation_count(); 3.302 + int b = method->backedge_count(); 3.303 + 3.304 + switch(cur_level) { 3.305 + case CompLevel_none: 3.306 + // If we were at full profile level, would we switch to full opt? 3.307 + if (common(p, method, CompLevel_full_profile) == CompLevel_full_optimization) { 3.308 + next_level = CompLevel_full_optimization; 3.309 + } else if ((this->*p)(i, b, cur_level)) { 3.310 + // C1-generated fully profiled code is about 30% slower than the limited profile 3.311 + // code that has only invocation and backedge counters. 
The observation is that 3.312 + // if C2 queue is large enough we can spend too much time in the fully profiled code 3.313 + // while waiting for C2 to pick the method from the queue. To alleviate this problem 3.314 + // we introduce a feedback on the C2 queue size. If the C2 queue is sufficiently long 3.315 + // we choose to compile a limited profiled version and then recompile with full profiling 3.316 + // when the load on C2 goes down. 3.317 + if (CompileBroker::queue_size(CompLevel_full_optimization) > 3.318 + Tier3DelayOn * compiler_count(CompLevel_full_optimization)) { 3.319 + next_level = CompLevel_limited_profile; 3.320 + } else { 3.321 + next_level = CompLevel_full_profile; 3.322 + } 3.323 + } 3.324 + break; 3.325 + case CompLevel_limited_profile: 3.326 + if (is_method_profiled(method)) { 3.327 + // Special case: we got here because this method was fully profiled in the interpreter. 3.328 + next_level = CompLevel_full_optimization; 3.329 + } else { 3.330 + methodDataOop mdo = method->method_data(); 3.331 + if (mdo != NULL) { 3.332 + if (mdo->would_profile()) { 3.333 + if (CompileBroker::queue_size(CompLevel_full_optimization) <= 3.334 + Tier3DelayOff * compiler_count(CompLevel_full_optimization) && 3.335 + (this->*p)(i, b, cur_level)) { 3.336 + next_level = CompLevel_full_profile; 3.337 + } 3.338 + } else { 3.339 + next_level = CompLevel_full_optimization; 3.340 + } 3.341 + } 3.342 + } 3.343 + break; 3.344 + case CompLevel_full_profile: 3.345 + { 3.346 + methodDataOop mdo = method->method_data(); 3.347 + if (mdo != NULL) { 3.348 + if (mdo->would_profile()) { 3.349 + int mdo_i = mdo->invocation_count_delta(); 3.350 + int mdo_b = mdo->backedge_count_delta(); 3.351 + if ((this->*p)(mdo_i, mdo_b, cur_level)) { 3.352 + next_level = CompLevel_full_optimization; 3.353 + } 3.354 + } else { 3.355 + next_level = CompLevel_full_optimization; 3.356 + } 3.357 + } 3.358 + } 3.359 + break; 3.360 + } 3.361 + return next_level; 3.362 +} 3.363 + 3.364 +// Determine if a 
method should be compiled with a normal entry point at a different level. 3.365 +CompLevel AdvancedThresholdPolicy::call_event(methodOop method, CompLevel cur_level) { 3.366 + CompLevel osr_level = (CompLevel) method->highest_osr_comp_level(); 3.367 + CompLevel next_level = common(&AdvancedThresholdPolicy::call_predicate, method, cur_level); 3.368 + 3.369 + // If OSR method level is greater than the regular method level, the levels should be 3.370 + // equalized by raising the regular method level in order to avoid OSRs during each 3.371 + // invocation of the method. 3.372 + if (osr_level == CompLevel_full_optimization && cur_level == CompLevel_full_profile) { 3.373 + methodDataOop mdo = method->method_data(); 3.374 + guarantee(mdo != NULL, "MDO should not be NULL"); 3.375 + if (mdo->invocation_count() >= 1) { 3.376 + next_level = CompLevel_full_optimization; 3.377 + } 3.378 + } else { 3.379 + next_level = MAX2(osr_level, next_level); 3.380 + } 3.381 + 3.382 + return next_level; 3.383 +} 3.384 + 3.385 +// Determine if we should do an OSR compilation of a given method. 3.386 +CompLevel AdvancedThresholdPolicy::loop_event(methodOop method, CompLevel cur_level) { 3.387 + if (cur_level == CompLevel_none) { 3.388 + // If there is a live OSR method that means that we deopted to the interpreter 3.389 + // for the transition. 3.390 + CompLevel osr_level = (CompLevel)method->highest_osr_comp_level(); 3.391 + if (osr_level > CompLevel_none) { 3.392 + return osr_level; 3.393 + } 3.394 + } 3.395 + return common(&AdvancedThresholdPolicy::loop_predicate, method, cur_level); 3.396 +} 3.397 + 3.398 +// Update the rate and submit compile 3.399 +void AdvancedThresholdPolicy::submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS) { 3.400 + int hot_count = (bci == InvocationEntryBci) ? 
mh->invocation_count() : mh->backedge_count(); 3.401 + update_rate(os::javaTimeMillis(), mh()); 3.402 + CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", THREAD); 3.403 +} 3.404 + 3.405 + 3.406 +// Handle the invocation event. 3.407 +void AdvancedThresholdPolicy::method_invocation_event(methodHandle mh, methodHandle imh, 3.408 + CompLevel level, TRAPS) { 3.409 + if (should_create_mdo(mh(), level)) { 3.410 + create_mdo(mh, THREAD); 3.411 + } 3.412 + if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, InvocationEntryBci)) { 3.413 + CompLevel next_level = call_event(mh(), level); 3.414 + if (next_level != level) { 3.415 + compile(mh, InvocationEntryBci, next_level, THREAD); 3.416 + } 3.417 + } 3.418 +} 3.419 + 3.420 +// Handle the back branch event. Notice that we can compile the method 3.421 +// with a regular entry from here. 3.422 +void AdvancedThresholdPolicy::method_back_branch_event(methodHandle mh, methodHandle imh, 3.423 + int bci, CompLevel level, TRAPS) { 3.424 + if (should_create_mdo(mh(), level)) { 3.425 + create_mdo(mh, THREAD); 3.426 + } 3.427 + 3.428 + // If the method is already compiling, quickly bail out. 3.429 + if (is_compilation_enabled() && !CompileBroker::compilation_is_in_queue(mh, bci)) { 3.430 + // Use loop event as an opportunity to also check there's been 3.431 + // enough calls. 3.432 + CompLevel cur_level = comp_level(mh()); 3.433 + CompLevel next_level = call_event(mh(), cur_level); 3.434 + CompLevel next_osr_level = loop_event(mh(), level); 3.435 + if (next_osr_level == CompLevel_limited_profile) { 3.436 + next_osr_level = CompLevel_full_profile; // OSRs are supposed to be for very hot methods. 3.437 + } 3.438 + next_level = MAX2(next_level, 3.439 + next_osr_level < CompLevel_full_optimization ? 
next_osr_level : cur_level); 3.440 + bool is_compiling = false; 3.441 + if (next_level != cur_level) { 3.442 + compile(mh, InvocationEntryBci, next_level, THREAD); 3.443 + is_compiling = true; 3.444 + } 3.445 + 3.446 + // Do the OSR version 3.447 + if (!is_compiling && next_osr_level != level) { 3.448 + compile(mh, bci, next_osr_level, THREAD); 3.449 + } 3.450 + } 3.451 +} 3.452 + 3.453 +#endif // TIERED
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 4.2 +++ b/src/share/vm/runtime/advancedThresholdPolicy.hpp Fri Mar 04 15:14:16 2011 -0800 4.3 @@ -0,0 +1,207 @@ 4.4 +/* 4.5 +* Copyright (c) 2010, 2011 Oracle and/or its affiliates. All rights reserved. 4.6 +* ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. 4.7 +*/ 4.8 + 4.9 +#ifndef SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP 4.10 +#define SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP 4.11 + 4.12 +#include "runtime/simpleThresholdPolicy.hpp" 4.13 + 4.14 +#ifdef TIERED 4.15 +class CompileTask; 4.16 +class CompileQueue; 4.17 + 4.18 +/* 4.19 + * The system supports 5 execution levels: 4.20 + * * level 0 - interpreter 4.21 + * * level 1 - C1 with full optimization (no profiling) 4.22 + * * level 2 - C1 with invocation and backedge counters 4.23 + * * level 3 - C1 with full profiling (level 2 + MDO) 4.24 + * * level 4 - C2 4.25 + * 4.26 + * Levels 0, 2 and 3 periodically notify the runtime about the current value of the counters 4.27 + * (invocation counters and backedge counters). The frequency of these notifications is 4.28 + * different at each level. These notifications are used by the policy to decide what transition 4.29 + * to make. 4.30 + * 4.31 + * Execution starts at level 0 (interpreter), then the policy can decide either to compile the 4.32 + * method at level 3 or level 2. The decision is based on the following factors: 4.33 + * 1. The length of the C2 queue determines the next level. The observation is that level 2 4.34 + * is generally faster than level 3 by about 30%, therefore we would want to minimize the time 4.35 + * a method spends at level 3. We should only spend the time at level 3 that is necessary to get 4.36 + * adequate profiling. So, if the C2 queue is long enough it is more beneficial to go first to 4.37 + * level 2, because if we transitioned to level 3 we would be stuck there until our C2 compile 4.38 + * request makes its way through the long queue. 
When the load on C2 recedes we are going to 4.39 + * recompile at level 3 and start gathering profiling information. 4.40 + * 2. The length of C1 queue is used to dynamically adjust the thresholds, so as to introduce 4.41 + * additional filtering if the compiler is overloaded. The rationale is that by the time a 4.42 + * method gets compiled it can become unused, so it doesn't make sense to put too much onto the 4.43 + * queue. 4.44 + * 4.45 + * After profiling is completed at level 3 the transition is made to level 4. Again, the length 4.46 + * of the C2 queue is used as a feedback to adjust the thresholds. 4.47 + * 4.48 + * After the first C1 compile some basic information is determined about the code like the number 4.49 + * of the blocks and the number of the loops. Based on that it can be decided that a method 4.50 + * is trivial and compiling it with C1 will yield the same code. In this case the method is 4.51 + * compiled at level 1 instead of 4. 4.52 + * 4.53 + * We also support profiling at level 0. If C1 is slow enough to produce the level 3 version of 4.54 + * the code and the C2 queue is sufficiently small we can decide to start profiling in the 4.55 + * interpreter (and continue profiling in the compiled code once the level 3 version arrives). 4.56 + * If the profiling at level 0 is fully completed before level 3 version is produced, a level 2 4.57 + * version is compiled instead in order to run faster waiting for a level 4 version. 4.58 + * 4.59 + * Compile queues are implemented as priority queues - for each method in the queue we compute 4.60 + * the event rate (the number of invocation and backedge counter increments per unit of time). 4.61 + * When getting an element off the queue we pick the one with the largest rate. Maintaining the 4.62 + * rate also allows us to remove stale methods (the ones that got on the queue but stopped 4.63 + * being used shortly after that). 
4.64 +*/ 4.65 + 4.66 +/* Command line options: 4.67 + * - Tier?InvokeNotifyFreqLog and Tier?BackedgeNotifyFreqLog control the frequency of method 4.68 + * invocation and backedge notifications. Basically every n-th invocation or backedge a mutator thread 4.69 + * makes a call into the runtime. 4.70 + * 4.71 + * - Tier?CompileThreshold, Tier?BackEdgeThreshold, Tier?MinInvocationThreshold control 4.72 + * compilation thresholds. 4.73 + * Level 2 thresholds are not used and are provided for option-compatibility and potential future use. 4.74 + * Other thresholds work as follows: 4.75 + * 4.76 + * Transition from interpreter (level 0) to C1 with full profiling (level 3) happens when 4.77 + * the following predicate is true (X is the level): 4.78 + * 4.79 + * i > TierXInvocationThreshold * s || (i > TierXMinInvocationThreshold * s && i + b > TierXCompileThreshold * s), 4.80 + * 4.81 + * where $i$ is the number of method invocations, $b$ number of backedges and $s$ is the scaling 4.82 + * coefficient that will be discussed further. 4.83 + * The intuition is to equalize the time that is spent profiling each method. 4.84 + * The same predicate is used to control the transition from level 3 to level 4 (C2). It should be 4.85 + * noted though that the thresholds are relative. Moreover i and b for the 0->3 transition come 4.86 + * from methodOop and for 3->4 transition they come from MDO (since profiled invocations are 4.87 + * counted separately). 4.88 + * 4.89 + * OSR transitions are controlled simply with b > TierXBackEdgeThreshold * s predicates. 4.90 + * 4.91 + * - Tier?LoadFeedback options are used to automatically scale the predicates described above depending 4.92 + * on the compiler load. 
The scaling coefficients are computed as follows: 4.93 + * 4.94 + * s = queue_size_X / (TierXLoadFeedback * compiler_count_X) + 1, 4.95 + * 4.96 + * where queue_size_X is the current size of the compiler queue of level X, and compiler_count_X 4.97 + * is the number of level X compiler threads. 4.98 + * 4.99 + * Basically these parameters describe how many methods should be in the compile queue 4.100 + * per compiler thread before the scaling coefficient increases by one. 4.101 + * 4.102 + * This feedback provides the mechanism to automatically control the flow of compilation requests 4.103 + * depending on the machine speed, mutator load and other external factors. 4.104 + * 4.105 + * - Tier3DelayOn and Tier3DelayOff parameters control another important feedback loop. 4.106 + * Consider the following observation: a method compiled with full profiling (level 3) 4.107 + * is about 30% slower than a method at level 2 (just invocation and backedge counters, no MDO). 4.108 + * Normally, the following transitions will occur: 0->3->4. The problem arises when the C2 queue 4.109 + * gets congested and the 3->4 transition is delayed. While the method is the C2 queue it continues 4.110 + * executing at level 3 for much longer time than is required by the predicate and at suboptimal speed. 4.111 + * The idea is to dynamically change the behavior of the system in such a way that if a substantial 4.112 + * load on C2 is detected we would first do the 0->2 transition allowing a method to run faster. 4.113 + * And then when the load decreases to allow 2->3 transitions. 4.114 + * 4.115 + * Tier3Delay* parameters control this switching mechanism. 4.116 + * Tier3DelayOn is the number of methods in the C2 queue per compiler thread after which the policy 4.117 + * no longer does 0->3 transitions but does 0->2 transitions instead. 
4.118 + * Tier3DelayOff switches the original behavior back when the number of methods in the C2 queue 4.119 + * per compiler thread falls below the specified amount. 4.120 + * The hysteresis is necessary to avoid jitter. 4.121 + * 4.122 + * - TieredCompileTaskTimeout is the amount of time an idle method can spend in the compile queue. 4.123 + * Basically, since we use the event rate d(i + b)/dt as a value of priority when selecting a method to 4.124 + * compile from the compile queue, we also can detect stale methods for which the rate has been 4.125 + * 0 for some time in the same iteration. Stale methods can appear in the queue when an application 4.126 + * abruptly changes its behavior. 4.127 + * 4.128 + * - TieredStopAtLevel is used mostly for testing. It allows one to bypass the policy logic and stick 4.129 + * to a given level. For example it's useful to set TieredStopAtLevel = 1 in order to compile everything 4.130 + * with pure c1. 4.131 + * 4.132 + * - Tier0ProfilingStartPercentage allows the interpreter to start profiling when the inequalities in the 4.133 + * 0->3 predicate are already exceeded by the given percentage but the level 3 version of the 4.134 + * method is still not ready. We can even go directly from level 0 to 4 if c1 doesn't produce a compiled 4.135 + * version in time. This reduces the overall transition to level 4 and decreases the startup time. 4.136 + * Note that this behavior is also guarded by the Tier3Delay mechanism: when the c2 queue is too long 4.137 + * there is no reason to start profiling prematurely. 4.138 + * 4.139 + * - TieredRateUpdateMinTime and TieredRateUpdateMaxTime are parameters of the rate computation. 4.140 + * Basically, the rate is not computed more frequently than TieredRateUpdateMinTime and is considered 4.141 + * to be zero if no events occurred in TieredRateUpdateMaxTime. 
4.142 + */ 4.143 + 4.144 + 4.145 +class AdvancedThresholdPolicy : public SimpleThresholdPolicy { 4.146 + jlong _start_time; 4.147 + 4.148 + // Call and loop predicates determine whether a transition to a higher compilation 4.149 + // level should be performed (pointers to predicate functions are passed to common(). 4.150 + // Predicates also take compiler load into account. 4.151 + typedef bool (AdvancedThresholdPolicy::*Predicate)(int i, int b, CompLevel cur_level); 4.152 + bool call_predicate(int i, int b, CompLevel cur_level); 4.153 + bool loop_predicate(int i, int b, CompLevel cur_level); 4.154 + // Common transition function. Given a predicate determines if a method should transition to another level. 4.155 + CompLevel common(Predicate p, methodOop method, CompLevel cur_level); 4.156 + // Transition functions. 4.157 + // call_event determines if a method should be compiled at a different 4.158 + // level with a regular invocation entry. 4.159 + CompLevel call_event(methodOop method, CompLevel cur_level); 4.160 + // loop_event checks if a method should be OSR compiled at a different 4.161 + // level. 4.162 + CompLevel loop_event(methodOop method, CompLevel cur_level); 4.163 + // Has a method been long around? 4.164 + // We don't remove old methods from the compile queue even if they have 4.165 + // very low activity (see select_task()). 4.166 + inline bool is_old(methodOop method); 4.167 + // Was a given method inactive for a given number of milliseconds. 4.168 + // If it is, we would remove it from the queue (see select_task()). 4.169 + inline bool is_stale(jlong t, jlong timeout, methodOop m); 4.170 + // Compute the weight of the method for the compilation scheduling 4.171 + inline double weight(methodOop method); 4.172 + // Apply heuristics and return true if x should be compiled before y 4.173 + inline bool compare_methods(methodOop x, methodOop y); 4.174 + // Compute event rate for a given method. 
The rate is the number of events (invocations + backedges) 4.175 + // per millisecond. 4.176 + inline void update_rate(jlong t, methodOop m); 4.177 + // Compute threshold scaling coefficient 4.178 + inline double threshold_scale(CompLevel level, int feedback_k); 4.179 + // If a method is old enough and is still in the interpreter we would want to 4.180 + // start profiling without waiting for the compiled method to arrive. This function 4.181 + // determines whether we should do that. 4.182 + inline bool should_create_mdo(methodOop method, CompLevel cur_level); 4.183 + // Create MDO if necessary. 4.184 + void create_mdo(methodHandle mh, TRAPS); 4.185 + // Is method profiled enough? 4.186 + bool is_method_profiled(methodOop method); 4.187 + 4.188 +protected: 4.189 + void print_specific(EventType type, methodHandle mh, methodHandle imh, int bci, CompLevel level); 4.190 + 4.191 + void set_start_time(jlong t) { _start_time = t; } 4.192 + jlong start_time() const { return _start_time; } 4.193 + 4.194 + // Submit a given method for compilation (and update the rate). 4.195 + virtual void submit_compile(methodHandle mh, int bci, CompLevel level, TRAPS); 4.196 + // event() from SimpleThresholdPolicy would call these. 4.197 + virtual void method_invocation_event(methodHandle method, methodHandle inlinee, 4.198 + CompLevel level, TRAPS); 4.199 + virtual void method_back_branch_event(methodHandle method, methodHandle inlinee, 4.200 + int bci, CompLevel level, TRAPS); 4.201 +public: 4.202 + AdvancedThresholdPolicy() : _start_time(0) { } 4.203 + // Select task is called by CompileBroker. We should return a task or NULL. 4.204 + virtual CompileTask* select_task(CompileQueue* compile_queue); 4.205 + virtual void initialize(); 4.206 +}; 4.207 + 4.208 +#endif // TIERED 4.209 + 4.210 +#endif // SHARE_VM_RUNTIME_ADVANCEDTHRESHOLDPOLICY_HPP
5.1 --- a/src/share/vm/runtime/arguments.cpp Thu Mar 03 23:31:45 2011 -0800 5.2 +++ b/src/share/vm/runtime/arguments.cpp Fri Mar 04 15:14:16 2011 -0800 5.3 @@ -1026,8 +1026,9 @@ 5.4 } 5.5 5.6 void Arguments::set_tiered_flags() { 5.7 + // With tiered, set default policy to AdvancedThresholdPolicy, which is 3. 5.8 if (FLAG_IS_DEFAULT(CompilationPolicyChoice)) { 5.9 - FLAG_SET_DEFAULT(CompilationPolicyChoice, 2); 5.10 + FLAG_SET_DEFAULT(CompilationPolicyChoice, 3); 5.11 } 5.12 if (CompilationPolicyChoice < 2) { 5.13 vm_exit_during_initialization(
6.1 --- a/src/share/vm/runtime/compilationPolicy.cpp Thu Mar 03 23:31:45 2011 -0800 6.2 +++ b/src/share/vm/runtime/compilationPolicy.cpp Fri Mar 04 15:14:16 2011 -0800 6.3 @@ -1,5 +1,5 @@ 6.4 /* 6.5 - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 6.6 + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. 6.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6.8 * 6.9 * This code is free software; you can redistribute it and/or modify it 6.10 @@ -32,6 +32,7 @@ 6.11 #include "oops/methodOop.hpp" 6.12 #include "oops/oop.inline.hpp" 6.13 #include "prims/nativeLookup.hpp" 6.14 +#include "runtime/advancedThresholdPolicy.hpp" 6.15 #include "runtime/compilationPolicy.hpp" 6.16 #include "runtime/frame.hpp" 6.17 #include "runtime/handles.inline.hpp" 6.18 @@ -72,8 +73,15 @@ 6.19 Unimplemented(); 6.20 #endif 6.21 break; 6.22 + case 3: 6.23 +#ifdef TIERED 6.24 + CompilationPolicy::set_policy(new AdvancedThresholdPolicy()); 6.25 +#else 6.26 + Unimplemented(); 6.27 +#endif 6.28 + break; 6.29 default: 6.30 - fatal("CompilationPolicyChoice must be in the range: [0-2]"); 6.31 + fatal("CompilationPolicyChoice must be in the range: [0-3]"); 6.32 } 6.33 CompilationPolicy::policy()->initialize(); 6.34 }