Tue, 13 Apr 2010 13:52:10 -0700
6858496: Clear all SoftReferences before an out-of-memory due to GC overhead limit.
Summary: Ensure a full GC that clears SoftReferences before throwing an out-of-memory
Reviewed-by: ysr, jcoomes
1 /*
2 * Copyright 2004-2010 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24 #include "incls/_precompiled.incl"
25 #include "incls/_adaptiveSizePolicy.cpp.incl"
27 elapsedTimer AdaptiveSizePolicy::_minor_timer;
28 elapsedTimer AdaptiveSizePolicy::_major_timer;
30 // The throughput goal is implemented as
31 // _throughput_goal = 1 - ( 1 / (1 + gc_cost_ratio))
32 // gc_cost_ratio is the ratio
33 // application cost / gc cost
34 // For example a gc_cost_ratio of 4 translates into a
35 // throughput goal of .80
37 AdaptiveSizePolicy::AdaptiveSizePolicy(size_t init_eden_size,
38 size_t init_promo_size,
39 size_t init_survivor_size,
40 double gc_pause_goal_sec,
41 uint gc_cost_ratio) :
42 _eden_size(init_eden_size),
43 _promo_size(init_promo_size),
44 _survivor_size(init_survivor_size),
45 _gc_pause_goal_sec(gc_pause_goal_sec),
46 _throughput_goal(1.0 - double(1.0 / (1.0 + (double) gc_cost_ratio))),
47 _gc_overhead_limit_exceeded(false),
48 _print_gc_overhead_limit_would_be_exceeded(false),
49 _gc_overhead_limit_count(0),
50 _latest_minor_mutator_interval_seconds(0),
51 _threshold_tolerance_percent(1.0 + ThresholdTolerance/100.0),
52 _young_gen_change_for_minor_throughput(0),
53 _old_gen_change_for_major_throughput(0) {
54 assert(AdaptiveSizePolicyGCTimeLimitThreshold > 0,
55 "No opportunity to clear SoftReferences before GC overhead limit");
56 _avg_minor_pause =
57 new AdaptivePaddedAverage(AdaptiveTimeWeight, PausePadding);
58 _avg_minor_interval = new AdaptiveWeightedAverage(AdaptiveTimeWeight);
59 _avg_minor_gc_cost = new AdaptiveWeightedAverage(AdaptiveTimeWeight);
60 _avg_major_gc_cost = new AdaptiveWeightedAverage(AdaptiveTimeWeight);
62 _avg_young_live = new AdaptiveWeightedAverage(AdaptiveSizePolicyWeight);
63 _avg_old_live = new AdaptiveWeightedAverage(AdaptiveSizePolicyWeight);
64 _avg_eden_live = new AdaptiveWeightedAverage(AdaptiveSizePolicyWeight);
66 _avg_survived = new AdaptivePaddedAverage(AdaptiveSizePolicyWeight,
67 SurvivorPadding);
68 _avg_pretenured = new AdaptivePaddedNoZeroDevAverage(
69 AdaptiveSizePolicyWeight,
70 SurvivorPadding);
72 _minor_pause_old_estimator =
73 new LinearLeastSquareFit(AdaptiveSizePolicyWeight);
74 _minor_pause_young_estimator =
75 new LinearLeastSquareFit(AdaptiveSizePolicyWeight);
76 _minor_collection_estimator =
77 new LinearLeastSquareFit(AdaptiveSizePolicyWeight);
78 _major_collection_estimator =
79 new LinearLeastSquareFit(AdaptiveSizePolicyWeight);
81 // Start the timers
82 _minor_timer.start();
84 _young_gen_policy_is_ready = false;
85 }
87 bool AdaptiveSizePolicy::tenuring_threshold_change() const {
88 return decrement_tenuring_threshold_for_gc_cost() ||
89 increment_tenuring_threshold_for_gc_cost() ||
90 decrement_tenuring_threshold_for_survivor_limit();
91 }
93 void AdaptiveSizePolicy::minor_collection_begin() {
94 // Update the interval time
95 _minor_timer.stop();
96 // Save most recent collection time
97 _latest_minor_mutator_interval_seconds = _minor_timer.seconds();
98 _minor_timer.reset();
99 _minor_timer.start();
100 }
102 void AdaptiveSizePolicy::update_minor_pause_young_estimator(
103 double minor_pause_in_ms) {
104 double eden_size_in_mbytes = ((double)_eden_size)/((double)M);
105 _minor_pause_young_estimator->update(eden_size_in_mbytes,
106 minor_pause_in_ms);
107 }
109 void AdaptiveSizePolicy::minor_collection_end(GCCause::Cause gc_cause) {
110 // Update the pause time.
111 _minor_timer.stop();
113 if (gc_cause != GCCause::_java_lang_system_gc ||
114 UseAdaptiveSizePolicyWithSystemGC) {
115 double minor_pause_in_seconds = _minor_timer.seconds();
116 double minor_pause_in_ms = minor_pause_in_seconds * MILLIUNITS;
118 // Sample for performance counter
119 _avg_minor_pause->sample(minor_pause_in_seconds);
121 // Cost of collection (unit-less)
122 double collection_cost = 0.0;
123 if ((_latest_minor_mutator_interval_seconds > 0.0) &&
124 (minor_pause_in_seconds > 0.0)) {
125 double interval_in_seconds =
126 _latest_minor_mutator_interval_seconds + minor_pause_in_seconds;
127 collection_cost =
128 minor_pause_in_seconds / interval_in_seconds;
129 _avg_minor_gc_cost->sample(collection_cost);
130 // Sample for performance counter
131 _avg_minor_interval->sample(interval_in_seconds);
132 }
134 // The policy does not have enough data until at least some
135 // minor collections have been done.
136 _young_gen_policy_is_ready =
137 (_avg_minor_gc_cost->count() >= AdaptiveSizePolicyReadyThreshold);
139 // Calculate variables used to estimate pause time vs. gen sizes
140 double eden_size_in_mbytes = ((double)_eden_size)/((double)M);
141 update_minor_pause_young_estimator(minor_pause_in_ms);
142 update_minor_pause_old_estimator(minor_pause_in_ms);
144 if (PrintAdaptiveSizePolicy && Verbose) {
145 gclog_or_tty->print("AdaptiveSizePolicy::minor_collection_end: "
146 "minor gc cost: %f average: %f", collection_cost,
147 _avg_minor_gc_cost->average());
148 gclog_or_tty->print_cr(" minor pause: %f minor period %f",
149 minor_pause_in_ms,
150 _latest_minor_mutator_interval_seconds * MILLIUNITS);
151 }
153 // Calculate variable used to estimate collection cost vs. gen sizes
154 assert(collection_cost >= 0.0, "Expected to be non-negative");
155 _minor_collection_estimator->update(eden_size_in_mbytes, collection_cost);
156 }
158 // Interval times use this timer to measure the mutator time.
159 // Reset the timer after the GC pause.
160 _minor_timer.reset();
161 _minor_timer.start();
162 }
164 size_t AdaptiveSizePolicy::eden_increment(size_t cur_eden,
165 uint percent_change) {
166 size_t eden_heap_delta;
167 eden_heap_delta = cur_eden / 100 * percent_change;
168 return eden_heap_delta;
169 }
171 size_t AdaptiveSizePolicy::eden_increment(size_t cur_eden) {
172 return eden_increment(cur_eden, YoungGenerationSizeIncrement);
173 }
175 size_t AdaptiveSizePolicy::eden_decrement(size_t cur_eden) {
176 size_t eden_heap_delta = eden_increment(cur_eden) /
177 AdaptiveSizeDecrementScaleFactor;
178 return eden_heap_delta;
179 }
181 size_t AdaptiveSizePolicy::promo_increment(size_t cur_promo,
182 uint percent_change) {
183 size_t promo_heap_delta;
184 promo_heap_delta = cur_promo / 100 * percent_change;
185 return promo_heap_delta;
186 }
188 size_t AdaptiveSizePolicy::promo_increment(size_t cur_promo) {
189 return promo_increment(cur_promo, TenuredGenerationSizeIncrement);
190 }
192 size_t AdaptiveSizePolicy::promo_decrement(size_t cur_promo) {
193 size_t promo_heap_delta = promo_increment(cur_promo);
194 promo_heap_delta = promo_heap_delta / AdaptiveSizeDecrementScaleFactor;
195 return promo_heap_delta;
196 }
198 double AdaptiveSizePolicy::time_since_major_gc() const {
199 _major_timer.stop();
200 double result = _major_timer.seconds();
201 _major_timer.start();
202 return result;
203 }
205 // Linear decay of major gc cost
206 double AdaptiveSizePolicy::decaying_major_gc_cost() const {
207 double major_interval = major_gc_interval_average_for_decay();
208 double major_gc_cost_average = major_gc_cost();
209 double decayed_major_gc_cost = major_gc_cost_average;
210 if(time_since_major_gc() > 0.0) {
211 decayed_major_gc_cost = major_gc_cost() *
212 (((double) AdaptiveSizeMajorGCDecayTimeScale) * major_interval)
213 / time_since_major_gc();
214 }
216 // The decayed cost should always be smaller than the
217 // average cost but the vagaries of finite arithmetic could
218 // produce a larger value in decayed_major_gc_cost so protect
219 // against that.
220 return MIN2(major_gc_cost_average, decayed_major_gc_cost);
221 }
223 // Use a value of the major gc cost that has been decayed
224 // by the factor
225 //
226 // average-interval-between-major-gc * AdaptiveSizeMajorGCDecayTimeScale /
227 // time-since-last-major-gc
228 //
229 // if the average-interval-between-major-gc * AdaptiveSizeMajorGCDecayTimeScale
230 // is less than time-since-last-major-gc.
231 //
232 // In cases where there are initial major gc's that
233 // are of a relatively high cost but no later major
234 // gc's, the total gc cost can remain high because
235 // the major gc cost remains unchanged (since there are no major
236 // gc's). In such a situation the value of the unchanging
237 // major gc cost can keep the mutator throughput below
238 // the goal when in fact the major gc cost is becoming diminishingly
239 // small. Use the decaying gc cost only to decide whether to
240 // adjust for throughput. Using it also to determine the adjustment
241 // to be made for throughput also seems reasonable but there is
242 // no test case to use to decide if it is the right thing to do
243 // don't do it yet.
245 double AdaptiveSizePolicy::decaying_gc_cost() const {
246 double decayed_major_gc_cost = major_gc_cost();
247 double avg_major_interval = major_gc_interval_average_for_decay();
248 if (UseAdaptiveSizeDecayMajorGCCost &&
249 (AdaptiveSizeMajorGCDecayTimeScale > 0) &&
250 (avg_major_interval > 0.00)) {
251 double time_since_last_major_gc = time_since_major_gc();
253 // Decay the major gc cost?
254 if (time_since_last_major_gc >
255 ((double) AdaptiveSizeMajorGCDecayTimeScale) * avg_major_interval) {
257 // Decay using the time-since-last-major-gc
258 decayed_major_gc_cost = decaying_major_gc_cost();
259 if (PrintGCDetails && Verbose) {
260 gclog_or_tty->print_cr("\ndecaying_gc_cost: major interval average:"
261 " %f time since last major gc: %f",
262 avg_major_interval, time_since_last_major_gc);
263 gclog_or_tty->print_cr(" major gc cost: %f decayed major gc cost: %f",
264 major_gc_cost(), decayed_major_gc_cost);
265 }
266 }
267 }
268 double result = MIN2(1.0, decayed_major_gc_cost + minor_gc_cost());
269 return result;
270 }
273 void AdaptiveSizePolicy::clear_generation_free_space_flags() {
274 set_change_young_gen_for_min_pauses(0);
275 set_change_old_gen_for_maj_pauses(0);
277 set_change_old_gen_for_throughput(0);
278 set_change_young_gen_for_throughput(0);
279 set_decrease_for_footprint(0);
280 set_decide_at_full_gc(0);
281 }
283 void AdaptiveSizePolicy::check_gc_overhead_limit(
284 size_t young_live,
285 size_t eden_live,
286 size_t max_old_gen_size,
287 size_t max_eden_size,
288 bool is_full_gc,
289 GCCause::Cause gc_cause,
290 CollectorPolicy* collector_policy) {
292 // Ignore explicit GC's. Exiting here does not set the flag and
293 // does not reset the count. Updating of the averages for system
294 // GC's is still controlled by UseAdaptiveSizePolicyWithSystemGC.
295 if (GCCause::is_user_requested_gc(gc_cause) ||
296 GCCause::is_serviceability_requested_gc(gc_cause)) {
297 return;
298 }
299 // eden_limit is the upper limit on the size of eden based on
300 // the maximum size of the young generation and the sizes
301 // of the survivor space.
302 // The question being asked is whether the gc costs are high
303 // and the space being recovered by a collection is low.
304 // free_in_young_gen is the free space in the young generation
305 // after a collection and promo_live is the free space in the old
306 // generation after a collection.
307 //
308 // Use the minimum of the current value of the live in the
309 // young gen or the average of the live in the young gen.
310 // If the current value drops quickly, that should be taken
311 // into account (i.e., don't trigger if the amount of free
312 // space has suddenly jumped up). If the current is much
313 // higher than the average, use the average since it represents
314 // the longer term behavor.
315 const size_t live_in_eden =
316 MIN2(eden_live, (size_t) avg_eden_live()->average());
317 const size_t free_in_eden = max_eden_size > live_in_eden ?
318 max_eden_size - live_in_eden : 0;
319 const size_t free_in_old_gen = (size_t)(max_old_gen_size - avg_old_live()->average());
320 const size_t total_free_limit = free_in_old_gen + free_in_eden;
321 const size_t total_mem = max_old_gen_size + max_eden_size;
322 const double mem_free_limit = total_mem * (GCHeapFreeLimit/100.0);
323 const double mem_free_old_limit = max_old_gen_size * (GCHeapFreeLimit/100.0);
324 const double mem_free_eden_limit = max_eden_size * (GCHeapFreeLimit/100.0);
325 const double gc_cost_limit = GCTimeLimit/100.0;
326 size_t promo_limit = (size_t)(max_old_gen_size - avg_old_live()->average());
327 // But don't force a promo size below the current promo size. Otherwise,
328 // the promo size will shrink for no good reason.
329 promo_limit = MAX2(promo_limit, _promo_size);
332 if (PrintAdaptiveSizePolicy && (Verbose ||
333 (free_in_old_gen < (size_t) mem_free_old_limit &&
334 free_in_eden < (size_t) mem_free_eden_limit))) {
335 gclog_or_tty->print_cr(
336 "PSAdaptiveSizePolicy::compute_generation_free_space limits:"
337 " promo_limit: " SIZE_FORMAT
338 " max_eden_size: " SIZE_FORMAT
339 " total_free_limit: " SIZE_FORMAT
340 " max_old_gen_size: " SIZE_FORMAT
341 " max_eden_size: " SIZE_FORMAT
342 " mem_free_limit: " SIZE_FORMAT,
343 promo_limit, max_eden_size, total_free_limit,
344 max_old_gen_size, max_eden_size,
345 (size_t) mem_free_limit);
346 }
348 bool print_gc_overhead_limit_would_be_exceeded = false;
349 if (is_full_gc) {
350 if (gc_cost() > gc_cost_limit &&
351 free_in_old_gen < (size_t) mem_free_old_limit &&
352 free_in_eden < (size_t) mem_free_eden_limit) {
353 // Collections, on average, are taking too much time, and
354 // gc_cost() > gc_cost_limit
355 // we have too little space available after a full gc.
356 // total_free_limit < mem_free_limit
357 // where
358 // total_free_limit is the free space available in
359 // both generations
360 // total_mem is the total space available for allocation
361 // in both generations (survivor spaces are not included
362 // just as they are not included in eden_limit).
363 // mem_free_limit is a fraction of total_mem judged to be an
364 // acceptable amount that is still unused.
365 // The heap can ask for the value of this variable when deciding
366 // whether to thrown an OutOfMemory error.
367 // Note that the gc time limit test only works for the collections
368 // of the young gen + tenured gen and not for collections of the
369 // permanent gen. That is because the calculation of the space
370 // freed by the collection is the free space in the young gen +
371 // tenured gen.
372 // At this point the GC overhead limit is being exceeded.
373 inc_gc_overhead_limit_count();
374 if (UseGCOverheadLimit) {
375 if (gc_overhead_limit_count() >=
376 AdaptiveSizePolicyGCTimeLimitThreshold){
377 // All conditions have been met for throwing an out-of-memory
378 set_gc_overhead_limit_exceeded(true);
379 // Avoid consecutive OOM due to the gc time limit by resetting
380 // the counter.
381 reset_gc_overhead_limit_count();
382 } else {
383 // The required consecutive collections which exceed the
384 // GC time limit may or may not have been reached. We
385 // are approaching that condition and so as not to
386 // throw an out-of-memory before all SoftRef's have been
387 // cleared, set _should_clear_all_soft_refs in CollectorPolicy.
388 // The clearing will be done on the next GC.
389 bool near_limit = gc_overhead_limit_near();
390 if (near_limit) {
391 collector_policy->set_should_clear_all_soft_refs(true);
392 if (PrintGCDetails && Verbose) {
393 gclog_or_tty->print_cr(" Nearing GC overhead limit, "
394 "will be clearing all SoftReference");
395 }
396 }
397 }
398 }
399 // Set this even when the overhead limit will not
400 // cause an out-of-memory. Diagnostic message indicating
401 // that the overhead limit is being exceeded is sometimes
402 // printed.
403 print_gc_overhead_limit_would_be_exceeded = true;
405 } else {
406 // Did not exceed overhead limits
407 reset_gc_overhead_limit_count();
408 }
409 }
411 if (UseGCOverheadLimit && PrintGCDetails && Verbose) {
412 if (gc_overhead_limit_exceeded()) {
413 gclog_or_tty->print_cr(" GC is exceeding overhead limit "
414 "of %d%%", GCTimeLimit);
415 reset_gc_overhead_limit_count();
416 } else if (print_gc_overhead_limit_would_be_exceeded) {
417 assert(gc_overhead_limit_count() > 0, "Should not be printing");
418 gclog_or_tty->print_cr(" GC would exceed overhead limit "
419 "of %d%% %d consecutive time(s)",
420 GCTimeLimit, gc_overhead_limit_count());
421 }
422 }
423 }
424 // Printing
426 bool AdaptiveSizePolicy::print_adaptive_size_policy_on(outputStream* st) const {
428 // Should only be used with adaptive size policy turned on.
429 // Otherwise, there may be variables that are undefined.
430 if (!UseAdaptiveSizePolicy) return false;
432 // Print goal for which action is needed.
433 char* action = NULL;
434 bool change_for_pause = false;
435 if ((change_old_gen_for_maj_pauses() ==
436 decrease_old_gen_for_maj_pauses_true) ||
437 (change_young_gen_for_min_pauses() ==
438 decrease_young_gen_for_min_pauses_true)) {
439 action = (char*) " *** pause time goal ***";
440 change_for_pause = true;
441 } else if ((change_old_gen_for_throughput() ==
442 increase_old_gen_for_throughput_true) ||
443 (change_young_gen_for_throughput() ==
444 increase_young_gen_for_througput_true)) {
445 action = (char*) " *** throughput goal ***";
446 } else if (decrease_for_footprint()) {
447 action = (char*) " *** reduced footprint ***";
448 } else {
449 // No actions were taken. This can legitimately be the
450 // situation if not enough data has been gathered to make
451 // decisions.
452 return false;
453 }
455 // Pauses
456 // Currently the size of the old gen is only adjusted to
457 // change the major pause times.
458 char* young_gen_action = NULL;
459 char* tenured_gen_action = NULL;
461 char* shrink_msg = (char*) "(attempted to shrink)";
462 char* grow_msg = (char*) "(attempted to grow)";
463 char* no_change_msg = (char*) "(no change)";
464 if (change_young_gen_for_min_pauses() ==
465 decrease_young_gen_for_min_pauses_true) {
466 young_gen_action = shrink_msg;
467 } else if (change_for_pause) {
468 young_gen_action = no_change_msg;
469 }
471 if (change_old_gen_for_maj_pauses() == decrease_old_gen_for_maj_pauses_true) {
472 tenured_gen_action = shrink_msg;
473 } else if (change_for_pause) {
474 tenured_gen_action = no_change_msg;
475 }
477 // Throughput
478 if (change_old_gen_for_throughput() == increase_old_gen_for_throughput_true) {
479 assert(change_young_gen_for_throughput() ==
480 increase_young_gen_for_througput_true,
481 "Both generations should be growing");
482 young_gen_action = grow_msg;
483 tenured_gen_action = grow_msg;
484 } else if (change_young_gen_for_throughput() ==
485 increase_young_gen_for_througput_true) {
486 // Only the young generation may grow at start up (before
487 // enough full collections have been done to grow the old generation).
488 young_gen_action = grow_msg;
489 tenured_gen_action = no_change_msg;
490 }
492 // Minimum footprint
493 if (decrease_for_footprint() != 0) {
494 young_gen_action = shrink_msg;
495 tenured_gen_action = shrink_msg;
496 }
498 st->print_cr(" UseAdaptiveSizePolicy actions to meet %s", action);
499 st->print_cr(" GC overhead (%%)");
500 st->print_cr(" Young generation: %7.2f\t %s",
501 100.0 * avg_minor_gc_cost()->average(),
502 young_gen_action);
503 st->print_cr(" Tenured generation: %7.2f\t %s",
504 100.0 * avg_major_gc_cost()->average(),
505 tenured_gen_action);
506 return true;
507 }
509 bool AdaptiveSizePolicy::print_adaptive_size_policy_on(
510 outputStream* st,
511 int tenuring_threshold_arg) const {
512 if (!AdaptiveSizePolicy::print_adaptive_size_policy_on(st)) {
513 return false;
514 }
516 // Tenuring threshold
517 bool tenuring_threshold_changed = true;
518 if (decrement_tenuring_threshold_for_survivor_limit()) {
519 st->print(" Tenuring threshold: (attempted to decrease to avoid"
520 " survivor space overflow) = ");
521 } else if (decrement_tenuring_threshold_for_gc_cost()) {
522 st->print(" Tenuring threshold: (attempted to decrease to balance"
523 " GC costs) = ");
524 } else if (increment_tenuring_threshold_for_gc_cost()) {
525 st->print(" Tenuring threshold: (attempted to increase to balance"
526 " GC costs) = ");
527 } else {
528 tenuring_threshold_changed = false;
529 assert(!tenuring_threshold_change(), "(no change was attempted)");
530 }
531 if (tenuring_threshold_changed) {
532 st->print_cr("%d", tenuring_threshold_arg);
533 }
534 return true;
535 }