1.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Tue Dec 20 20:29:35 2011 -0800 1.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Wed Dec 21 07:53:53 2011 -0500 1.3 @@ -230,7 +230,9 @@ 1.4 _inc_cset_bytes_used_before(0), 1.5 _inc_cset_max_finger(NULL), 1.6 _inc_cset_recorded_rs_lengths(0), 1.7 + _inc_cset_recorded_rs_lengths_diffs(0), 1.8 _inc_cset_predicted_elapsed_time_ms(0.0), 1.9 + _inc_cset_predicted_elapsed_time_ms_diffs(0.0), 1.10 1.11 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 1.12 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 1.13 @@ -1551,10 +1553,19 @@ 1.14 } 1.15 } 1.16 1.17 - // It turns out that, sometimes, _max_rs_lengths can get smaller 1.18 - // than _recorded_rs_lengths which causes rs_length_diff to get 1.19 - // very large and mess up the RSet length predictions. We'll be 1.20 - // defensive until we work out why this happens. 1.21 + // This is defensive. For a while _max_rs_lengths could get 1.22 + // smaller than _recorded_rs_lengths which was causing 1.23 + // rs_length_diff to get very large and mess up the RSet length 1.24 + // predictions. The reason was unsafe concurrent updates to the 1.25 + // _inc_cset_recorded_rs_lengths field which the code below guards 1.26 + // against (see CR 7118202). This bug has now been fixed (see CR 1.27 + // 7119027). However, I'm still worried that 1.28 + // _inc_cset_recorded_rs_lengths might still end up somewhat 1.29 + // inaccurate. The concurrent refinement thread calculates an 1.30 + // RSet's length concurrently with other CR threads updating it 1.31 + // which might cause it to calculate the length incorrectly (if, 1.32 + // say, it's in mid-coarsening). So I'll leave in the defensive 1.33 + // conditional below just in case. 1.34 size_t rs_length_diff = 0; 1.35 if (_max_rs_lengths > _recorded_rs_lengths) { 1.36 rs_length_diff = _max_rs_lengths - _recorded_rs_lengths; 1.37 @@ -2436,10 +2447,45 @@ 1.38 1.39 _inc_cset_max_finger = 0; 1.40 _inc_cset_recorded_rs_lengths = 0; 1.41 - _inc_cset_predicted_elapsed_time_ms = 0; 1.42 + _inc_cset_recorded_rs_lengths_diffs = 0; 1.43 + _inc_cset_predicted_elapsed_time_ms = 0.0; 1.44 + _inc_cset_predicted_elapsed_time_ms_diffs = 0.0; 1.45 _inc_cset_build_state = Active; 1.46 } 1.47 1.48 +void G1CollectorPolicy::finalize_incremental_cset_building() { 1.49 + assert(_inc_cset_build_state == Active, "Precondition"); 1.50 + assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 1.51 + 1.52 + // The two "main" fields, _inc_cset_recorded_rs_lengths and 1.53 + // _inc_cset_predicted_elapsed_time_ms, are updated by the thread 1.54 + // that adds a new region to the CSet. Further updates by the 1.55 + // concurrent refinement thread that samples the young RSet lengths 1.56 + // are accumulated in the *_diffs fields. Here we add the diffs to 1.57 + // the "main" fields. 1.58 + 1.59 + if (_inc_cset_recorded_rs_lengths_diffs >= 0) { 1.60 + _inc_cset_recorded_rs_lengths += _inc_cset_recorded_rs_lengths_diffs; 1.61 + } else { 1.62 + // This is defensive. The diff should in theory be always positive 1.63 + // as RSets can only grow between GCs. However, given that we 1.64 + // sample their size concurrently with other threads updating them 1.65 + // it's possible that we might get the wrong size back, which 1.66 + // could make the calculations somewhat inaccurate. 1.67 + size_t diffs = (size_t) (-_inc_cset_recorded_rs_lengths_diffs); 1.68 + if (_inc_cset_recorded_rs_lengths >= diffs) { 1.69 + _inc_cset_recorded_rs_lengths -= diffs; 1.70 + } else { 1.71 + _inc_cset_recorded_rs_lengths = 0; 1.72 + } 1.73 + } 1.74 + _inc_cset_predicted_elapsed_time_ms += 1.75 + _inc_cset_predicted_elapsed_time_ms_diffs; 1.76 + 1.77 + _inc_cset_recorded_rs_lengths_diffs = 0; 1.78 + _inc_cset_predicted_elapsed_time_ms_diffs = 0.0; 1.79 +} 1.80 + 1.81 void G1CollectorPolicy::add_to_incremental_cset_info(HeapRegion* hr, size_t rs_length) { 1.82 // This routine is used when: 1.83 // * adding survivor regions to the incremental cset at the end of an 1.84 @@ -2455,10 +2501,8 @@ 1.85 1.86 double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true); 1.87 size_t used_bytes = hr->used(); 1.88 - 1.89 _inc_cset_recorded_rs_lengths += rs_length; 1.90 _inc_cset_predicted_elapsed_time_ms += region_elapsed_time_ms; 1.91 - 1.92 _inc_cset_bytes_used_before += used_bytes; 1.93 1.94 // Cache the values we have added to the aggregated informtion 1.95 @@ -2469,37 +2513,33 @@ 1.96 hr->set_predicted_elapsed_time_ms(region_elapsed_time_ms); 1.97 } 1.98 1.99 -void G1CollectorPolicy::remove_from_incremental_cset_info(HeapRegion* hr) { 1.100 - // This routine is currently only called as part of the updating of 1.101 - // existing policy information for regions in the incremental cset that 1.102 - // is performed by the concurrent refine thread(s) as part of young list 1.103 - // RSet sampling. Therefore we should not be at a safepoint. 1.104 - 1.105 - assert(!SafepointSynchronize::is_at_safepoint(), "should not be at safepoint"); 1.106 - assert(hr->is_young(), "it should be"); 1.107 - 1.108 - size_t used_bytes = hr->used(); 1.109 - size_t old_rs_length = hr->recorded_rs_length(); 1.110 +void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr, 1.111 + size_t new_rs_length) { 1.112 + // Update the CSet information that is dependent on the new RS length 1.113 + assert(hr->is_young(), "Precondition"); 1.114 + assert(!SafepointSynchronize::is_at_safepoint(), 1.115 + "should not be at a safepoint"); 1.116 + 1.117 + // We could have updated _inc_cset_recorded_rs_lengths and 1.118 + // _inc_cset_predicted_elapsed_time_ms directly but we'd need to do 1.119 + // that atomically, as this code is executed by a concurrent 1.120 + // refinement thread, potentially concurrently with a mutator thread 1.121 + // allocating a new region and also updating the same fields. To 1.122 + // avoid the atomic operations we accumulate these updates on two 1.123 + // separate fields (*_diffs) and we'll just add them to the "main" 1.124 + // fields at the start of a GC. 1.125 + 1.126 + ssize_t old_rs_length = (ssize_t) hr->recorded_rs_length(); 1.127 + ssize_t rs_lengths_diff = (ssize_t) new_rs_length - old_rs_length; 1.128 + _inc_cset_recorded_rs_lengths_diffs += rs_lengths_diff; 1.129 + 1.130 double old_elapsed_time_ms = hr->predicted_elapsed_time_ms(); 1.131 - 1.132 - // Subtract the old recorded/predicted policy information for 1.133 - // the given heap region from the collection set info. 1.134 - _inc_cset_recorded_rs_lengths -= old_rs_length; 1.135 - _inc_cset_predicted_elapsed_time_ms -= old_elapsed_time_ms; 1.136 - 1.137 - _inc_cset_bytes_used_before -= used_bytes; 1.138 - 1.139 - // Clear the values cached in the heap region 1.140 - hr->set_recorded_rs_length(0); 1.141 - hr->set_predicted_elapsed_time_ms(0); 1.142 -} 1.143 - 1.144 -void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr, size_t new_rs_length) { 1.145 - // Update the collection set information that is dependent on the new RS length 1.146 - assert(hr->is_young(), "Precondition"); 1.147 - 1.148 - remove_from_incremental_cset_info(hr); 1.149 - add_to_incremental_cset_info(hr, new_rs_length); 1.150 + double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true); 1.151 + double elapsed_ms_diff = new_region_elapsed_time_ms - old_elapsed_time_ms; 1.152 + _inc_cset_predicted_elapsed_time_ms_diffs += elapsed_ms_diff; 1.153 + 1.154 + hr->set_recorded_rs_length(new_rs_length); 1.155 + hr->set_predicted_elapsed_time_ms(new_region_elapsed_time_ms); 1.156 } 1.157 1.158 void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) { 1.159 @@ -2591,6 +2631,7 @@ 1.160 double non_young_start_time_sec = os::elapsedTime(); 1.161 1.162 YoungList* young_list = _g1->young_list(); 1.163 + finalize_incremental_cset_building(); 1.164 1.165 guarantee(target_pause_time_ms > 0.0, 1.166 err_msg("target_pause_time_ms = %1.6lf should be positive",