Mon, 01 Aug 2011 10:04:28 -0700
7068240: G1: Long "parallel other time" and "ext root scanning" when running specific benchmark
Summary: In root processing, move the scanning of the reference processor's discovered lists to before RSet updating and scanning. When scanning the reference processor's discovered lists, use a buffering closure so that the time spent copying any reference object is correctly attributed. Also removed a couple of unused and irrelevant timers.
Reviewed-by: ysr, jmasa
1.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Sat Jul 23 10:42:20 2011 -0400 1.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Mon Aug 01 10:04:28 2011 -0700 1.3 @@ -4834,6 +4834,7 @@ 1.4 scan_perm_cl, 1.5 i); 1.6 pss.end_strong_roots(); 1.7 + 1.8 { 1.9 double start = os::elapsedTime(); 1.10 G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator); 1.11 @@ -4890,41 +4891,43 @@ 1.12 &eager_scan_code_roots, 1.13 &buf_scan_perm); 1.14 1.15 - // Finish up any enqueued closure apps. 1.16 - buf_scan_non_heap_roots.done(); 1.17 - buf_scan_perm.done(); 1.18 - double ext_roots_end = os::elapsedTime(); 1.19 - g1_policy()->reset_obj_copy_time(worker_i); 1.20 - double obj_copy_time_sec = 1.21 - buf_scan_non_heap_roots.closure_app_seconds() + 1.22 - buf_scan_perm.closure_app_seconds(); 1.23 - g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0); 1.24 - double ext_root_time_ms = 1.25 - ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0; 1.26 - g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms); 1.27 - 1.28 - // Scan strong roots in mark stack. 1.29 - if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) { 1.30 - concurrent_mark()->oops_do(scan_non_heap_roots); 1.31 - } 1.32 - double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0; 1.33 - g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms); 1.34 - 1.35 - // XXX What should this be doing in the parallel case? 1.36 - g1_policy()->record_collection_pause_end_CH_strong_roots(); 1.37 - // Now scan the complement of the collection set. 1.38 - if (scan_rs != NULL) { 1.39 - g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i); 1.40 - } 1.41 - // Finish with the ref_processor roots. 1.42 + // Now the ref_processor roots. 1.43 if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) { 1.44 // We need to treat the discovered reference lists as roots and 1.45 // keep entries (which are added by the marking threads) on them 1.46 // live until they can be processed at the end of marking. 1.47 - ref_processor()->weak_oops_do(scan_non_heap_roots); 1.48 - ref_processor()->oops_do(scan_non_heap_roots); 1.49 - } 1.50 - g1_policy()->record_collection_pause_end_G1_strong_roots(); 1.51 + ref_processor()->weak_oops_do(&buf_scan_non_heap_roots); 1.52 + ref_processor()->oops_do(&buf_scan_non_heap_roots); 1.53 + } 1.54 + 1.55 + // Finish up any enqueued closure apps (attributed as object copy time). 1.56 + buf_scan_non_heap_roots.done(); 1.57 + buf_scan_perm.done(); 1.58 + 1.59 + double ext_roots_end = os::elapsedTime(); 1.60 + 1.61 + g1_policy()->reset_obj_copy_time(worker_i); 1.62 + double obj_copy_time_sec = buf_scan_perm.closure_app_seconds() + 1.63 + buf_scan_non_heap_roots.closure_app_seconds(); 1.64 + g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0); 1.65 + 1.66 + double ext_root_time_ms = 1.67 + ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0; 1.68 + 1.69 + g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms); 1.70 + 1.71 + // Scan strong roots in mark stack. 1.72 + if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) { 1.73 + concurrent_mark()->oops_do(scan_non_heap_roots); 1.74 + } 1.75 + double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0; 1.76 + g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms); 1.77 + 1.78 + // Now scan the complement of the collection set. 1.79 + if (scan_rs != NULL) { 1.80 + g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i); 1.81 + } 1.82 + 1.83 _process_strong_tasks->all_tasks_completed(); 1.84 } 1.85
2.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Sat Jul 23 10:42:20 2011 -0400 2.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Mon Aug 01 10:04:28 2011 -0700 2.3 @@ -393,8 +393,7 @@ 2.4 // Perform finalization stuff on all allocation regions. 2.5 void retire_all_alloc_regions(); 2.6 2.7 - // The number of regions allocated to hold humongous objects. 2.8 - int _num_humongous_regions; 2.9 + // The young region list. 2.10 YoungList* _young_list; 2.11 2.12 // The current policy object for the collector.
3.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Sat Jul 23 10:42:20 2011 -0400 3.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Mon Aug 01 10:04:28 2011 -0700 3.3 @@ -134,13 +134,10 @@ 3.4 3.5 G1CollectorPolicy::G1CollectorPolicy() : 3.6 _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads() 3.7 - ? ParallelGCThreads : 1), 3.8 - 3.9 + ? ParallelGCThreads : 1), 3.10 3.11 _n_pauses(0), 3.12 - _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 3.13 - _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 3.14 - _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 3.15 + _recent_rs_scan_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 3.16 _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 3.17 _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)), 3.18 _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 3.19 @@ -1050,18 +1047,6 @@ 3.20 void G1CollectorPolicy::record_concurrent_pause_end() { 3.21 } 3.22 3.23 -void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() { 3.24 - _cur_CH_strong_roots_end_sec = os::elapsedTime(); 3.25 - _cur_CH_strong_roots_dur_ms = 3.26 - (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0; 3.27 -} 3.28 - 3.29 -void G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() { 3.30 - _cur_G1_strong_roots_end_sec = os::elapsedTime(); 3.31 - _cur_G1_strong_roots_dur_ms = 3.32 - (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0; 3.33 -} 3.34 - 3.35 template<class T> 3.36 T sum_of(T* sum_arr, int start, int n, int N) { 3.37 T sum = (T)0; 3.38 @@ -1183,7 +1168,6 @@ 3.39 double end_time_sec = os::elapsedTime(); 3.40 double elapsed_ms = _last_pause_time_ms; 3.41 bool parallel = G1CollectedHeap::use_parallel_gc_threads(); 3.42 - double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0; 3.43 size_t rs_size = 3.44 _cur_collection_pause_used_regions_at_start - collection_set_size(); 3.45 size_t cur_used_bytes = _g1->used(); 3.46 @@ -1256,14 +1240,52 @@ 3.47 3.48 _n_pauses++; 3.49 3.50 + double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms); 3.51 + double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms); 3.52 + double update_rs_time = avg_value(_par_last_update_rs_times_ms); 3.53 + double update_rs_processed_buffers = 3.54 + sum_of_values(_par_last_update_rs_processed_buffers); 3.55 + double scan_rs_time = avg_value(_par_last_scan_rs_times_ms); 3.56 + double obj_copy_time = avg_value(_par_last_obj_copy_times_ms); 3.57 + double termination_time = avg_value(_par_last_termination_times_ms); 3.58 + 3.59 + double parallel_known_time = update_rs_time + 3.60 + ext_root_scan_time + 3.61 + mark_stack_scan_time + 3.62 + scan_rs_time + 3.63 + obj_copy_time + 3.64 + termination_time; 3.65 + 3.66 + double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time; 3.67 + 3.68 + PauseSummary* summary = _summary; 3.69 + 3.70 if (update_stats) { 3.71 - _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms); 3.72 - _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms); 3.73 - _recent_evac_times_ms->add(evac_ms); 3.74 + _recent_rs_scan_times_ms->add(scan_rs_time); 3.75 _recent_pause_times_ms->add(elapsed_ms); 3.76 - 3.77 _recent_rs_sizes->add(rs_size); 3.78 3.79 + MainBodySummary* body_summary = summary->main_body_summary(); 3.80 + guarantee(body_summary != NULL, "should not be null!"); 3.81 + 3.82 + if (_satb_drain_time_set) 3.83 + body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms); 3.84 + else 3.85 + body_summary->record_satb_drain_time_ms(0.0); 3.86 + 3.87 + body_summary->record_ext_root_scan_time_ms(ext_root_scan_time); 3.88 + body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time); 3.89 + body_summary->record_update_rs_time_ms(update_rs_time); 3.90 + body_summary->record_scan_rs_time_ms(scan_rs_time); 3.91 + body_summary->record_obj_copy_time_ms(obj_copy_time); 3.92 + if (parallel) { 3.93 + body_summary->record_parallel_time_ms(_cur_collection_par_time_ms); 3.94 + body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms); 3.95 + body_summary->record_termination_time_ms(termination_time); 3.96 + body_summary->record_parallel_other_time_ms(parallel_other_time); 3.97 + } 3.98 + body_summary->record_mark_closure_time_ms(_mark_closure_time_ms); 3.99 + 3.100 // We exempt parallel collection from this check because Alloc Buffer 3.101 // fragmentation can produce negative collections. Same with evac 3.102 // failure. 3.103 @@ -1328,56 +1350,12 @@ 3.104 gclog_or_tty->print_cr(" Recording collection pause(%d)", _n_pauses); 3.105 } 3.106 3.107 - PauseSummary* summary = _summary; 3.108 - 3.109 - double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms); 3.110 - double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms); 3.111 - double update_rs_time = avg_value(_par_last_update_rs_times_ms); 3.112 - double update_rs_processed_buffers = 3.113 - sum_of_values(_par_last_update_rs_processed_buffers); 3.114 - double scan_rs_time = avg_value(_par_last_scan_rs_times_ms); 3.115 - double obj_copy_time = avg_value(_par_last_obj_copy_times_ms); 3.116 - double termination_time = avg_value(_par_last_termination_times_ms); 3.117 - 3.118 - double parallel_other_time = _cur_collection_par_time_ms - 3.119 - (update_rs_time + ext_root_scan_time + mark_stack_scan_time + 3.120 - scan_rs_time + obj_copy_time + termination_time); 3.121 - if (update_stats) { 3.122 - MainBodySummary* body_summary = summary->main_body_summary(); 3.123 - guarantee(body_summary != NULL, "should not be null!"); 3.124 - 3.125 - if (_satb_drain_time_set) 3.126 - body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms); 3.127 - else 3.128 - body_summary->record_satb_drain_time_ms(0.0); 3.129 - body_summary->record_ext_root_scan_time_ms(ext_root_scan_time); 3.130 - body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time); 3.131 - body_summary->record_update_rs_time_ms(update_rs_time); 3.132 - body_summary->record_scan_rs_time_ms(scan_rs_time); 3.133 - body_summary->record_obj_copy_time_ms(obj_copy_time); 3.134 - if (parallel) { 3.135 - body_summary->record_parallel_time_ms(_cur_collection_par_time_ms); 3.136 - body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms); 3.137 - body_summary->record_termination_time_ms(termination_time); 3.138 - body_summary->record_parallel_other_time_ms(parallel_other_time); 3.139 - } 3.140 - body_summary->record_mark_closure_time_ms(_mark_closure_time_ms); 3.141 - } 3.142 - 3.143 if (G1PolicyVerbose > 1) { 3.144 gclog_or_tty->print_cr(" ET: %10.6f ms (avg: %10.6f ms)\n" 3.145 - " CH Strong: %10.6f ms (avg: %10.6f ms)\n" 3.146 - " G1 Strong: %10.6f ms (avg: %10.6f ms)\n" 3.147 - " Evac: %10.6f ms (avg: %10.6f ms)\n" 3.148 " ET-RS: %10.6f ms (avg: %10.6f ms)\n" 3.149 " |RS|: " SIZE_FORMAT, 3.150 elapsed_ms, recent_avg_time_for_pauses_ms(), 3.151 - _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(), 3.152 - _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(), 3.153 - evac_ms, recent_avg_time_for_evac_ms(), 3.154 - scan_rs_time, 3.155 - recent_avg_time_for_pauses_ms() - 3.156 - recent_avg_time_for_G1_strong_ms(), 3.157 + scan_rs_time, recent_avg_time_for_rs_scan_ms(), 3.158 rs_size); 3.159 3.160 gclog_or_tty->print_cr(" Used at start: " SIZE_FORMAT"K" 3.161 @@ -1438,7 +1416,7 @@ 3.162 } 3.163 print_par_stats(2, "GC Worker Times", _par_last_gc_worker_times_ms); 3.164 3.165 - print_stats(2, "Other", parallel_other_time); 3.166 + print_stats(2, "Parallel Other", parallel_other_time); 3.167 print_stats(1, "Clear CT", _cur_clear_ct_time_ms); 3.168 } else { 3.169 print_stats(1, "Update RS", update_rs_time); 3.170 @@ -1967,38 +1945,27 @@ 3.171 } 3.172 3.173 double G1CollectorPolicy::recent_avg_time_for_pauses_ms() { 3.174 - if (_recent_pause_times_ms->num() == 0) return (double) MaxGCPauseMillis; 3.175 - else return _recent_pause_times_ms->avg(); 3.176 + if (_recent_pause_times_ms->num() == 0) { 3.177 + return (double) MaxGCPauseMillis; 3.178 + } 3.179 + return _recent_pause_times_ms->avg(); 3.180 } 3.181 3.182 -double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() { 3.183 - if (_recent_CH_strong_roots_times_ms->num() == 0) 3.184 +double G1CollectorPolicy::recent_avg_time_for_rs_scan_ms() { 3.185 + if (_recent_rs_scan_times_ms->num() == 0) { 3.186 return (double)MaxGCPauseMillis/3.0; 3.187 - else return _recent_CH_strong_roots_times_ms->avg(); 3.188 + } 3.189 + return _recent_rs_scan_times_ms->avg(); 3.190 } 3.191 3.192 -double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() { 3.193 - if (_recent_G1_strong_roots_times_ms->num() == 0) 3.194 - return (double)MaxGCPauseMillis/3.0; 3.195 - else return _recent_G1_strong_roots_times_ms->avg(); 3.196 -} 3.197 - 3.198 -double G1CollectorPolicy::recent_avg_time_for_evac_ms() { 3.199 - if (_recent_evac_times_ms->num() == 0) return (double)MaxGCPauseMillis/3.0; 3.200 - else return _recent_evac_times_ms->avg(); 3.201 -} 3.202 - 3.203 int G1CollectorPolicy::number_of_recent_gcs() { 3.204 - assert(_recent_CH_strong_roots_times_ms->num() == 3.205 - _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync"); 3.206 - assert(_recent_G1_strong_roots_times_ms->num() == 3.207 - _recent_evac_times_ms->num(), "Sequence out of sync"); 3.208 - assert(_recent_evac_times_ms->num() == 3.209 + assert(_recent_rs_scan_times_ms->num() == 3.210 _recent_pause_times_ms->num(), "Sequence out of sync"); 3.211 assert(_recent_pause_times_ms->num() == 3.212 _recent_CS_bytes_used_before->num(), "Sequence out of sync"); 3.213 assert(_recent_CS_bytes_used_before->num() == 3.214 _recent_CS_bytes_surviving->num(), "Sequence out of sync"); 3.215 + 3.216 return _recent_pause_times_ms->num(); 3.217 } 3.218
4.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Sat Jul 23 10:42:20 2011 -0400 4.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Mon Aug 01 10:04:28 2011 -0700 4.3 @@ -129,15 +129,9 @@ 4.4 jlong _num_cc_clears; // number of times the card count cache has been cleared 4.5 #endif 4.6 4.7 - double _cur_CH_strong_roots_end_sec; 4.8 - double _cur_CH_strong_roots_dur_ms; 4.9 - double _cur_G1_strong_roots_end_sec; 4.10 - double _cur_G1_strong_roots_dur_ms; 4.11 + // Statistics for recent GC pauses. See below for how indexed. 4.12 + TruncatedSeq* _recent_rs_scan_times_ms; 4.13 4.14 - // Statistics for recent GC pauses. See below for how indexed. 4.15 - TruncatedSeq* _recent_CH_strong_roots_times_ms; 4.16 - TruncatedSeq* _recent_G1_strong_roots_times_ms; 4.17 - TruncatedSeq* _recent_evac_times_ms; 4.18 // These exclude marking times. 4.19 TruncatedSeq* _recent_pause_times_ms; 4.20 TruncatedSeq* _recent_gc_times_ms; 4.21 @@ -692,17 +686,11 @@ 4.22 // The average time in ms per collection pause, averaged over recent pauses. 4.23 double recent_avg_time_for_pauses_ms(); 4.24 4.25 - // The average time in ms for processing CollectedHeap strong roots, per 4.26 - // collection pause, averaged over recent pauses. 4.27 - double recent_avg_time_for_CH_strong_ms(); 4.28 - 4.29 - // The average time in ms for processing the G1 remembered set, per 4.30 - // pause, averaged over recent pauses. 4.31 - double recent_avg_time_for_G1_strong_ms(); 4.32 - 4.33 - // The average time in ms for "evacuating followers", per pause, averaged 4.34 - // over recent pauses. 4.35 - double recent_avg_time_for_evac_ms(); 4.36 + // The average time in ms for RS scanning, per pause, averaged 4.37 + // over recent pauses. (Note the RS scanning time for a pause 4.38 + // is itself an average of the RS scanning time for each worker 4.39 + // thread.) 4.40 + double recent_avg_time_for_rs_scan_ms(); 4.41 4.42 // The number of "recent" GCs recorded in the number sequences 4.43 int number_of_recent_gcs(); 4.44 @@ -887,9 +875,6 @@ 4.45 virtual void record_concurrent_pause(); 4.46 virtual void record_concurrent_pause_end(); 4.47 4.48 - virtual void record_collection_pause_end_CH_strong_roots(); 4.49 - virtual void record_collection_pause_end_G1_strong_roots(); 4.50 - 4.51 virtual void record_collection_pause_end(); 4.52 void print_heap_transition(); 4.53