7068240: G1: Long "parallel other time" and "ext root scanning" when running specific benchmark

Mon, 01 Aug 2011 10:04:28 -0700

author
johnc
date
Mon, 01 Aug 2011 10:04:28 -0700
changeset 3021
14a2fd14c0db
parent 3019
8107273fd204
child 3022
6aa4feb8a366

7068240: G1: Long "parallel other time" and "ext root scanning" when running specific benchmark
Summary: In root processing, move the scanning of the reference processor's discovered lists to before RSet updating and scanning. When scanning the reference processor's discovered lists, use a buffering closure so that the time spent copying any reference object is correctly attributed. Also removed a couple of unused and irrelevant timers.
Reviewed-by: ysr, jmasa

src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp file | annotate | diff | comparison | revisions
src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp file | annotate | diff | comparison | revisions
     1.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sat Jul 23 10:42:20 2011 -0400
     1.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Aug 01 10:04:28 2011 -0700
     1.3 @@ -4834,6 +4834,7 @@
     1.4                                    scan_perm_cl,
     1.5                                    i);
     1.6      pss.end_strong_roots();
     1.7 +
     1.8      {
     1.9        double start = os::elapsedTime();
    1.10        G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
    1.11 @@ -4890,41 +4891,43 @@
    1.12                         &eager_scan_code_roots,
    1.13                         &buf_scan_perm);
    1.14  
    1.15 -  // Finish up any enqueued closure apps.
    1.16 -  buf_scan_non_heap_roots.done();
    1.17 -  buf_scan_perm.done();
    1.18 -  double ext_roots_end = os::elapsedTime();
    1.19 -  g1_policy()->reset_obj_copy_time(worker_i);
    1.20 -  double obj_copy_time_sec =
    1.21 -    buf_scan_non_heap_roots.closure_app_seconds() +
    1.22 -    buf_scan_perm.closure_app_seconds();
    1.23 -  g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
    1.24 -  double ext_root_time_ms =
    1.25 -    ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
    1.26 -  g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
    1.27 -
    1.28 -  // Scan strong roots in mark stack.
    1.29 -  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
    1.30 -    concurrent_mark()->oops_do(scan_non_heap_roots);
    1.31 -  }
    1.32 -  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
    1.33 -  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
    1.34 -
    1.35 -  // XXX What should this be doing in the parallel case?
    1.36 -  g1_policy()->record_collection_pause_end_CH_strong_roots();
    1.37 -  // Now scan the complement of the collection set.
    1.38 -  if (scan_rs != NULL) {
    1.39 -    g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
    1.40 -  }
    1.41 -  // Finish with the ref_processor roots.
    1.42 +  // Now the ref_processor roots.
    1.43    if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
    1.44      // We need to treat the discovered reference lists as roots and
    1.45      // keep entries (which are added by the marking threads) on them
    1.46      // live until they can be processed at the end of marking.
    1.47 -    ref_processor()->weak_oops_do(scan_non_heap_roots);
    1.48 -    ref_processor()->oops_do(scan_non_heap_roots);
    1.49 -  }
    1.50 -  g1_policy()->record_collection_pause_end_G1_strong_roots();
    1.51 +    ref_processor()->weak_oops_do(&buf_scan_non_heap_roots);
    1.52 +    ref_processor()->oops_do(&buf_scan_non_heap_roots);
    1.53 +  }
    1.54 +
    1.55 +  // Finish up any enqueued closure apps (attributed as object copy time).
    1.56 +  buf_scan_non_heap_roots.done();
    1.57 +  buf_scan_perm.done();
    1.58 +
    1.59 +  double ext_roots_end = os::elapsedTime();
    1.60 +
    1.61 +  g1_policy()->reset_obj_copy_time(worker_i);
    1.62 +  double obj_copy_time_sec = buf_scan_perm.closure_app_seconds() +
    1.63 +                                buf_scan_non_heap_roots.closure_app_seconds();
    1.64 +  g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
    1.65 +
    1.66 +  double ext_root_time_ms =
    1.67 +    ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
    1.68 +
    1.69 +  g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
    1.70 +
    1.71 +  // Scan strong roots in mark stack.
    1.72 +  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
    1.73 +    concurrent_mark()->oops_do(scan_non_heap_roots);
    1.74 +  }
    1.75 +  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
    1.76 +  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
    1.77 +
    1.78 +  // Now scan the complement of the collection set.
    1.79 +  if (scan_rs != NULL) {
    1.80 +    g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
    1.81 +  }
    1.82 +
    1.83    _process_strong_tasks->all_tasks_completed();
    1.84  }
    1.85  
     2.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Sat Jul 23 10:42:20 2011 -0400
     2.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Aug 01 10:04:28 2011 -0700
     2.3 @@ -393,8 +393,7 @@
     2.4    // Perform finalization stuff on all allocation regions.
     2.5    void retire_all_alloc_regions();
     2.6  
     2.7 -  // The number of regions allocated to hold humongous objects.
     2.8 -  int         _num_humongous_regions;
     2.9 +  // The young region list.
    2.10    YoungList*  _young_list;
    2.11  
    2.12    // The current policy object for the collector.
     3.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Sat Jul 23 10:42:20 2011 -0400
     3.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Aug 01 10:04:28 2011 -0700
     3.3 @@ -134,13 +134,10 @@
     3.4  
     3.5  G1CollectorPolicy::G1CollectorPolicy() :
     3.6    _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
     3.7 -    ? ParallelGCThreads : 1),
     3.8 -
     3.9 +                        ? ParallelGCThreads : 1),
    3.10  
    3.11    _n_pauses(0),
    3.12 -  _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    3.13 -  _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    3.14 -  _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    3.15 +  _recent_rs_scan_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    3.16    _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    3.17    _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)),
    3.18    _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    3.19 @@ -1050,18 +1047,6 @@
    3.20  void G1CollectorPolicy::record_concurrent_pause_end() {
    3.21  }
    3.22  
    3.23 -void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() {
    3.24 -  _cur_CH_strong_roots_end_sec = os::elapsedTime();
    3.25 -  _cur_CH_strong_roots_dur_ms =
    3.26 -    (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0;
    3.27 -}
    3.28 -
    3.29 -void G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() {
    3.30 -  _cur_G1_strong_roots_end_sec = os::elapsedTime();
    3.31 -  _cur_G1_strong_roots_dur_ms =
    3.32 -    (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0;
    3.33 -}
    3.34 -
    3.35  template<class T>
    3.36  T sum_of(T* sum_arr, int start, int n, int N) {
    3.37    T sum = (T)0;
    3.38 @@ -1183,7 +1168,6 @@
    3.39    double end_time_sec = os::elapsedTime();
    3.40    double elapsed_ms = _last_pause_time_ms;
    3.41    bool parallel = G1CollectedHeap::use_parallel_gc_threads();
    3.42 -  double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0;
    3.43    size_t rs_size =
    3.44      _cur_collection_pause_used_regions_at_start - collection_set_size();
    3.45    size_t cur_used_bytes = _g1->used();
    3.46 @@ -1256,14 +1240,52 @@
    3.47  
    3.48    _n_pauses++;
    3.49  
    3.50 +  double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
    3.51 +  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
    3.52 +  double update_rs_time = avg_value(_par_last_update_rs_times_ms);
    3.53 +  double update_rs_processed_buffers =
    3.54 +    sum_of_values(_par_last_update_rs_processed_buffers);
    3.55 +  double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
    3.56 +  double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
    3.57 +  double termination_time = avg_value(_par_last_termination_times_ms);
    3.58 +
    3.59 +  double parallel_known_time = update_rs_time +
    3.60 +                               ext_root_scan_time +
    3.61 +                               mark_stack_scan_time +
    3.62 +                               scan_rs_time +
    3.63 +                               obj_copy_time +
    3.64 +                               termination_time;
    3.65 +
    3.66 +  double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
    3.67 +
    3.68 +  PauseSummary* summary = _summary;
    3.69 +
    3.70    if (update_stats) {
    3.71 -    _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms);
    3.72 -    _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms);
    3.73 -    _recent_evac_times_ms->add(evac_ms);
    3.74 +    _recent_rs_scan_times_ms->add(scan_rs_time);
    3.75      _recent_pause_times_ms->add(elapsed_ms);
    3.76 -
    3.77      _recent_rs_sizes->add(rs_size);
    3.78  
    3.79 +    MainBodySummary* body_summary = summary->main_body_summary();
    3.80 +    guarantee(body_summary != NULL, "should not be null!");
    3.81 +
    3.82 +    if (_satb_drain_time_set)
    3.83 +      body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
    3.84 +    else
    3.85 +      body_summary->record_satb_drain_time_ms(0.0);
    3.86 +
    3.87 +    body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
    3.88 +    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
    3.89 +    body_summary->record_update_rs_time_ms(update_rs_time);
    3.90 +    body_summary->record_scan_rs_time_ms(scan_rs_time);
    3.91 +    body_summary->record_obj_copy_time_ms(obj_copy_time);
    3.92 +    if (parallel) {
    3.93 +      body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
    3.94 +      body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
    3.95 +      body_summary->record_termination_time_ms(termination_time);
    3.96 +      body_summary->record_parallel_other_time_ms(parallel_other_time);
    3.97 +    }
    3.98 +    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
    3.99 +
   3.100      // We exempt parallel collection from this check because Alloc Buffer
   3.101      // fragmentation can produce negative collections.  Same with evac
   3.102      // failure.
   3.103 @@ -1328,56 +1350,12 @@
   3.104      gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
   3.105    }
   3.106  
   3.107 -  PauseSummary* summary = _summary;
   3.108 -
   3.109 -  double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
   3.110 -  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
   3.111 -  double update_rs_time = avg_value(_par_last_update_rs_times_ms);
   3.112 -  double update_rs_processed_buffers =
   3.113 -    sum_of_values(_par_last_update_rs_processed_buffers);
   3.114 -  double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
   3.115 -  double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
   3.116 -  double termination_time = avg_value(_par_last_termination_times_ms);
   3.117 -
   3.118 -  double parallel_other_time = _cur_collection_par_time_ms -
   3.119 -    (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
   3.120 -     scan_rs_time + obj_copy_time + termination_time);
   3.121 -  if (update_stats) {
   3.122 -    MainBodySummary* body_summary = summary->main_body_summary();
   3.123 -    guarantee(body_summary != NULL, "should not be null!");
   3.124 -
   3.125 -    if (_satb_drain_time_set)
   3.126 -      body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
   3.127 -    else
   3.128 -      body_summary->record_satb_drain_time_ms(0.0);
   3.129 -    body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
   3.130 -    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
   3.131 -    body_summary->record_update_rs_time_ms(update_rs_time);
   3.132 -    body_summary->record_scan_rs_time_ms(scan_rs_time);
   3.133 -    body_summary->record_obj_copy_time_ms(obj_copy_time);
   3.134 -    if (parallel) {
   3.135 -      body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
   3.136 -      body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
   3.137 -      body_summary->record_termination_time_ms(termination_time);
   3.138 -      body_summary->record_parallel_other_time_ms(parallel_other_time);
   3.139 -    }
   3.140 -    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
   3.141 -  }
   3.142 -
   3.143    if (G1PolicyVerbose > 1) {
   3.144      gclog_or_tty->print_cr("      ET: %10.6f ms           (avg: %10.6f ms)\n"
   3.145 -                           "        CH Strong: %10.6f ms    (avg: %10.6f ms)\n"
   3.146 -                           "        G1 Strong: %10.6f ms    (avg: %10.6f ms)\n"
   3.147 -                           "        Evac:      %10.6f ms    (avg: %10.6f ms)\n"
   3.148                             "       ET-RS:  %10.6f ms      (avg: %10.6f ms)\n"
   3.149                             "      |RS|: " SIZE_FORMAT,
   3.150                             elapsed_ms, recent_avg_time_for_pauses_ms(),
   3.151 -                           _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(),
   3.152 -                           _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(),
   3.153 -                           evac_ms, recent_avg_time_for_evac_ms(),
   3.154 -                           scan_rs_time,
   3.155 -                           recent_avg_time_for_pauses_ms() -
   3.156 -                           recent_avg_time_for_G1_strong_ms(),
   3.157 +                           scan_rs_time, recent_avg_time_for_rs_scan_ms(),
   3.158                             rs_size);
   3.159  
   3.160      gclog_or_tty->print_cr("       Used at start: " SIZE_FORMAT"K"
   3.161 @@ -1438,7 +1416,7 @@
   3.162        }
   3.163        print_par_stats(2, "GC Worker Times", _par_last_gc_worker_times_ms);
   3.164  
   3.165 -      print_stats(2, "Other", parallel_other_time);
   3.166 +      print_stats(2, "Parallel Other", parallel_other_time);
   3.167        print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
   3.168      } else {
   3.169        print_stats(1, "Update RS", update_rs_time);
   3.170 @@ -1967,38 +1945,27 @@
   3.171  }
   3.172  
   3.173  double G1CollectorPolicy::recent_avg_time_for_pauses_ms() {
   3.174 -  if (_recent_pause_times_ms->num() == 0) return (double) MaxGCPauseMillis;
   3.175 -  else return _recent_pause_times_ms->avg();
   3.176 +  if (_recent_pause_times_ms->num() == 0) {
   3.177 +    return (double) MaxGCPauseMillis;
   3.178 +  }
   3.179 +  return _recent_pause_times_ms->avg();
   3.180  }
   3.181  
   3.182 -double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() {
   3.183 -  if (_recent_CH_strong_roots_times_ms->num() == 0)
   3.184 +double G1CollectorPolicy::recent_avg_time_for_rs_scan_ms() {
   3.185 +  if (_recent_rs_scan_times_ms->num() == 0) {
   3.186      return (double)MaxGCPauseMillis/3.0;
   3.187 -  else return _recent_CH_strong_roots_times_ms->avg();
   3.188 +  }
   3.189 +  return _recent_rs_scan_times_ms->avg();
   3.190  }
   3.191  
   3.192 -double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() {
   3.193 -  if (_recent_G1_strong_roots_times_ms->num() == 0)
   3.194 -    return (double)MaxGCPauseMillis/3.0;
   3.195 -  else return _recent_G1_strong_roots_times_ms->avg();
   3.196 -}
   3.197 -
   3.198 -double G1CollectorPolicy::recent_avg_time_for_evac_ms() {
   3.199 -  if (_recent_evac_times_ms->num() == 0) return (double)MaxGCPauseMillis/3.0;
   3.200 -  else return _recent_evac_times_ms->avg();
   3.201 -}
   3.202 -
   3.203  int G1CollectorPolicy::number_of_recent_gcs() {
   3.204 -  assert(_recent_CH_strong_roots_times_ms->num() ==
   3.205 -         _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync");
   3.206 -  assert(_recent_G1_strong_roots_times_ms->num() ==
   3.207 -         _recent_evac_times_ms->num(), "Sequence out of sync");
   3.208 -  assert(_recent_evac_times_ms->num() ==
   3.209 +  assert(_recent_rs_scan_times_ms->num() ==
   3.210           _recent_pause_times_ms->num(), "Sequence out of sync");
   3.211    assert(_recent_pause_times_ms->num() ==
   3.212           _recent_CS_bytes_used_before->num(), "Sequence out of sync");
   3.213    assert(_recent_CS_bytes_used_before->num() ==
   3.214           _recent_CS_bytes_surviving->num(), "Sequence out of sync");
   3.215 +
   3.216    return _recent_pause_times_ms->num();
   3.217  }
   3.218  
     4.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Sat Jul 23 10:42:20 2011 -0400
     4.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Aug 01 10:04:28 2011 -0700
     4.3 @@ -129,15 +129,9 @@
     4.4    jlong  _num_cc_clears;                // number of times the card count cache has been cleared
     4.5  #endif
     4.6  
     4.7 -  double _cur_CH_strong_roots_end_sec;
     4.8 -  double _cur_CH_strong_roots_dur_ms;
     4.9 -  double _cur_G1_strong_roots_end_sec;
    4.10 -  double _cur_G1_strong_roots_dur_ms;
    4.11 +  // Statistics for recent GC pauses.  See below for how indexed.
    4.12 +  TruncatedSeq* _recent_rs_scan_times_ms;
    4.13  
    4.14 -  // Statistics for recent GC pauses.  See below for how indexed.
    4.15 -  TruncatedSeq* _recent_CH_strong_roots_times_ms;
    4.16 -  TruncatedSeq* _recent_G1_strong_roots_times_ms;
    4.17 -  TruncatedSeq* _recent_evac_times_ms;
    4.18    // These exclude marking times.
    4.19    TruncatedSeq* _recent_pause_times_ms;
    4.20    TruncatedSeq* _recent_gc_times_ms;
    4.21 @@ -692,17 +686,11 @@
    4.22    // The average time in ms per collection pause, averaged over recent pauses.
    4.23    double recent_avg_time_for_pauses_ms();
    4.24  
    4.25 -  // The average time in ms for processing CollectedHeap strong roots, per
    4.26 -  // collection pause, averaged over recent pauses.
    4.27 -  double recent_avg_time_for_CH_strong_ms();
    4.28 -
    4.29 -  // The average time in ms for processing the G1 remembered set, per
    4.30 -  // pause, averaged over recent pauses.
    4.31 -  double recent_avg_time_for_G1_strong_ms();
    4.32 -
    4.33 -  // The average time in ms for "evacuating followers", per pause, averaged
    4.34 -  // over recent pauses.
    4.35 -  double recent_avg_time_for_evac_ms();
    4.36 +  // The average time in ms for RS scanning, per pause, averaged
    4.37 +  // over recent pauses. (Note the RS scanning time for a pause
    4.38 +  // is itself an average of the RS scanning time for each worker
    4.39 +  // thread.)
    4.40 +  double recent_avg_time_for_rs_scan_ms();
    4.41  
    4.42    // The number of "recent" GCs recorded in the number sequences
    4.43    int number_of_recent_gcs();
    4.44 @@ -887,9 +875,6 @@
    4.45    virtual void record_concurrent_pause();
    4.46    virtual void record_concurrent_pause_end();
    4.47  
    4.48 -  virtual void record_collection_pause_end_CH_strong_roots();
    4.49 -  virtual void record_collection_pause_end_G1_strong_roots();
    4.50 -
    4.51    virtual void record_collection_pause_end();
    4.52    void print_heap_transition();
    4.53  

mercurial