7143511: G1: Another instance of high GC Worker Other time (50ms)

author:     johnc
date:       Tue, 13 Mar 2012 11:05:32 -0700
changeset:  3689:500023bd0818
parent:     3675:9a9bb0010c91
child:      3690:748051fd24ce

7143511: G1: Another instance of high GC Worker Other time (50ms)
Summary: Tiered compilation has increased the number of nmethods in the code cache. This has, in turn, significantly increased the number of marked nmethods processed during the StrongRootsScope destructor. Create a specialized version of CodeBlobToOopClosure for G1 that places only those nmethods that contain pointers into the collection set onto the marked nmethods list.
Reviewed-by: iveresov, tonyp
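
For readers skimming the change, a minimal, self-contained sketch of the filtering idea follows. The Oop, Heap, and Nmethod types and the maybe_mark() helper are simplified stand-ins invented for illustration, not HotSpot classes; the real implementation is the G1FilteredCodeBlobToOopClosure added in g1CollectedHeap.cpp below. The point is that an nmethod is pushed onto the marked-nmethods list only if at least one of its embedded oops points into the collection set, so the StrongRootsScope destructor has far fewer nmethods to process.

// Illustrative sketch only -- all types here are stand-ins, not HotSpot code.
#include <cstdint>
#include <vector>

struct Oop { uintptr_t addr; };                 // stand-in for a heap reference

struct Heap {                                   // stand-in for G1CollectedHeap
  uintptr_t cset_start, cset_end;
  // Stand-in for in_cset_fast_test(): does this reference point into the
  // collection set?
  bool in_collection_set(Oop o) const {
    return o.addr >= cset_start && o.addr < cset_end;
  }
};

struct Nmethod {                                // stand-in for an nmethod
  std::vector<Oop> oops;                        // embedded oop fields/relocations
  bool marked = false;
};

// Before the patch, every live nmethod reached during root scanning was
// marked and later walked by the StrongRootsScope destructor. The patch
// marks an nmethod only when it actually references the collection set.
void maybe_mark(const Heap& heap, Nmethod& nm, std::vector<Nmethod*>& marked_list) {
  if (nm.marked) {
    return;                                     // already on the marked list
  }
  for (const Oop& o : nm.oops) {
    if (heap.in_collection_set(o)) {            // the filtering predicate
      nm.marked = true;
      marked_list.push_back(&nm);               // pay the marking cost only now
      return;
    }
  }
  // No reference into the collection set: leave the nmethod unmarked and off
  // the list, shrinking the work done in the StrongRootsScope destructor.
}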

src/share/vm/code/nmethod.hpp
src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp
src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp
src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
     1.1 --- a/src/share/vm/code/nmethod.hpp	Tue Mar 27 10:29:59 2012 +0200
     1.2 +++ b/src/share/vm/code/nmethod.hpp	Tue Mar 13 11:05:32 2012 -0700
     1.3 @@ -553,7 +553,7 @@
     1.4    static void oops_do_marking_prologue();
     1.5    static void oops_do_marking_epilogue();
     1.6    static bool oops_do_marking_is_active() { return _oops_do_mark_nmethods != NULL; }
     1.7 -  DEBUG_ONLY(bool test_oops_do_mark() { return _oops_do_mark_link != NULL; })
     1.8 +  bool test_oops_do_mark() { return _oops_do_mark_link != NULL; }
     1.9  
    1.10    // ScopeDesc for an instruction
    1.11    ScopeDesc* scope_desc_at(address pc);
     2.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Mar 27 10:29:59 2012 +0200
     2.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Tue Mar 13 11:05:32 2012 -0700
     2.3 @@ -4675,67 +4675,74 @@
     2.4      double start_time_ms = os::elapsedTime() * 1000.0;
     2.5      _g1h->g1_policy()->record_gc_worker_start_time(worker_id, start_time_ms);
     2.6  
     2.7 -    ResourceMark rm;
     2.8 -    HandleMark   hm;
     2.9 -
    2.10 -    ReferenceProcessor*             rp = _g1h->ref_processor_stw();
    2.11 -
    2.12 -    G1ParScanThreadState            pss(_g1h, worker_id);
    2.13 -    G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
    2.14 -    G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
    2.15 -    G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
    2.16 -
    2.17 -    pss.set_evac_closure(&scan_evac_cl);
    2.18 -    pss.set_evac_failure_closure(&evac_failure_cl);
    2.19 -    pss.set_partial_scan_closure(&partial_scan_cl);
    2.20 -
    2.21 -    G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
    2.22 -    G1ParScanPermClosure           only_scan_perm_cl(_g1h, &pss, rp);
    2.23 -
    2.24 -    G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
    2.25 -    G1ParScanAndMarkPermClosure    scan_mark_perm_cl(_g1h, &pss, rp);
    2.26 -
    2.27 -    OopClosure*                    scan_root_cl = &only_scan_root_cl;
    2.28 -    OopsInHeapRegionClosure*       scan_perm_cl = &only_scan_perm_cl;
    2.29 -
    2.30 -    if (_g1h->g1_policy()->during_initial_mark_pause()) {
    2.31 -      // We also need to mark copied objects.
    2.32 -      scan_root_cl = &scan_mark_root_cl;
    2.33 -      scan_perm_cl = &scan_mark_perm_cl;
    2.34 +    {
    2.35 +      ResourceMark rm;
    2.36 +      HandleMark   hm;
    2.37 +
    2.38 +      ReferenceProcessor*             rp = _g1h->ref_processor_stw();
    2.39 +
    2.40 +      G1ParScanThreadState            pss(_g1h, worker_id);
    2.41 +      G1ParScanHeapEvacClosure        scan_evac_cl(_g1h, &pss, rp);
    2.42 +      G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp);
    2.43 +      G1ParScanPartialArrayClosure    partial_scan_cl(_g1h, &pss, rp);
    2.44 +
    2.45 +      pss.set_evac_closure(&scan_evac_cl);
    2.46 +      pss.set_evac_failure_closure(&evac_failure_cl);
    2.47 +      pss.set_partial_scan_closure(&partial_scan_cl);
    2.48 +
    2.49 +      G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
    2.50 +      G1ParScanPermClosure           only_scan_perm_cl(_g1h, &pss, rp);
    2.51 +
    2.52 +      G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
    2.53 +      G1ParScanAndMarkPermClosure    scan_mark_perm_cl(_g1h, &pss, rp);
    2.54 +
    2.55 +      OopClosure*                    scan_root_cl = &only_scan_root_cl;
    2.56 +      OopsInHeapRegionClosure*       scan_perm_cl = &only_scan_perm_cl;
    2.57 +
    2.58 +      if (_g1h->g1_policy()->during_initial_mark_pause()) {
    2.59 +        // We also need to mark copied objects.
    2.60 +        scan_root_cl = &scan_mark_root_cl;
    2.61 +        scan_perm_cl = &scan_mark_perm_cl;
    2.62 +      }
    2.63 +
    2.64 +      G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
    2.65 +
    2.66 +      pss.start_strong_roots();
    2.67 +      _g1h->g1_process_strong_roots(/* not collecting perm */ false,
    2.68 +                                    SharedHeap::SO_AllClasses,
    2.69 +                                    scan_root_cl,
    2.70 +                                    &push_heap_rs_cl,
    2.71 +                                    scan_perm_cl,
    2.72 +                                    worker_id);
    2.73 +      pss.end_strong_roots();
    2.74 +
    2.75 +      {
    2.76 +        double start = os::elapsedTime();
    2.77 +        G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
    2.78 +        evac.do_void();
    2.79 +        double elapsed_ms = (os::elapsedTime()-start)*1000.0;
    2.80 +        double term_ms = pss.term_time()*1000.0;
    2.81 +        _g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
    2.82 +        _g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
    2.83 +      }
    2.84 +      _g1h->g1_policy()->record_thread_age_table(pss.age_table());
    2.85 +      _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
    2.86 +
    2.87 +      // Clean up any par-expanded rem sets.
    2.88 +      HeapRegionRemSet::par_cleanup();
    2.89 +
    2.90 +      if (ParallelGCVerbose) {
    2.91 +        MutexLocker x(stats_lock());
    2.92 +        pss.print_termination_stats(worker_id);
    2.93 +      }
    2.94 +
    2.95 +      assert(pss.refs()->is_empty(), "should be empty");
    2.96 +
    2.97 +      // Close the inner scope so that the ResourceMark and HandleMark
    2.98 +      // destructors are executed here and are included as part of the
    2.99 +      // "GC Worker Time".
   2.100      }
   2.101  
   2.102 -    G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
   2.103 -
   2.104 -    pss.start_strong_roots();
   2.105 -    _g1h->g1_process_strong_roots(/* not collecting perm */ false,
   2.106 -                                  SharedHeap::SO_AllClasses,
   2.107 -                                  scan_root_cl,
   2.108 -                                  &push_heap_rs_cl,
   2.109 -                                  scan_perm_cl,
   2.110 -                                  worker_id);
   2.111 -    pss.end_strong_roots();
   2.112 -
   2.113 -    {
   2.114 -      double start = os::elapsedTime();
   2.115 -      G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
   2.116 -      evac.do_void();
   2.117 -      double elapsed_ms = (os::elapsedTime()-start)*1000.0;
   2.118 -      double term_ms = pss.term_time()*1000.0;
   2.119 -      _g1h->g1_policy()->record_obj_copy_time(worker_id, elapsed_ms-term_ms);
   2.120 -      _g1h->g1_policy()->record_termination(worker_id, term_ms, pss.term_attempts());
   2.121 -    }
   2.122 -    _g1h->g1_policy()->record_thread_age_table(pss.age_table());
   2.123 -    _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
   2.124 -
   2.125 -    // Clean up any par-expanded rem sets.
   2.126 -    HeapRegionRemSet::par_cleanup();
   2.127 -
   2.128 -    if (ParallelGCVerbose) {
   2.129 -      MutexLocker x(stats_lock());
   2.130 -      pss.print_termination_stats(worker_id);
   2.131 -    }
   2.132 -
   2.133 -    assert(pss.refs()->is_empty(), "should be empty");
   2.134      double end_time_ms = os::elapsedTime() * 1000.0;
   2.135      _g1h->g1_policy()->record_gc_worker_end_time(worker_id, end_time_ms);
   2.136    }
   2.137 @@ -4743,6 +4750,67 @@
   2.138  
   2.139  // *** Common G1 Evacuation Stuff
   2.140  
   2.141 +// Closures that support the filtering of CodeBlobs scanned during
   2.142 +// external root scanning.
   2.143 +
   2.144 +// Closure applied to reference fields in code blobs (specifically nmethods)
   2.145 +// to determine whether an nmethod contains references that point into
   2.146 +// the collection set. Used as a predicate when walking code roots so
   2.147 +// that only nmethods that point into the collection set are added to the
   2.148 +// 'marked' list.
   2.149 +
   2.150 +class G1FilteredCodeBlobToOopClosure : public CodeBlobToOopClosure {
   2.151 +
   2.152 +  class G1PointsIntoCSOopClosure : public OopClosure {
   2.153 +    G1CollectedHeap* _g1;
   2.154 +    bool _points_into_cs;
   2.155 +  public:
   2.156 +    G1PointsIntoCSOopClosure(G1CollectedHeap* g1) :
   2.157 +      _g1(g1), _points_into_cs(false) { }
   2.158 +
   2.159 +    bool points_into_cs() const { return _points_into_cs; }
   2.160 +
   2.161 +    template <class T>
   2.162 +    void do_oop_nv(T* p) {
   2.163 +      if (!_points_into_cs) {
   2.164 +        T heap_oop = oopDesc::load_heap_oop(p);
   2.165 +        if (!oopDesc::is_null(heap_oop) &&
   2.166 +            _g1->in_cset_fast_test(oopDesc::decode_heap_oop_not_null(heap_oop))) {
   2.167 +          _points_into_cs = true;
   2.168 +        }
   2.169 +      }
   2.170 +    }
   2.171 +
   2.172 +    virtual void do_oop(oop* p)        { do_oop_nv(p); }
   2.173 +    virtual void do_oop(narrowOop* p)  { do_oop_nv(p); }
   2.174 +  };
   2.175 +
   2.176 +  G1CollectedHeap* _g1;
   2.177 +
   2.178 +public:
   2.179 +  G1FilteredCodeBlobToOopClosure(G1CollectedHeap* g1, OopClosure* cl) :
   2.180 +    CodeBlobToOopClosure(cl, true), _g1(g1) { }
   2.181 +
   2.182 +  virtual void do_code_blob(CodeBlob* cb) {
   2.183 +    nmethod* nm = cb->as_nmethod_or_null();
   2.184 +    if (nm != NULL && !(nm->test_oops_do_mark())) {
   2.185 +      G1PointsIntoCSOopClosure predicate_cl(_g1);
   2.186 +      nm->oops_do(&predicate_cl);
   2.187 +
   2.188 +      if (predicate_cl.points_into_cs()) {
   2.189 +        // At least one of the reference fields or the oop relocations
   2.190 +        // in the nmethod points into the collection set. We have to
   2.191 +        // 'mark' this nmethod.
   2.192 +        // Note: Revisit the following if CodeBlobToOopClosure::do_code_blob()
   2.193 +        // or MarkingCodeBlobClosure::do_code_blob() change.
   2.194 +        if (!nm->test_set_oops_do_mark()) {
   2.195 +          do_newly_marked_nmethod(nm);
   2.196 +        }
   2.197 +      }
   2.198 +    }
   2.199 +  }
   2.200 +};
   2.201 +
   2.202  // This method is run in a GC worker.
   2.203  
   2.204  void
   2.205 @@ -4764,7 +4832,7 @@
   2.206  
   2.207    // Walk the code cache w/o buffering, because StarTask cannot handle
   2.208    // unaligned oop locations.
   2.209 -  CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, /*do_marking=*/ true);
   2.210 +  G1FilteredCodeBlobToOopClosure eager_scan_code_roots(this, scan_non_heap_roots);
   2.211  
   2.212    process_strong_roots(false, // no scoping; this is parallel code
   2.213                         collecting_perm_gen, so,
   2.214 @@ -5378,25 +5446,39 @@
   2.215    rem_set()->prepare_for_younger_refs_iterate(true);
   2.216  
   2.217    assert(dirty_card_queue_set().completed_buffers_num() == 0, "Should be empty");
   2.218 -  double start_par = os::elapsedTime();
   2.219 -
   2.220 -  if (G1CollectedHeap::use_parallel_gc_threads()) {
   2.221 -    // The individual threads will set their evac-failure closures.
   2.222 +  double start_par_time_sec = os::elapsedTime();
   2.223 +  double end_par_time_sec;
   2.224 +
   2.225 +  {
   2.226      StrongRootsScope srs(this);
   2.227 -    if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
   2.228 -    // These tasks use ShareHeap::_process_strong_tasks
   2.229 -    assert(UseDynamicNumberOfGCThreads ||
   2.230 -           workers()->active_workers() == workers()->total_workers(),
   2.231 -           "If not dynamic should be using all the  workers");
   2.232 -    workers()->run_task(&g1_par_task);
   2.233 -  } else {
   2.234 -    StrongRootsScope srs(this);
   2.235 -    g1_par_task.set_for_termination(n_workers);
   2.236 -    g1_par_task.work(0);
   2.237 -  }
   2.238 -
   2.239 -  double par_time = (os::elapsedTime() - start_par) * 1000.0;
   2.240 -  g1_policy()->record_par_time(par_time);
   2.241 +
   2.242 +    if (G1CollectedHeap::use_parallel_gc_threads()) {
   2.243 +      // The individual threads will set their evac-failure closures.
   2.244 +      if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
   2.245 +      // These tasks use ShareHeap::_process_strong_tasks
   2.246 +      assert(UseDynamicNumberOfGCThreads ||
   2.247 +             workers()->active_workers() == workers()->total_workers(),
   2.248 +             "If not dynamic should be using all the  workers");
   2.249 +      workers()->run_task(&g1_par_task);
   2.250 +    } else {
   2.251 +      g1_par_task.set_for_termination(n_workers);
   2.252 +      g1_par_task.work(0);
   2.253 +    }
   2.254 +    end_par_time_sec = os::elapsedTime();
   2.255 +
   2.256 +    // Closing the inner scope will execute the destructor
   2.257 +    // for the StrongRootsScope object. We record the current
   2.258 +    // elapsed time before closing the scope so that time
   2.259 +    // taken for the SRS destructor is NOT included in the
   2.260 +    // reported parallel time.
   2.261 +  }
   2.262 +
   2.263 +  double par_time_ms = (end_par_time_sec - start_par_time_sec) * 1000.0;
   2.264 +  g1_policy()->record_par_time(par_time_ms);
   2.265 +
   2.266 +  double code_root_fixup_time_ms =
   2.267 +        (os::elapsedTime() - end_par_time_sec) * 1000.0;
   2.268 +  g1_policy()->record_code_root_fixup_time(code_root_fixup_time_ms);
   2.269  
   2.270    set_par_threads(0);
   2.271  
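
Before moving on to the collector-policy diff, a compact sketch of the timing split introduced above; elapsed_seconds() and the dummy StrongRootsScope are stand-ins invented for illustration, not HotSpot code. The end of the parallel phase is sampled before the StrongRootsScope destructor runs, so the destructor's code-root fixup work is reported separately instead of inflating the per-worker "GC Worker Other" time.

// Illustrative sketch only -- names below are stand-ins, not HotSpot code.
#include <chrono>

static double elapsed_seconds() {               // stand-in for os::elapsedTime()
  using namespace std::chrono;
  return duration<double>(steady_clock::now().time_since_epoch()).count();
}

struct StrongRootsScope {
  // In HotSpot the destructor walks the marked nmethods ("code root fixup").
  ~StrongRootsScope() { /* ... process marked nmethods ... */ }
};

void evacuation_timing_sketch() {
  double start_par_time_sec = elapsed_seconds();
  double end_par_time_sec;
  {
    StrongRootsScope srs;
    // ... run the parallel evacuation task here ...
    end_par_time_sec = elapsed_seconds();       // sampled BEFORE srs is destroyed
  }                                             // ~StrongRootsScope runs here
  double par_time_ms             = (end_par_time_sec - start_par_time_sec) * 1000.0;
  double code_root_fixup_time_ms = (elapsed_seconds() - end_par_time_sec) * 1000.0;
  // The policy then records these two values separately:
  //   record_par_time(par_time_ms);
  //   record_code_root_fixup_time(code_root_fixup_time_ms);
  (void)par_time_ms;
  (void)code_root_fixup_time_ms;
}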
     3.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Mar 27 10:29:59 2012 +0200
     3.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Tue Mar 13 11:05:32 2012 -0700
     3.3 @@ -1261,6 +1261,9 @@
     3.4      other_time_ms -= known_time;
     3.5    }
     3.6  
     3.7 +  // Now subtract the time taken to fix up roots in generated code
     3.8 +  other_time_ms -= _cur_collection_code_root_fixup_time_ms;
     3.9 +
    3.10    // Subtract the time taken to clean the card table from the
    3.11    // current value of "other time"
    3.12    other_time_ms -= _cur_clear_ct_time_ms;
    3.13 @@ -1401,10 +1404,10 @@
    3.14        print_par_stats(2, "Object Copy", _par_last_obj_copy_times_ms);
    3.15        print_par_stats(2, "Termination", _par_last_termination_times_ms);
    3.16        print_par_sizes(3, "Termination Attempts", _par_last_termination_attempts);
    3.17 -      print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
    3.18  
    3.19        for (int i = 0; i < _parallel_gc_threads; i++) {
    3.20 -        _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
    3.21 +        _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] -
    3.22 +                                          _par_last_gc_worker_start_times_ms[i];
    3.23  
    3.24          double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
    3.25                                     _par_last_satb_filtering_times_ms[i] +
    3.26 @@ -1413,10 +1416,13 @@
    3.27                                     _par_last_obj_copy_times_ms[i] +
    3.28                                     _par_last_termination_times_ms[i];
    3.29  
    3.30 -        _par_last_gc_worker_other_times_ms[i] = _cur_collection_par_time_ms - worker_known_time;
    3.31 +        _par_last_gc_worker_other_times_ms[i] = _par_last_gc_worker_times_ms[i] -
    3.32 +                                                worker_known_time;
    3.33        }
    3.34 -      print_par_stats(2, "GC Worker", _par_last_gc_worker_times_ms);
    3.35 +
    3.36        print_par_stats(2, "GC Worker Other", _par_last_gc_worker_other_times_ms);
    3.37 +      print_par_stats(2, "GC Worker Total", _par_last_gc_worker_times_ms);
    3.38 +      print_par_stats(2, "GC Worker End", _par_last_gc_worker_end_times_ms);
    3.39      } else {
    3.40        print_stats(1, "Ext Root Scanning", ext_root_scan_time);
    3.41        if (print_marking_info) {
    3.42 @@ -1427,6 +1433,7 @@
    3.43        print_stats(1, "Scan RS", scan_rs_time);
    3.44        print_stats(1, "Object Copying", obj_copy_time);
    3.45      }
    3.46 +    print_stats(1, "Code Root Fixup", _cur_collection_code_root_fixup_time_ms);
    3.47      if (print_marking_info) {
    3.48        print_stats(1, "Complete CSet Marking", _mark_closure_time_ms);
    3.49      }
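
A worked example with hypothetical numbers may help here: if a worker started at 10.0 ms, ended at 60.0 ms, and its known sub-phases (ext root scanning, SATB filtering, update/scan RS, object copy, termination) sum to 47.5 ms, the new accounting reports GC Worker Total = 60.0 - 10.0 = 50.0 ms and GC Worker Other = 50.0 - 47.5 = 2.5 ms. Previously "Other" was computed against the elapsed time of the whole parallel phase, so work done outside the workers' own code, such as the StrongRootsScope destructor, showed up as large "GC Worker Other" values.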
     4.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Mar 27 10:29:59 2012 +0200
     4.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Tue Mar 13 11:05:32 2012 -0700
     4.3 @@ -179,6 +179,9 @@
     4.4    size_t _cur_collection_pause_used_at_start_bytes;
     4.5    size_t _cur_collection_pause_used_regions_at_start;
     4.6    double _cur_collection_par_time_ms;
     4.7 +
     4.8 +  double _cur_collection_code_root_fixup_time_ms;
     4.9 +
    4.10    double _cur_satb_drain_time_ms;
    4.11    double _cur_clear_ct_time_ms;
    4.12    double _cur_ref_proc_time_ms;
    4.13 @@ -226,8 +229,8 @@
    4.14    double* _par_last_gc_worker_times_ms;
    4.15  
    4.16    // Each workers 'other' time i.e. the elapsed time of the parallel
    4.17 -  // phase of the pause minus the sum of the individual sub-phase
    4.18 -  // times for a given worker thread.
    4.19 +  // code executed by a worker minus the sum of the individual sub-phase
    4.20 +  // times for that worker thread.
    4.21    double* _par_last_gc_worker_other_times_ms;
    4.22  
    4.23    // indicates whether we are in young or mixed GC mode
    4.24 @@ -897,6 +900,10 @@
    4.25      _cur_collection_par_time_ms = ms;
    4.26    }
    4.27  
    4.28 +  void record_code_root_fixup_time(double ms) {
    4.29 +    _cur_collection_code_root_fixup_time_ms = ms;
    4.30 +  }
    4.31 +
    4.32    void record_aux_start_time(int i) {
    4.33      guarantee(i < _aux_num, "should be within range");
    4.34      _cur_aux_start_times_ms[i] = os::elapsedTime() * 1000.0;
