src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp

changeset 2188
8b10f48633dc
parent 2132
179464550c7d
child 2191
894b1d7c7e01
     1.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Sep 16 13:45:55 2010 -0700
     1.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Sep 20 14:38:38 2010 -0700
     1.3 @@ -195,7 +195,7 @@
     1.4             "Offset of FreeChunk::_prev within FreeChunk must match"
     1.5             "  that of OopDesc::_klass within OopDesc");
     1.6    )
     1.7 -  if (ParallelGCThreads > 0) {
     1.8 +  if (CollectedHeap::use_parallel_gc_threads()) {
     1.9      typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
    1.10      _par_gc_thread_states =
    1.11        NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
    1.12 @@ -616,7 +616,7 @@
    1.13    }
    1.14  
    1.15    // Support for multi-threaded concurrent phases
    1.16 -  if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
    1.17 +  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
    1.18      if (FLAG_IS_DEFAULT(ConcGCThreads)) {
    1.19        // just for now
    1.20        FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
    1.21 @@ -628,6 +628,8 @@
    1.22          warning("GC/CMS: _conc_workers allocation failure: "
    1.23                "forcing -CMSConcurrentMTEnabled");
    1.24          CMSConcurrentMTEnabled = false;
    1.25 +      } else {
    1.26 +        _conc_workers->initialize_workers();
    1.27        }
    1.28      } else {
    1.29        CMSConcurrentMTEnabled = false;
    1.30 @@ -936,7 +938,7 @@
    1.31    // along with all the other pointers into the heap but
    1.32    // compaction is expected to be a rare event with
    1.33    // a heap using cms so don't do it without seeing the need.
    1.34 -  if (ParallelGCThreads > 0) {
    1.35 +  if (CollectedHeap::use_parallel_gc_threads()) {
    1.36      for (uint i = 0; i < ParallelGCThreads; i++) {
    1.37        _par_gc_thread_states[i]->promo.reset();
    1.38      }
    1.39 @@ -2630,7 +2632,8 @@
    1.40    // Should call gc_prologue_work() for all cms gens we are responsible for
    1.41    bool registerClosure =    _collectorState >= Marking
    1.42                           && _collectorState < Sweeping;
    1.43 -  ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar
    1.44 +  ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
    1.45 +                                               &_modUnionClosurePar
    1.46                                                 : &_modUnionClosure;
    1.47    _cmsGen->gc_prologue_work(full, registerClosure, muc);
    1.48    _permGen->gc_prologue_work(full, registerClosure, muc);
    1.49 @@ -2731,7 +2734,7 @@
    1.50    collector()->gc_epilogue(full);
    1.51  
    1.52    // Also reset promotion tracking in par gc thread states.
    1.53 -  if (ParallelGCThreads > 0) {
    1.54 +  if (CollectedHeap::use_parallel_gc_threads()) {
    1.55      for (uint i = 0; i < ParallelGCThreads; i++) {
    1.56        _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
    1.57      }
    1.58 @@ -3731,7 +3734,6 @@
    1.59  // MT Concurrent Marking Task
    1.60  class CMSConcMarkingTask: public YieldingFlexibleGangTask {
    1.61    CMSCollector* _collector;
    1.62 -  YieldingFlexibleWorkGang* _workers;        // the whole gang
    1.63    int           _n_workers;                  // requested/desired # workers
    1.64    bool          _asynch;
    1.65    bool          _result;
    1.66 @@ -3751,21 +3753,19 @@
    1.67    CMSConcMarkingTask(CMSCollector* collector,
    1.68                   CompactibleFreeListSpace* cms_space,
    1.69                   CompactibleFreeListSpace* perm_space,
    1.70 -                 bool asynch, int n_workers,
    1.71 +                 bool asynch,
    1.72                   YieldingFlexibleWorkGang* workers,
    1.73                   OopTaskQueueSet* task_queues):
    1.74      YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
    1.75      _collector(collector),
    1.76      _cms_space(cms_space),
    1.77      _perm_space(perm_space),
    1.78 -    _asynch(asynch), _n_workers(n_workers), _result(true),
    1.79 -    _workers(workers), _task_queues(task_queues),
    1.80 -    _term(n_workers, task_queues, _collector, asynch),
    1.81 +    _asynch(asynch), _n_workers(0), _result(true),
    1.82 +    _task_queues(task_queues),
    1.83 +    _term(_n_workers, task_queues, _collector, asynch),
    1.84      _bit_map_lock(collector->bitMapLock())
    1.85    {
    1.86 -    assert(n_workers <= workers->total_workers(),
    1.87 -           "Else termination won't work correctly today"); // XXX FIX ME!
    1.88 -    _requested_size = n_workers;
    1.89 +    _requested_size = _n_workers;
    1.90      _term.set_task(this);
    1.91      assert(_cms_space->bottom() < _perm_space->bottom(),
    1.92             "Finger incorrectly initialized below");
    1.93 @@ -3781,6 +3781,10 @@
    1.94  
    1.95    CMSConcMarkingTerminator* terminator() { return &_term; }
    1.96  
    1.97 +  virtual void set_for_termination(int active_workers) {
    1.98 +    terminator()->reset_for_reuse(active_workers);
    1.99 +  }
   1.100 +
   1.101    void work(int i);
   1.102  
   1.103    virtual void coordinator_yield();  // stuff done by coordinator
   1.104 @@ -4220,9 +4224,12 @@
   1.105    CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   1.106    CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
   1.107  
   1.108 -  CMSConcMarkingTask tsk(this, cms_space, perm_space,
   1.109 -                         asynch, num_workers /* number requested XXX */,
   1.110 -                         conc_workers(), task_queues());
   1.111 +  CMSConcMarkingTask tsk(this,
   1.112 +                         cms_space,
   1.113 +                         perm_space,
   1.114 +                         asynch,
   1.115 +                         conc_workers(),
   1.116 +                         task_queues());
   1.117  
   1.118    // Since the actual number of workers we get may be different
   1.119    // from the number we requested above, do we need to do anything different
   1.120 @@ -4326,6 +4333,10 @@
   1.121    verify_overflow_empty();
   1.122    _abort_preclean = false;
   1.123    if (CMSPrecleaningEnabled) {
   1.124 +    // Precleaning is currently not MT but the reference processor
   1.125 +    // may be set for MT.  Disable it temporarily here.
   1.126 +    ReferenceProcessor* rp = ref_processor();
   1.127 +    ReferenceProcessorMTProcMutator z(rp, false);
   1.128      _eden_chunk_index = 0;
   1.129      size_t used = get_eden_used();
   1.130      size_t capacity = get_eden_capacity();
   1.131 @@ -4918,7 +4929,7 @@
   1.132        // dirtied since the first checkpoint in this GC cycle and prior to
   1.133        // the most recent young generation GC, minus those cleaned up by the
   1.134        // concurrent precleaning.
   1.135 -      if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) {
   1.136 +      if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
   1.137          TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
   1.138          do_remark_parallel();
   1.139        } else {
   1.140 @@ -5012,7 +5023,6 @@
   1.141  // Parallel remark task
   1.142  class CMSParRemarkTask: public AbstractGangTask {
   1.143    CMSCollector* _collector;
   1.144 -  WorkGang*     _workers;
   1.145    int           _n_workers;
   1.146    CompactibleFreeListSpace* _cms_space;
   1.147    CompactibleFreeListSpace* _perm_space;
   1.148 @@ -5025,21 +5035,21 @@
   1.149    CMSParRemarkTask(CMSCollector* collector,
   1.150                     CompactibleFreeListSpace* cms_space,
   1.151                     CompactibleFreeListSpace* perm_space,
   1.152 -                   int n_workers, WorkGang* workers,
   1.153 +                   int n_workers, FlexibleWorkGang* workers,
   1.154                     OopTaskQueueSet* task_queues):
   1.155      AbstractGangTask("Rescan roots and grey objects in parallel"),
   1.156      _collector(collector),
   1.157      _cms_space(cms_space), _perm_space(perm_space),
   1.158      _n_workers(n_workers),
   1.159 -    _workers(workers),
   1.160      _task_queues(task_queues),
   1.161 -    _term(workers->total_workers(), task_queues) { }
   1.162 +    _term(n_workers, task_queues) { }
   1.163  
   1.164    OopTaskQueueSet* task_queues() { return _task_queues; }
   1.165  
   1.166    OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
   1.167  
   1.168    ParallelTaskTerminator* terminator() { return &_term; }
   1.169 +  int n_workers() { return _n_workers; }
   1.170  
   1.171    void work(int i);
   1.172  
   1.173 @@ -5057,6 +5067,11 @@
   1.174    void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
   1.175  };
   1.176  
   1.177 +// work_queue(i) is passed to the closure
   1.178 +// Par_MarkRefsIntoAndScanClosure.  The "i" parameter
   1.179 +// also is passed to do_dirty_card_rescan_tasks() and to
   1.180 +// do_work_steal() to select the i-th task_queue.
   1.181 +
   1.182  void CMSParRemarkTask::work(int i) {
   1.183    elapsedTimer _timer;
   1.184    ResourceMark rm;
   1.185 @@ -5128,6 +5143,7 @@
   1.186  
   1.187    // Do the rescan tasks for each of the two spaces
   1.188    // (cms_space and perm_space) in turn.
   1.189 +  // "i" is passed to select the "i-th" task_queue
   1.190    do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
   1.191    do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
   1.192    _timer.stop();
   1.193 @@ -5150,6 +5166,7 @@
   1.194    }
   1.195  }
   1.196  
   1.197 +// Note that parameter "i" is not used.
   1.198  void
   1.199  CMSParRemarkTask::do_young_space_rescan(int i,
   1.200    Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
   1.201 @@ -5309,8 +5326,13 @@
   1.202      size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
   1.203                                           (size_t)ParGCDesiredObjsFromOverflowList);
   1.204      // Now check if there's any work in the overflow list
   1.205 +    // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
   1.206 +    // only affects the number of attempts made to get work from the
   1.207 +    // overflow list and does not affect the number of workers.  Just
   1.208 +    // pass ParallelGCThreads so this behavior is unchanged.
   1.209      if (_collector->par_take_from_overflow_list(num_from_overflow_list,
   1.210 -                                                work_q)) {
   1.211 +                                                work_q,
   1.212 +                                                ParallelGCThreads)) {
   1.213        // found something in global overflow list;
   1.214        // not yet ready to go stealing work from others.
   1.215        // We'd like to assert(work_q->size() != 0, ...)
   1.216 @@ -5367,11 +5389,12 @@
   1.217  // Merge the per-thread plab arrays into the global survivor chunk
   1.218  // array which will provide the partitioning of the survivor space
   1.219  // for CMS rescan.
   1.220 -void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
   1.221 +void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
   1.222 +                                              int no_of_gc_threads) {
   1.223    assert(_survivor_plab_array  != NULL, "Error");
   1.224    assert(_survivor_chunk_array != NULL, "Error");
   1.225    assert(_collectorState == FinalMarking, "Error");
   1.226 -  for (uint j = 0; j < ParallelGCThreads; j++) {
   1.227 +  for (int j = 0; j < no_of_gc_threads; j++) {
   1.228      _cursor[j] = 0;
   1.229    }
   1.230    HeapWord* top = surv->top();
   1.231 @@ -5379,7 +5402,7 @@
   1.232    for (i = 0; i < _survivor_chunk_capacity; i++) {  // all sca entries
   1.233      HeapWord* min_val = top;          // Higher than any PLAB address
   1.234      uint      min_tid = 0;            // position of min_val this round
   1.235 -    for (uint j = 0; j < ParallelGCThreads; j++) {
   1.236 +    for (int j = 0; j < no_of_gc_threads; j++) {
   1.237        ChunkArray* cur_sca = &_survivor_plab_array[j];
   1.238        if (_cursor[j] == cur_sca->end()) {
   1.239          continue;
   1.240 @@ -5413,7 +5436,7 @@
   1.241    // Verify that we used up all the recorded entries
   1.242    #ifdef ASSERT
   1.243      size_t total = 0;
   1.244 -    for (uint j = 0; j < ParallelGCThreads; j++) {
   1.245 +    for (int j = 0; j < no_of_gc_threads; j++) {
   1.246        assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
   1.247        total += _cursor[j];
   1.248      }
   1.249 @@ -5448,13 +5471,15 @@
   1.250      // Each valid entry in [0, _eden_chunk_index) represents a task.
   1.251      size_t n_tasks = _eden_chunk_index + 1;
   1.252      assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
   1.253 -    pst->set_par_threads(n_threads);
   1.254 +    // Sets the condition for completion of the subtask (how many threads
   1.255 +    // need to finish in order to be done).
   1.256 +    pst->set_n_threads(n_threads);
   1.257      pst->set_n_tasks((int)n_tasks);
   1.258    }
   1.259  
   1.260    // Merge the survivor plab arrays into _survivor_chunk_array
   1.261    if (_survivor_plab_array != NULL) {
   1.262 -    merge_survivor_plab_arrays(dng->from());
   1.263 +    merge_survivor_plab_arrays(dng->from(), n_threads);
   1.264    } else {
   1.265      assert(_survivor_chunk_index == 0, "Error");
   1.266    }
   1.267 @@ -5463,7 +5488,9 @@
   1.268    {
   1.269      SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
   1.270      assert(!pst->valid(), "Clobbering existing data?");
   1.271 -    pst->set_par_threads(n_threads);
   1.272 +    // Sets the condition for completion of the subtask (how many threads
   1.273 +    // need to finish in order to be done).
   1.274 +    pst->set_n_threads(n_threads);
   1.275      pst->set_n_tasks(1);
   1.276      assert(pst->valid(), "Error");
   1.277    }
   1.278 @@ -5474,7 +5501,9 @@
   1.279      assert(!pst->valid(), "Clobbering existing data?");
   1.280      size_t n_tasks = _survivor_chunk_index + 1;
   1.281      assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
   1.282 -    pst->set_par_threads(n_threads);
   1.283 +    // Sets the condition for completion of the subtask (how many threads
   1.284 +    // need to finish in order to be done).
   1.285 +    pst->set_n_threads(n_threads);
   1.286      pst->set_n_tasks((int)n_tasks);
   1.287      assert(pst->valid(), "Error");
   1.288    }
   1.289 @@ -5483,7 +5512,7 @@
   1.290  // Parallel version of remark
   1.291  void CMSCollector::do_remark_parallel() {
   1.292    GenCollectedHeap* gch = GenCollectedHeap::heap();
   1.293 -  WorkGang* workers = gch->workers();
   1.294 +  FlexibleWorkGang* workers = gch->workers();
   1.295    assert(workers != NULL, "Need parallel worker threads.");
   1.296    int n_workers = workers->total_workers();
   1.297    CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   1.298 @@ -5636,13 +5665,11 @@
   1.299  ////////////////////////////////////////////////////////
   1.300  // Parallel Reference Processing Task Proxy Class
   1.301  ////////////////////////////////////////////////////////
   1.302 -class CMSRefProcTaskProxy: public AbstractGangTask {
   1.303 +class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
   1.304    typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
   1.305    CMSCollector*          _collector;
   1.306    CMSBitMap*             _mark_bit_map;
   1.307    const MemRegion        _span;
   1.308 -  OopTaskQueueSet*       _task_queues;
   1.309 -  ParallelTaskTerminator _term;
   1.310    ProcessTask&           _task;
   1.311  
   1.312  public:
   1.313 @@ -5650,24 +5677,21 @@
   1.314                        CMSCollector*    collector,
   1.315                        const MemRegion& span,
   1.316                        CMSBitMap*       mark_bit_map,
   1.317 -                      int              total_workers,
   1.318 +                      AbstractWorkGang* workers,
   1.319                        OopTaskQueueSet* task_queues):
   1.320 -    AbstractGangTask("Process referents by policy in parallel"),
   1.321 +    AbstractGangTaskWOopQueues("Process referents by policy in parallel",
   1.322 +      task_queues),
   1.323      _task(task),
   1.324 -    _collector(collector), _span(span), _mark_bit_map(mark_bit_map),
   1.325 -    _task_queues(task_queues),
   1.326 -    _term(total_workers, task_queues)
   1.327 +    _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
   1.328      {
   1.329        assert(_collector->_span.equals(_span) && !_span.is_empty(),
   1.330               "Inconsistency in _span");
   1.331      }
   1.332  
   1.333 -  OopTaskQueueSet* task_queues() { return _task_queues; }
   1.334 +  OopTaskQueueSet* task_queues() { return queues(); }
   1.335  
   1.336    OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
   1.337  
   1.338 -  ParallelTaskTerminator* terminator() { return &_term; }
   1.339 -
   1.340    void do_work_steal(int i,
   1.341                       CMSParDrainMarkingStackClosure* drain,
   1.342                       CMSParKeepAliveClosure* keep_alive,
   1.343 @@ -5739,8 +5763,13 @@
   1.344      size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
   1.345                                           (size_t)ParGCDesiredObjsFromOverflowList);
   1.346      // Now check if there's any work in the overflow list
   1.347 +    // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
   1.348 +    // only affects the number of attempts made to get work from the
   1.349 +    // overflow list and does not affect the number of workers.  Just
   1.350 +    // pass ParallelGCThreads so this behavior is unchanged.
   1.351      if (_collector->par_take_from_overflow_list(num_from_overflow_list,
   1.352 -                                                work_q)) {
   1.353 +                                                work_q,
   1.354 +                                                ParallelGCThreads)) {
   1.355        // Found something in global overflow list;
   1.356        // not yet ready to go stealing work from others.
   1.357        // We'd like to assert(work_q->size() != 0, ...)
   1.358 @@ -5773,13 +5802,12 @@
   1.359  void CMSRefProcTaskExecutor::execute(ProcessTask& task)
   1.360  {
   1.361    GenCollectedHeap* gch = GenCollectedHeap::heap();
   1.362 -  WorkGang* workers = gch->workers();
   1.363 +  FlexibleWorkGang* workers = gch->workers();
   1.364    assert(workers != NULL, "Need parallel worker threads.");
   1.365 -  int n_workers = workers->total_workers();
   1.366    CMSRefProcTaskProxy rp_task(task, &_collector,
   1.367                                _collector.ref_processor()->span(),
   1.368                                _collector.markBitMap(),
   1.369 -                              n_workers, _collector.task_queues());
   1.370 +                              workers, _collector.task_queues());
   1.371    workers->run_task(&rp_task);
   1.372  }
   1.373  
   1.374 @@ -5787,7 +5815,7 @@
   1.375  {
   1.376  
   1.377    GenCollectedHeap* gch = GenCollectedHeap::heap();
   1.378 -  WorkGang* workers = gch->workers();
   1.379 +  FlexibleWorkGang* workers = gch->workers();
   1.380    assert(workers != NULL, "Need parallel worker threads.");
   1.381    CMSRefEnqueueTaskProxy enq_task(task);
   1.382    workers->run_task(&enq_task);
   1.383 @@ -5814,6 +5842,14 @@
   1.384    {
   1.385      TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
   1.386      if (rp->processing_is_mt()) {
   1.387 +      // Set the degree of MT here.  If the discovery is done MT, there
   1.388 +      // may have been a different number of threads doing the discovery
   1.389 +      // and a different number of discovered lists may have Ref objects.
   1.390 +      // That is OK as long as the Reference lists are balanced (see
   1.391 +      // balance_all_queues() and balance_queues()).
   1.392 +
   1.393 +
   1.394 +      rp->set_mt_degree(ParallelGCThreads);
   1.395        CMSRefProcTaskExecutor task_executor(*this);
   1.396        rp->process_discovered_references(&_is_alive_closure,
   1.397                                          &cmsKeepAliveClosure,
   1.398 @@ -5874,6 +5910,7 @@
   1.399  
   1.400    rp->set_enqueuing_is_done(true);
   1.401    if (rp->processing_is_mt()) {
   1.402 +    rp->balance_all_queues();
   1.403      CMSRefProcTaskExecutor task_executor(*this);
   1.404      rp->enqueue_discovered_references(&task_executor);
   1.405    } else {
   1.406 @@ -8708,7 +8745,8 @@
   1.407  // similar changes might be needed.
   1.408  // CR 6797058 has been filed to consolidate the common code.
   1.409  bool CMSCollector::par_take_from_overflow_list(size_t num,
   1.410 -                                               OopTaskQueue* work_q) {
   1.411 +                                               OopTaskQueue* work_q,
   1.412 +                                               int no_of_gc_threads) {
   1.413    assert(work_q->size() == 0, "First empty local work queue");
   1.414    assert(num < work_q->max_elems(), "Can't bite more than we can chew");
   1.415    if (_overflow_list == NULL) {
   1.416 @@ -8717,7 +8755,9 @@
   1.417    // Grab the entire list; we'll put back a suffix
   1.418    oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   1.419    Thread* tid = Thread::current();
   1.420 -  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
   1.421 +  // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
   1.422 +  // set to ParallelGCThreads.
   1.423 +  size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
   1.424    size_t sleep_time_millis = MAX2((size_t)1, num/100);
   1.425    // If the list is busy, we spin for a short while,
   1.426    // sleeping between attempts to get the list.
   1.427 @@ -9256,4 +9296,3 @@
   1.428               true /* recordGCEndTime */,
   1.429               true /* countCollection */ );
   1.430  }
   1.431 -

mercurial