Merge

author      tonyp
date        Fri, 02 Dec 2011 08:52:53 -0500
changeset   3299:1bbf5b6fb7b0
parent      3288:81a08cd7f6a1
parent      3298:7913e93dca52
child       3300:6de8c9ba5907

src/share/vm/runtime/globals.hpp
     1.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Dec 01 13:42:41 2011 -0500
     1.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Fri Dec 02 08:52:53 2011 -0500
     1.3 @@ -668,12 +668,16 @@
     1.4  
     1.5  // We de-virtualize the block-related calls below, since we know that our
     1.6  // space is a CompactibleFreeListSpace.
     1.7 +
     1.8  #define FreeListSpace_DCTOC__walk_mem_region_with_cl_DEFN(ClosureType)          \
     1.9  void FreeListSpace_DCTOC::walk_mem_region_with_cl(MemRegion mr,                 \
    1.10                                                   HeapWord* bottom,              \
    1.11                                                   HeapWord* top,                 \
    1.12                                                   ClosureType* cl) {             \
    1.13 -   if (SharedHeap::heap()->n_par_threads() > 0) {                               \
    1.14 +   bool is_par = SharedHeap::heap()->n_par_threads() > 0;                       \
    1.15 +   if (is_par) {                                                                \
    1.16 +     assert(SharedHeap::heap()->n_par_threads() ==                              \
    1.17 +            SharedHeap::heap()->workers()->active_workers(), "Mismatch");       \
    1.18       walk_mem_region_with_cl_par(mr, bottom, top, cl);                          \
    1.19     } else {                                                                     \
    1.20       walk_mem_region_with_cl_nopar(mr, bottom, top, cl);                        \
    1.21 @@ -1925,6 +1929,9 @@
    1.22    if (rem_size < SmallForDictionary) {
    1.23      bool is_par = (SharedHeap::heap()->n_par_threads() > 0);
    1.24      if (is_par) _indexedFreeListParLocks[rem_size]->lock();
    1.25 +    assert(!is_par ||
    1.26 +           (SharedHeap::heap()->n_par_threads() ==
    1.27 +            SharedHeap::heap()->workers()->active_workers()), "Mismatch");
    1.28      returnChunkToFreeList(ffc);
    1.29      split(size, rem_size);
    1.30      if (is_par) _indexedFreeListParLocks[rem_size]->unlock();
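
The two hunks above share one pattern: query SharedHeap::heap()->n_par_threads() once, cache it in is_par, and use that single answer both to choose the parallel vs. serial code path and to decide whether the per-size-class lock is needed. For readability, here is what the macro expands to for one closure type (FilteringClosure is shown only as a plausible example; the real instantiations follow the macro in this file):

    // Sketch of one macro expansion; not an additional definition.
    void FreeListSpace_DCTOC::walk_mem_region_with_cl(MemRegion mr,
                                                      HeapWord* bottom,
                                                      HeapWord* top,
                                                      FilteringClosure* cl) {
      bool is_par = SharedHeap::heap()->n_par_threads() > 0;
      if (is_par) {
        // Per-worker data is sized by active_workers(), so the two counts
        // must agree whenever we take the parallel path.
        assert(SharedHeap::heap()->n_par_threads() ==
               SharedHeap::heap()->workers()->active_workers(), "Mismatch");
        walk_mem_region_with_cl_par(mr, bottom, top, cl);
      } else {
        walk_mem_region_with_cl_nopar(mr, bottom, top, cl);
      }
    }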
     2.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Dec 01 13:42:41 2011 -0500
     2.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Dec 02 08:52:53 2011 -0500
     2.3 @@ -3582,16 +3582,6 @@
     2.4         " or no bits are set in the gc_prologue before the start of the next "
     2.5         "subsequent marking phase.");
     2.6  
     2.7 -  // Temporarily disabled, since pre/post-consumption closures don't
     2.8 -  // care about precleaned cards
     2.9 -  #if 0
    2.10 -  {
    2.11 -    MemRegion mr = MemRegion((HeapWord*)_virtual_space.low(),
    2.12 -                             (HeapWord*)_virtual_space.high());
    2.13 -    _ct->ct_bs()->preclean_dirty_cards(mr);
    2.14 -  }
    2.15 -  #endif
    2.16 -
    2.17    // Save the end of the used_region of the constituent generations
    2.18    // to be used to limit the extent of sweep in each generation.
    2.19    save_sweep_limits();
    2.20 @@ -4244,9 +4234,11 @@
    2.21  
    2.22  bool CMSCollector::do_marking_mt(bool asynch) {
    2.23    assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition");
    2.24 -  // In the future this would be determined ergonomically, based
    2.25 -  // on #cpu's, # active mutator threads (and load), and mutation rate.
    2.26 -  int num_workers = ConcGCThreads;
    2.27 +  int num_workers = AdaptiveSizePolicy::calc_active_conc_workers(
    2.28 +                                       conc_workers()->total_workers(),
    2.29 +                                       conc_workers()->active_workers(),
    2.30 +                                       Threads::number_of_non_daemon_threads());
    2.31 +  conc_workers()->set_active_workers(num_workers);
    2.32  
    2.33    CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
    2.34    CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
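
Previously do_marking_mt() always started ConcGCThreads workers; with this change the count is recomputed at the start of each concurrent marking cycle. The sketch below only illustrates the intent of that calculation (the real heuristics live in AdaptiveSizePolicy::calc_active_conc_workers(); the helper name and body here are assumptions):

    // Illustrative only: pick a worker count bounded by the gang size,
    // influenced by the current application thread count, never zero.
    int choose_conc_workers(int total_workers,
                            int prev_active_workers,
                            int non_daemon_threads) {
      int wanted = MIN2(total_workers, non_daemon_threads);
      return MAX2(wanted, 1);
    }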
    2.35 @@ -5062,6 +5054,8 @@
    2.36    ParallelTaskTerminator _term;
    2.37  
    2.38   public:
    2.39 +  // A value of 0 passed to n_workers will cause the number of
    2.40 +  // workers to be taken from the active workers in the work gang.
    2.41    CMSParRemarkTask(CMSCollector* collector,
    2.42                     CompactibleFreeListSpace* cms_space,
    2.43                     CompactibleFreeListSpace* perm_space,
    2.44 @@ -5544,7 +5538,15 @@
    2.45    GenCollectedHeap* gch = GenCollectedHeap::heap();
    2.46    FlexibleWorkGang* workers = gch->workers();
    2.47    assert(workers != NULL, "Need parallel worker threads.");
    2.48 -  int n_workers = workers->total_workers();
    2.49 +  // Choose to use the number of GC workers most recently set
    2.50 +  // into "active_workers".  If active_workers is not set, set it
    2.51 +  // to ParallelGCThreads.
    2.52 +  int n_workers = workers->active_workers();
    2.53 +  if (n_workers == 0) {
    2.54 +    assert(n_workers > 0, "Should have been set during scavenge");
    2.55 +    n_workers = ParallelGCThreads;
    2.56 +    workers->set_active_workers(n_workers);
    2.57 +  }
    2.58    CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
    2.59    CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
    2.60  
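
The idiom introduced here recurs throughout the changeset: prefer the most recently set active_workers(), assert that an earlier phase really did set it, and in product builds fall back to ParallelGCThreads so a zero count can never reach the task. A compact sketch of that defensive pattern (the helper name is hypothetical):

    // Hypothetical helper showing the fallback used above.
    int resolve_active_workers(FlexibleWorkGang* workers) {
      int n = workers->active_workers();
      if (n == 0) {
        // Debug builds stop here; product builds recover with a sane value.
        assert(n > 0, "Should have been set during scavenge");
        n = ParallelGCThreads;
        workers->set_active_workers(n);
      }
      return n;
    }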
    2.61 @@ -5884,8 +5886,17 @@
    2.62        // and a different number of discovered lists may have Ref objects.
    2.63        // That is OK as long as the Reference lists are balanced (see
    2.64        // balance_all_queues() and balance_queues()).
    2.65 -
    2.66 -      rp->set_active_mt_degree(ParallelGCThreads);
    2.67 +      GenCollectedHeap* gch = GenCollectedHeap::heap();
    2.68 +      int active_workers = ParallelGCThreads;
    2.69 +      FlexibleWorkGang* workers = gch->workers();
    2.70 +      if (workers != NULL) {
    2.71 +        active_workers = workers->active_workers();
    2.72 +        // The expectation is that active_workers will have already
    2.73 +        // been set to a reasonable value.  If it has not been set,
    2.74 +        // investigate.
    2.75 +        assert(active_workers > 0, "Should have been set during scavenge");
    2.76 +      }
    2.77 +      rp->set_active_mt_degree(active_workers);
    2.78        CMSRefProcTaskExecutor task_executor(*this);
    2.79        rp->process_discovered_references(&_is_alive_closure,
    2.80                                          &cmsKeepAliveClosure,
     3.1 --- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Thu Dec 01 13:42:41 2011 -0500
     3.2 +++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Fri Dec 02 08:52:53 2011 -0500
     3.3 @@ -255,7 +255,18 @@
     3.4  CollectionSetChooser::
     3.5  prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
     3.6    _first_par_unreserved_idx = 0;
     3.7 -  size_t max_waste = ParallelGCThreads * chunkSize;
     3.8 +  int n_threads = ParallelGCThreads;
     3.9 +  if (UseDynamicNumberOfGCThreads) {
    3.10 +    assert(G1CollectedHeap::heap()->workers()->active_workers() > 0,
    3.11 +      "Should have been set earlier");
    3.12 +    // This is defensive code. As the assertion above says, the number
    3.13 +    // of active threads should be > 0, but in case there is some path
     3.14 +    // or some improperly initialized variable which leads to no
    3.15 +    // active threads, protect against that in a product build.
    3.16 +    n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
    3.17 +                     1);
    3.18 +  }
    3.19 +  size_t max_waste = n_threads * chunkSize;
    3.20    // it should be aligned with respect to chunkSize
    3.21    size_t aligned_n_regions =
    3.22                       (n_regions + (chunkSize - 1)) / chunkSize * chunkSize;
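
With dynamic GC threads, max_waste now reflects the number of workers that will actually stride over the chunks instead of the static ParallelGCThreads. A worked example with illustrative numbers only:

    // chunkSize = 16 regions, ParallelGCThreads = 8, but only 3 workers
    // are active for this operation.
    //   before: max_waste = 8 * 16 = 128 reserved-but-possibly-unused slots
    //   after:  max_waste = 3 * 16 =  48
    // The region count is still rounded up to a multiple of chunkSize:
    size_t aligned_n_regions(size_t n_regions, size_t chunkSize) {
      return (n_regions + (chunkSize - 1)) / chunkSize * chunkSize;  // 100 -> 112
    }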
    3.23 @@ -265,6 +276,11 @@
    3.24  
    3.25  jint
    3.26  CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
    3.27 +  // Don't do this assert because this can be called at a point
    3.28 +  // where the loop up stream will not execute again but might
    3.29 +  // try to claim more chunks (loop test has not been done yet).
    3.30 +  // assert(_markedRegions.length() > _first_par_unreserved_idx,
    3.31 +  //  "Striding beyond the marked regions");
    3.32    jint res = Atomic::add(n_regions, &_first_par_unreserved_idx);
    3.33    assert(_markedRegions.length() > res + n_regions - 1,
    3.34           "Should already have been expanded");
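
getParMarkedHeapRegionChunk() hands out work by atomically advancing _first_par_unreserved_idx, so each caller receives a disjoint window of indices without any locking; that is also why a worker can legitimately stride past the end once the last chunk has been claimed, as the commented-out assertion explains. A minimal sketch of the same reservation idea (hypothetical helper; the real reservation is the Atomic::add() above):

    static volatile jint _next_idx = 0;

    // Returns the start of a [start, start + chunk_size) window unique to
    // the calling worker. Atomic::add() returns the updated value.
    jint claim_chunk(jint chunk_size) {
      jint end = Atomic::add(chunk_size, &_next_idx);
      return end - chunk_size;
    }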
     4.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Dec 01 13:42:41 2011 -0500
     4.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Dec 02 08:52:53 2011 -0500
     4.3 @@ -44,7 +44,7 @@
     4.4  //
     4.5  // CMS Bit Map Wrapper
     4.6  
     4.7 -CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
     4.8 +CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
     4.9    _bm((uintptr_t*)NULL,0),
    4.10    _shifter(shifter) {
    4.11    _bmStartWord = (HeapWord*)(rs.base());
    4.12 @@ -458,12 +458,17 @@
    4.13  #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
    4.14  #endif // _MSC_VER
    4.15  
    4.16 +size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
    4.17 +  return MAX2((n_par_threads + 2) / 4, (size_t)1);
    4.18 +}
    4.19 +
    4.20  ConcurrentMark::ConcurrentMark(ReservedSpace rs,
    4.21                                 int max_regions) :
    4.22    _markBitMap1(rs, MinObjAlignment - 1),
    4.23    _markBitMap2(rs, MinObjAlignment - 1),
    4.24  
    4.25    _parallel_marking_threads(0),
    4.26 +  _max_parallel_marking_threads(0),
    4.27    _sleep_factor(0.0),
    4.28    _marking_task_overhead(1.0),
    4.29    _cleanup_sleep_factor(0.0),
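
scale_parallel_threads() centralizes the scaling rule that used to be written inline: roughly a quarter of the STW worker count, with a floor of one. Some illustrative values of MAX2((n + 2) / 4, 1):

    //   n (STW GC threads)      :  1  2  4  6  8  13  16
    //   concurrent mark threads :  1  1  1  2  2   3   4
    size_t scaled(size_t n) { return MAX2((n + 2) / 4, (size_t) 1); }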
    4.30 @@ -554,15 +559,17 @@
    4.31    if (ParallelGCThreads == 0) {
    4.32      // if we are not running with any parallel GC threads we will not
    4.33      // spawn any marking threads either
    4.34 -    _parallel_marking_threads =   0;
    4.35 -    _sleep_factor             = 0.0;
    4.36 -    _marking_task_overhead    = 1.0;
    4.37 +    _parallel_marking_threads =       0;
    4.38 +    _max_parallel_marking_threads =   0;
    4.39 +    _sleep_factor             =     0.0;
    4.40 +    _marking_task_overhead    =     1.0;
    4.41    } else {
    4.42      if (ConcGCThreads > 0) {
    4.43        // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
    4.44        // if both are set
    4.45  
    4.46        _parallel_marking_threads = ConcGCThreads;
    4.47 +      _max_parallel_marking_threads = _parallel_marking_threads;
    4.48        _sleep_factor             = 0.0;
    4.49        _marking_task_overhead    = 1.0;
    4.50      } else if (G1MarkingOverheadPercent > 0) {
    4.51 @@ -583,10 +590,12 @@
    4.52                           (1.0 - marking_task_overhead) / marking_task_overhead;
    4.53  
    4.54        _parallel_marking_threads = (size_t) marking_thread_num;
    4.55 +      _max_parallel_marking_threads = _parallel_marking_threads;
    4.56        _sleep_factor             = sleep_factor;
    4.57        _marking_task_overhead    = marking_task_overhead;
    4.58      } else {
    4.59 -      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
    4.60 +      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
    4.61 +      _max_parallel_marking_threads = _parallel_marking_threads;
    4.62        _sleep_factor             = 0.0;
    4.63        _marking_task_overhead    = 1.0;
    4.64      }
    4.65 @@ -609,7 +618,7 @@
    4.66  
    4.67      guarantee(parallel_marking_threads() > 0, "peace of mind");
    4.68      _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
    4.69 -         (int) _parallel_marking_threads, false, true);
    4.70 +         (int) _max_parallel_marking_threads, false, true);
    4.71      if (_parallel_workers == NULL) {
    4.72        vm_exit_during_initialization("Failed necessary allocation.");
    4.73      } else {
    4.74 @@ -1106,6 +1115,33 @@
    4.75    ~CMConcurrentMarkingTask() { }
    4.76  };
    4.77  
    4.78 +// Calculates the number of active workers for a concurrent
    4.79 +// phase.
    4.80 +int ConcurrentMark::calc_parallel_marking_threads() {
    4.81 +
    4.82 +  size_t n_conc_workers;
    4.83 +  if (!G1CollectedHeap::use_parallel_gc_threads()) {
    4.84 +    n_conc_workers = 1;
    4.85 +  } else {
    4.86 +    if (!UseDynamicNumberOfGCThreads ||
    4.87 +        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
    4.88 +         !ForceDynamicNumberOfGCThreads)) {
    4.89 +      n_conc_workers = max_parallel_marking_threads();
    4.90 +    } else {
    4.91 +      n_conc_workers =
    4.92 +        AdaptiveSizePolicy::calc_default_active_workers(
    4.93 +                                     max_parallel_marking_threads(),
    4.94 +                                     1, /* Minimum workers */
    4.95 +                                     parallel_marking_threads(),
    4.96 +                                     Threads::number_of_non_daemon_threads());
    4.97 +      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
    4.98 +      // that scaling has already gone into "_max_parallel_marking_threads".
    4.99 +    }
   4.100 +  }
   4.101 +  assert(n_conc_workers > 0, "Always need at least 1");
   4.102 +  return (int) MAX2(n_conc_workers, (size_t) 1);
   4.103 +}
   4.104 +
   4.105  void ConcurrentMark::markFromRoots() {
   4.106    // we might be tempted to assert that:
   4.107    // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
   4.108 @@ -1116,9 +1152,20 @@
   4.109  
   4.110    _restart_for_overflow = false;
   4.111  
   4.112 -  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
   4.113 +  // Parallel task terminator is set in "set_phase()".
   4.114    force_overflow_conc()->init();
   4.115 -  set_phase(active_workers, true /* concurrent */);
   4.116 +
   4.117 +  // _g1h has _n_par_threads
   4.118 +
   4.119 +  _parallel_marking_threads = calc_parallel_marking_threads();
   4.120 +  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
   4.121 +    "Maximum number of marking threads exceeded");
   4.122 +  _parallel_workers->set_active_workers((int)_parallel_marking_threads);
    4.123 +  // Don't set _n_par_threads because it affects MT in process_strong_roots()
    4.124 +  // and the decisions on that MT processing are made elsewhere.
   4.125 +
   4.126 +  assert( _parallel_workers->active_workers() > 0, "Should have been set");
   4.127 +  set_phase(_parallel_workers->active_workers(), true /* concurrent */);
   4.128  
   4.129    CMConcurrentMarkingTask markingTask(this, cmThread());
   4.130    if (parallel_marking_threads() > 0) {
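
Putting the two hunks above together, a worked example of what calc_parallel_marking_threads() can return (flag values are assumptions, chosen for illustration):

    // Assumed flags: -XX:ParallelGCThreads=8, ConcGCThreads not set,
    // -XX:+UseDynamicNumberOfGCThreads.
    //   max_parallel_marking_threads = (8 + 2) / 4 = 2
    //   calc_parallel_marking_threads() returns 1 or 2 each cycle,
    //   as chosen by AdaptiveSizePolicy::calc_default_active_workers().
    // With -XX:ConcGCThreads=4 given explicitly (and dynamic sizing not
    // forced), the fixed maximum of 4 is used every cycle instead.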
   4.131 @@ -1181,6 +1228,7 @@
   4.132                                         true /* expected_active */);
   4.133  
   4.134      if (VerifyDuringGC) {
   4.135 +
   4.136        HandleMark hm;  // handle scope
   4.137        gclog_or_tty->print(" VerifyDuringGC:(after)");
   4.138        Universe::heap()->prepare_for_verify();
   4.139 @@ -1463,12 +1511,20 @@
   4.140    G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
   4.141                        BitMap* region_bm, BitMap* card_bm)
   4.142      : AbstractGangTask("G1 final counting"), _g1h(g1h),
   4.143 -      _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
   4.144 -    if (ParallelGCThreads > 0) {
   4.145 -      _n_workers = _g1h->workers()->total_workers();
   4.146 +    _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
   4.147 +    _n_workers(0)
   4.148 +  {
   4.149 +    // Use the value already set as the number of active threads
   4.150 +    // in the call to run_task().  Needed for the allocation of
   4.151 +    // _live_bytes and _used_bytes.
   4.152 +    if (G1CollectedHeap::use_parallel_gc_threads()) {
   4.153 +      assert( _g1h->workers()->active_workers() > 0,
   4.154 +        "Should have been previously set");
   4.155 +      _n_workers = _g1h->workers()->active_workers();
   4.156      } else {
   4.157        _n_workers = 1;
   4.158      }
   4.159 +
   4.160      _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
   4.161      _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
   4.162    }
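
The constructor change matters because _live_bytes and _used_bytes are per-worker accumulation arrays: they must be sized by the worker count that will actually execute the task, which is why active_workers() has to be set before the task object is built. The general shape of that pattern, as a sketch with illustrative names:

    size_t  n_workers  = workers->active_workers();
    size_t* live_bytes = NEW_C_HEAP_ARRAY(size_t, n_workers);
    for (size_t i = 0; i < n_workers; i++) { live_bytes[i] = 0; }
    // ... during run_task(), worker i writes only live_bytes[i] ...
    size_t total = 0;
    for (size_t i = 0; i < n_workers; i++) { total += live_bytes[i]; }
    FREE_C_HEAP_ARRAY(size_t, live_bytes);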
   4.163 @@ -1485,6 +1541,7 @@
   4.164      calccl.no_yield();
   4.165      if (G1CollectedHeap::use_parallel_gc_threads()) {
   4.166        _g1h->heap_region_par_iterate_chunked(&calccl, i,
   4.167 +                                            (int) _n_workers,
   4.168                                              HeapRegion::FinalCountClaimValue);
   4.169      } else {
   4.170        _g1h->heap_region_iterate(&calccl);
   4.171 @@ -1530,10 +1587,42 @@
   4.172                               FreeRegionList* local_cleanup_list,
   4.173                               OldRegionSet* old_proxy_set,
   4.174                               HumongousRegionSet* humongous_proxy_set,
   4.175 -                             HRRSCleanupTask* hrrs_cleanup_task);
   4.176 +                             HRRSCleanupTask* hrrs_cleanup_task) :
   4.177 +    _g1(g1), _worker_num(worker_num),
   4.178 +    _max_live_bytes(0), _regions_claimed(0),
   4.179 +    _freed_bytes(0),
   4.180 +    _claimed_region_time(0.0), _max_region_time(0.0),
   4.181 +    _local_cleanup_list(local_cleanup_list),
   4.182 +    _old_proxy_set(old_proxy_set),
   4.183 +    _humongous_proxy_set(humongous_proxy_set),
   4.184 +    _hrrs_cleanup_task(hrrs_cleanup_task) { }
   4.185 +
   4.186    size_t freed_bytes() { return _freed_bytes; }
   4.187  
   4.188 -  bool doHeapRegion(HeapRegion *r);
   4.189 +  bool doHeapRegion(HeapRegion *hr) {
   4.190 +    // We use a claim value of zero here because all regions
   4.191 +    // were claimed with value 1 in the FinalCount task.
   4.192 +    hr->reset_gc_time_stamp();
   4.193 +    if (!hr->continuesHumongous()) {
   4.194 +      double start = os::elapsedTime();
   4.195 +      _regions_claimed++;
   4.196 +      hr->note_end_of_marking();
   4.197 +      _max_live_bytes += hr->max_live_bytes();
   4.198 +      _g1->free_region_if_empty(hr,
   4.199 +                                &_freed_bytes,
   4.200 +                                _local_cleanup_list,
   4.201 +                                _old_proxy_set,
   4.202 +                                _humongous_proxy_set,
   4.203 +                                _hrrs_cleanup_task,
   4.204 +                                true /* par */);
   4.205 +      double region_time = (os::elapsedTime() - start);
   4.206 +      _claimed_region_time += region_time;
   4.207 +      if (region_time > _max_region_time) {
   4.208 +        _max_region_time = region_time;
   4.209 +      }
   4.210 +    }
   4.211 +    return false;
   4.212 +  }
   4.213  
   4.214    size_t max_live_bytes() { return _max_live_bytes; }
   4.215    size_t regions_claimed() { return _regions_claimed; }
   4.216 @@ -1568,6 +1657,7 @@
   4.217                                             &hrrs_cleanup_task);
   4.218      if (G1CollectedHeap::use_parallel_gc_threads()) {
   4.219        _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
   4.220 +                                            _g1h->workers()->active_workers(),
   4.221                                              HeapRegion::NoteEndClaimValue);
   4.222      } else {
   4.223        _g1h->heap_region_iterate(&g1_note_end);
   4.224 @@ -1644,47 +1734,6 @@
   4.225  
   4.226  };
   4.227  
   4.228 -G1NoteEndOfConcMarkClosure::
   4.229 -G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
   4.230 -                           int worker_num,
   4.231 -                           FreeRegionList* local_cleanup_list,
   4.232 -                           OldRegionSet* old_proxy_set,
   4.233 -                           HumongousRegionSet* humongous_proxy_set,
   4.234 -                           HRRSCleanupTask* hrrs_cleanup_task)
   4.235 -  : _g1(g1), _worker_num(worker_num),
   4.236 -    _max_live_bytes(0), _regions_claimed(0),
   4.237 -    _freed_bytes(0),
   4.238 -    _claimed_region_time(0.0), _max_region_time(0.0),
   4.239 -    _local_cleanup_list(local_cleanup_list),
   4.240 -    _old_proxy_set(old_proxy_set),
   4.241 -    _humongous_proxy_set(humongous_proxy_set),
   4.242 -    _hrrs_cleanup_task(hrrs_cleanup_task) { }
   4.243 -
   4.244 -bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
   4.245 -  // We use a claim value of zero here because all regions
   4.246 -  // were claimed with value 1 in the FinalCount task.
   4.247 -  hr->reset_gc_time_stamp();
   4.248 -  if (!hr->continuesHumongous()) {
   4.249 -    double start = os::elapsedTime();
   4.250 -    _regions_claimed++;
   4.251 -    hr->note_end_of_marking();
   4.252 -    _max_live_bytes += hr->max_live_bytes();
   4.253 -    _g1->free_region_if_empty(hr,
   4.254 -                              &_freed_bytes,
   4.255 -                              _local_cleanup_list,
   4.256 -                              _old_proxy_set,
   4.257 -                              _humongous_proxy_set,
   4.258 -                              _hrrs_cleanup_task,
   4.259 -                              true /* par */);
   4.260 -    double region_time = (os::elapsedTime() - start);
   4.261 -    _claimed_region_time += region_time;
   4.262 -    if (region_time > _max_region_time) {
   4.263 -      _max_region_time = region_time;
   4.264 -    }
   4.265 -  }
   4.266 -  return false;
   4.267 -}
   4.268 -
   4.269  void ConcurrentMark::cleanup() {
   4.270    // world is stopped at this checkpoint
   4.271    assert(SafepointSynchronize::is_at_safepoint(),
   4.272 @@ -1716,6 +1765,9 @@
   4.273  
   4.274    HeapRegionRemSet::reset_for_cleanup_tasks();
   4.275  
   4.276 +  g1h->set_par_threads();
   4.277 +  size_t n_workers = g1h->n_par_threads();
   4.278 +
   4.279    // Do counting once more with the world stopped for good measure.
   4.280    G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
   4.281                                          &_region_bm, &_card_bm);
   4.282 @@ -1724,9 +1776,10 @@
   4.283                                                 HeapRegion::InitialClaimValue),
   4.284             "sanity check");
   4.285  
   4.286 -    int n_workers = g1h->workers()->total_workers();
   4.287 -    g1h->set_par_threads(n_workers);
   4.288 +    assert(g1h->n_par_threads() == (int) n_workers,
   4.289 +      "Should not have been reset");
   4.290      g1h->workers()->run_task(&g1_par_count_task);
   4.291 +    // Done with the parallel phase so reset to 0.
   4.292      g1h->set_par_threads(0);
   4.293  
   4.294      assert(g1h->check_heap_region_claim_values(
   4.295 @@ -1776,8 +1829,7 @@
   4.296    double note_end_start = os::elapsedTime();
   4.297    G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
   4.298    if (G1CollectedHeap::use_parallel_gc_threads()) {
   4.299 -    int n_workers = g1h->workers()->total_workers();
   4.300 -    g1h->set_par_threads(n_workers);
   4.301 +    g1h->set_par_threads((int)n_workers);
   4.302      g1h->workers()->run_task(&g1_par_note_end_task);
   4.303      g1h->set_par_threads(0);
   4.304  
   4.305 @@ -1806,8 +1858,7 @@
   4.306      double rs_scrub_start = os::elapsedTime();
   4.307      G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
   4.308      if (G1CollectedHeap::use_parallel_gc_threads()) {
   4.309 -      int n_workers = g1h->workers()->total_workers();
   4.310 -      g1h->set_par_threads(n_workers);
   4.311 +      g1h->set_par_threads((int)n_workers);
   4.312        g1h->workers()->run_task(&g1_par_scrub_rs_task);
   4.313        g1h->set_par_threads(0);
   4.314  
   4.315 @@ -1825,7 +1876,7 @@
   4.316  
   4.317    // this will also free any regions totally full of garbage objects,
   4.318    // and sort the regions.
   4.319 -  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
   4.320 +  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
   4.321  
   4.322    // Statistics.
   4.323    double end = os::elapsedTime();
   4.324 @@ -1991,16 +2042,12 @@
   4.325  class G1CMParKeepAliveAndDrainClosure: public OopClosure {
   4.326    ConcurrentMark*  _cm;
   4.327    CMTask*          _task;
   4.328 -  CMBitMap*        _bitMap;
   4.329    int              _ref_counter_limit;
   4.330    int              _ref_counter;
   4.331   public:
   4.332 -  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
   4.333 -                                  CMTask* task,
   4.334 -                                  CMBitMap* bitMap) :
   4.335 -    _cm(cm), _task(task), _bitMap(bitMap),
   4.336 -    _ref_counter_limit(G1RefProcDrainInterval)
   4.337 -  {
   4.338 +  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
   4.339 +    _cm(cm), _task(task),
   4.340 +    _ref_counter_limit(G1RefProcDrainInterval) {
   4.341      assert(_ref_counter_limit > 0, "sanity");
   4.342      _ref_counter = _ref_counter_limit;
   4.343    }
   4.344 @@ -2091,19 +2138,16 @@
   4.345  private:
   4.346    G1CollectedHeap* _g1h;
   4.347    ConcurrentMark*  _cm;
   4.348 -  CMBitMap*        _bitmap;
   4.349    WorkGang*        _workers;
   4.350    int              _active_workers;
   4.351  
   4.352  public:
   4.353    G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
   4.354                          ConcurrentMark* cm,
   4.355 -                        CMBitMap* bitmap,
   4.356                          WorkGang* workers,
   4.357                          int n_workers) :
   4.358 -    _g1h(g1h), _cm(cm), _bitmap(bitmap),
   4.359 -    _workers(workers), _active_workers(n_workers)
   4.360 -  { }
   4.361 +    _g1h(g1h), _cm(cm),
   4.362 +    _workers(workers), _active_workers(n_workers) { }
   4.363  
   4.364    // Executes the given task using concurrent marking worker threads.
   4.365    virtual void execute(ProcessTask& task);
   4.366 @@ -2115,21 +2159,18 @@
   4.367    ProcessTask&     _proc_task;
   4.368    G1CollectedHeap* _g1h;
   4.369    ConcurrentMark*  _cm;
   4.370 -  CMBitMap*        _bitmap;
   4.371  
   4.372  public:
   4.373    G1CMRefProcTaskProxy(ProcessTask& proc_task,
   4.374                       G1CollectedHeap* g1h,
   4.375 -                     ConcurrentMark* cm,
   4.376 -                     CMBitMap* bitmap) :
   4.377 +                     ConcurrentMark* cm) :
   4.378      AbstractGangTask("Process reference objects in parallel"),
   4.379 -    _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
   4.380 -  {}
   4.381 +    _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
   4.382  
   4.383    virtual void work(int i) {
   4.384      CMTask* marking_task = _cm->task(i);
   4.385      G1CMIsAliveClosure g1_is_alive(_g1h);
   4.386 -    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
   4.387 +    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
   4.388      G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
   4.389  
   4.390      _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
   4.391 @@ -2139,7 +2180,7 @@
   4.392  void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
   4.393    assert(_workers != NULL, "Need parallel worker threads.");
   4.394  
   4.395 -  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
   4.396 +  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
   4.397  
   4.398    // We need to reset the phase for each task execution so that
   4.399    // the termination protocol of CMTask::do_marking_step works.
   4.400 @@ -2156,8 +2197,7 @@
   4.401  public:
   4.402    G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
   4.403      AbstractGangTask("Enqueue reference objects in parallel"),
   4.404 -    _enq_task(enq_task)
   4.405 -  { }
   4.406 +    _enq_task(enq_task) { }
   4.407  
   4.408    virtual void work(int i) {
   4.409      _enq_task.work(i);
   4.410 @@ -2207,10 +2247,10 @@
   4.411  
   4.412      // We use the work gang from the G1CollectedHeap and we utilize all
   4.413      // the worker threads.
   4.414 -    int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
   4.415 +    int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
   4.416      active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
   4.417  
   4.418 -    G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
   4.419 +    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
   4.420                                                g1h->workers(), active_workers);
   4.421  
   4.422      if (rp->processing_is_mt()) {
   4.423 @@ -2290,7 +2330,9 @@
   4.424    }
   4.425  
   4.426    CMRemarkTask(ConcurrentMark* cm) :
   4.427 -    AbstractGangTask("Par Remark"), _cm(cm) { }
   4.428 +    AbstractGangTask("Par Remark"), _cm(cm) {
   4.429 +    _cm->terminator()->reset_for_reuse(cm->_g1h->workers()->active_workers());
   4.430 +  }
   4.431  };
   4.432  
   4.433  void ConcurrentMark::checkpointRootsFinalWork() {
   4.434 @@ -2302,16 +2344,21 @@
   4.435  
   4.436    if (G1CollectedHeap::use_parallel_gc_threads()) {
   4.437      G1CollectedHeap::StrongRootsScope srs(g1h);
   4.438 -    // this is remark, so we'll use up all available threads
   4.439 -    int active_workers = ParallelGCThreads;
   4.440 +    // this is remark, so we'll use up all active threads
   4.441 +    int active_workers = g1h->workers()->active_workers();
   4.442 +    if (active_workers == 0) {
   4.443 +      assert(active_workers > 0, "Should have been set earlier");
   4.444 +      active_workers = ParallelGCThreads;
   4.445 +      g1h->workers()->set_active_workers(active_workers);
   4.446 +    }
   4.447      set_phase(active_workers, false /* concurrent */);
    4.448 +    // Leave _parallel_marking_threads at its
   4.449 +    // value originally calculated in the ConcurrentMark
   4.450 +    // constructor and pass values of the active workers
   4.451 +    // through the gang in the task.
   4.452  
   4.453      CMRemarkTask remarkTask(this);
   4.454 -    // We will start all available threads, even if we decide that the
   4.455 -    // active_workers will be fewer. The extra ones will just bail out
   4.456 -    // immediately.
   4.457 -    int n_workers = g1h->workers()->total_workers();
   4.458 -    g1h->set_par_threads(n_workers);
   4.459 +    g1h->set_par_threads(active_workers);
   4.460      g1h->workers()->run_task(&remarkTask);
   4.461      g1h->set_par_threads(0);
   4.462    } else {
   4.463 @@ -2859,8 +2906,10 @@
   4.464    }
   4.465  }
   4.466  
   4.467 -class CSMarkOopClosure: public OopClosure {
   4.468 -  friend class CSMarkBitMapClosure;
   4.469 +// Closures used by ConcurrentMark::complete_marking_in_collection_set().
   4.470 +
   4.471 +class CSetMarkOopClosure: public OopClosure {
   4.472 +  friend class CSetMarkBitMapClosure;
   4.473  
   4.474    G1CollectedHeap* _g1h;
   4.475    CMBitMap*        _bm;
   4.476 @@ -2870,6 +2919,7 @@
   4.477    int              _ms_size;
   4.478    int              _ms_ind;
   4.479    int              _array_increment;
   4.480 +  int              _worker_i;
   4.481  
   4.482    bool push(oop obj, int arr_ind = 0) {
   4.483      if (_ms_ind == _ms_size) {
   4.484 @@ -2910,7 +2960,6 @@
   4.485          for (int j = arr_ind; j < lim; j++) {
   4.486            do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
   4.487          }
   4.488 -
   4.489        } else {
   4.490          obj->oop_iterate(this);
   4.491        }
   4.492 @@ -2920,17 +2969,17 @@
   4.493    }
   4.494  
   4.495  public:
   4.496 -  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
   4.497 +  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
   4.498      _g1h(G1CollectedHeap::heap()),
   4.499      _cm(cm),
   4.500      _bm(cm->nextMarkBitMap()),
   4.501      _ms_size(ms_size), _ms_ind(0),
   4.502      _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
   4.503      _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
   4.504 -    _array_increment(MAX2(ms_size/8, 16))
   4.505 -  {}
   4.506 -
   4.507 -  ~CSMarkOopClosure() {
   4.508 +    _array_increment(MAX2(ms_size/8, 16)),
   4.509 +    _worker_i(worker_i) { }
   4.510 +
   4.511 +  ~CSetMarkOopClosure() {
   4.512      FREE_C_HEAP_ARRAY(oop, _ms);
   4.513      FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
   4.514    }
   4.515 @@ -2953,10 +3002,11 @@
   4.516      if (hr != NULL) {
   4.517        if (hr->in_collection_set()) {
   4.518          if (_g1h->is_obj_ill(obj)) {
   4.519 -          _bm->mark((HeapWord*)obj);
   4.520 -          if (!push(obj)) {
   4.521 -            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
   4.522 -            set_abort();
   4.523 +          if (_bm->parMark((HeapWord*)obj)) {
   4.524 +            if (!push(obj)) {
   4.525 +              gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
   4.526 +              set_abort();
   4.527 +            }
   4.528            }
   4.529          }
   4.530        } else {
   4.531 @@ -2967,19 +3017,19 @@
   4.532    }
   4.533  };
   4.534  
   4.535 -class CSMarkBitMapClosure: public BitMapClosure {
   4.536 -  G1CollectedHeap* _g1h;
   4.537 -  CMBitMap*        _bitMap;
   4.538 -  ConcurrentMark*  _cm;
   4.539 -  CSMarkOopClosure _oop_cl;
   4.540 +class CSetMarkBitMapClosure: public BitMapClosure {
   4.541 +  G1CollectedHeap*   _g1h;
   4.542 +  CMBitMap*          _bitMap;
   4.543 +  ConcurrentMark*    _cm;
   4.544 +  CSetMarkOopClosure _oop_cl;
   4.545 +  int                _worker_i;
   4.546 +
   4.547  public:
   4.548 -  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
   4.549 +  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
   4.550      _g1h(G1CollectedHeap::heap()),
   4.551      _bitMap(cm->nextMarkBitMap()),
   4.552 -    _oop_cl(cm, ms_size)
   4.553 -  {}
   4.554 -
   4.555 -  ~CSMarkBitMapClosure() {}
   4.556 +    _oop_cl(cm, ms_size, worker_i),
   4.557 +    _worker_i(worker_i) { }
   4.558  
   4.559    bool do_bit(size_t offset) {
   4.560      // convert offset into a HeapWord*
   4.561 @@ -3001,53 +3051,69 @@
   4.562    }
   4.563  };
   4.564  
   4.565 -
   4.566 -class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
   4.567 -  CMBitMap* _bm;
   4.568 -  CSMarkBitMapClosure _bit_cl;
   4.569 +class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
   4.570 +  CMBitMap*             _bm;
   4.571 +  CSetMarkBitMapClosure _bit_cl;
   4.572 +  int                   _worker_i;
   4.573 +
   4.574    enum SomePrivateConstants {
   4.575      MSSize = 1000
   4.576    };
   4.577 -  bool _completed;
   4.578 +
   4.579  public:
   4.580 -  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
   4.581 +  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
   4.582      _bm(cm->nextMarkBitMap()),
   4.583 -    _bit_cl(cm, MSSize),
   4.584 -    _completed(true)
   4.585 -  {}
   4.586 -
   4.587 -  ~CompleteMarkingInCSHRClosure() {}
   4.588 -
   4.589 -  bool doHeapRegion(HeapRegion* r) {
   4.590 -    if (!r->evacuation_failed()) {
   4.591 -      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
   4.592 -      if (!mr.is_empty()) {
   4.593 -        if (!_bm->iterate(&_bit_cl, mr)) {
   4.594 -          _completed = false;
   4.595 -          return true;
   4.596 +    _bit_cl(cm, MSSize, worker_i),
   4.597 +    _worker_i(worker_i) { }
   4.598 +
   4.599 +  bool doHeapRegion(HeapRegion* hr) {
   4.600 +    if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
   4.601 +      // The current worker has successfully claimed the region.
   4.602 +      if (!hr->evacuation_failed()) {
   4.603 +        MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
   4.604 +        if (!mr.is_empty()) {
   4.605 +          bool done = false;
   4.606 +          while (!done) {
   4.607 +            done = _bm->iterate(&_bit_cl, mr);
   4.608 +          }
   4.609          }
   4.610        }
   4.611      }
   4.612      return false;
   4.613    }
   4.614 -
   4.615 -  bool completed() { return _completed; }
   4.616  };
   4.617  
   4.618 -class ClearMarksInHRClosure: public HeapRegionClosure {
   4.619 -  CMBitMap* _bm;
   4.620 +class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
   4.621 +  jint _claim_value;
   4.622 +
   4.623  public:
   4.624 -  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
   4.625 -
   4.626 -  bool doHeapRegion(HeapRegion* r) {
   4.627 -    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
   4.628 -      MemRegion usedMR = r->used_region();
   4.629 -      _bm->clearRange(r->used_region());
   4.630 -    }
   4.631 +  SetClaimValuesInCSetHRClosure(jint claim_value) :
   4.632 +    _claim_value(claim_value) { }
   4.633 +
   4.634 +  bool doHeapRegion(HeapRegion* hr) {
   4.635 +    hr->set_claim_value(_claim_value);
   4.636      return false;
   4.637    }
   4.638  };
   4.639  
   4.640 +class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
   4.641 +protected:
   4.642 +  G1CollectedHeap* _g1h;
   4.643 +  ConcurrentMark*  _cm;
   4.644 +
   4.645 +public:
   4.646 +  G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
   4.647 +                              ConcurrentMark* cm) :
   4.648 +    AbstractGangTask("Complete Mark in CSet"),
   4.649 +    _g1h(g1h), _cm(cm) { }
   4.650 +
   4.651 +  void work(int worker_i) {
   4.652 +    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
   4.653 +    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
   4.654 +    _g1h->collection_set_iterate_from(hr, &cmplt);
   4.655 +  }
   4.656 +};
   4.657 +
   4.658  void ConcurrentMark::complete_marking_in_collection_set() {
   4.659    G1CollectedHeap* g1h =  G1CollectedHeap::heap();
   4.660  
   4.661 @@ -3056,20 +3122,32 @@
   4.662      return;
   4.663    }
   4.664  
   4.665 -  int i = 1;
   4.666    double start = os::elapsedTime();
   4.667 -  while (true) {
   4.668 -    i++;
   4.669 -    CompleteMarkingInCSHRClosure cmplt(this);
   4.670 -    g1h->collection_set_iterate(&cmplt);
   4.671 -    if (cmplt.completed()) break;
   4.672 +  int n_workers = g1h->workers()->total_workers();
   4.673 +
   4.674 +  G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
   4.675 +
   4.676 +  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
   4.677 +
   4.678 +  if (G1CollectedHeap::use_parallel_gc_threads()) {
   4.679 +    g1h->set_par_threads(n_workers);
   4.680 +    g1h->workers()->run_task(&complete_mark_task);
   4.681 +    g1h->set_par_threads(0);
   4.682 +  } else {
   4.683 +    complete_mark_task.work(0);
   4.684    }
   4.685 +
   4.686 +  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
   4.687 +
   4.688 +  // Now reset the claim values in the regions in the collection set.
   4.689 +  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
   4.690 +  g1h->collection_set_iterate(&set_cv_cl);
   4.691 +
   4.692 +  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
   4.693 +
   4.694    double end_time = os::elapsedTime();
   4.695    double elapsed_time_ms = (end_time - start) * 1000.0;
   4.696    g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
   4.697 -
   4.698 -  ClearMarksInHRClosure clr(nextMarkBitMap());
   4.699 -  g1h->collection_set_iterate(&clr);
   4.700  }
   4.701  
   4.702  // The next two methods deal with the following optimisation. Some
     5.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Dec 01 13:42:41 2011 -0500
     5.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Dec 02 08:52:53 2011 -0500
     5.3 @@ -360,7 +360,7 @@
     5.4    friend class ConcurrentMarkThread;
     5.5    friend class CMTask;
     5.6    friend class CMBitMapClosure;
     5.7 -  friend class CSMarkOopClosure;
     5.8 +  friend class CSetMarkOopClosure;
     5.9    friend class CMGlobalObjectClosure;
    5.10    friend class CMRemarkTask;
    5.11    friend class CMConcurrentMarkingTask;
    5.12 @@ -375,7 +375,9 @@
    5.13    ConcurrentMarkThread* _cmThread;   // the thread doing the work
    5.14    G1CollectedHeap*      _g1h;        // the heap.
    5.15    size_t                _parallel_marking_threads; // the number of marking
    5.16 -                                                   // threads we'll use
     5.17 +                                                   // threads we're using
    5.18 +  size_t                _max_parallel_marking_threads; // max number of marking
    5.19 +                                                   // threads we'll ever use
    5.20    double                _sleep_factor; // how much we have to sleep, with
    5.21                                         // respect to the work we just did, to
    5.22                                         // meet the marking overhead goal
    5.23 @@ -473,7 +475,7 @@
    5.24  
    5.25    double*   _accum_task_vtime;   // accumulated task vtime
    5.26  
    5.27 -  WorkGang* _parallel_workers;
    5.28 +  FlexibleWorkGang* _parallel_workers;
    5.29  
    5.30    ForceOverflowSettings _force_overflow_conc;
    5.31    ForceOverflowSettings _force_overflow_stw;
    5.32 @@ -504,6 +506,7 @@
    5.33  
    5.34    // accessor methods
    5.35    size_t parallel_marking_threads() { return _parallel_marking_threads; }
    5.36 +  size_t max_parallel_marking_threads() { return _max_parallel_marking_threads;}
    5.37    double sleep_factor()             { return _sleep_factor; }
    5.38    double marking_task_overhead()    { return _marking_task_overhead;}
    5.39    double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
    5.40 @@ -709,6 +712,14 @@
    5.41    CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
    5.42    CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
    5.43  
    5.44 +  // Returns the number of GC threads to be used in a concurrent
    5.45 +  // phase based on the number of GC threads being used in a STW
    5.46 +  // phase.
    5.47 +  size_t scale_parallel_threads(size_t n_par_threads);
    5.48 +
    5.49 +  // Calculates the number of GC threads to be used in a concurrent phase.
    5.50 +  int calc_parallel_marking_threads();
    5.51 +
    5.52    // The following three are interaction between CM and
    5.53    // G1CollectedHeap
    5.54  
     6.1 --- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Dec 01 13:42:41 2011 -0500
     6.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Fri Dec 02 08:52:53 2011 -0500
     6.3 @@ -191,7 +191,11 @@
     6.4          VM_CGC_Operation op(&cl_cl, verbose_str);
     6.5          VMThread::execute(&op);
     6.6        } else {
     6.7 +        // We don't want to update the marking status if a GC pause
     6.8 +        // is already underway.
     6.9 +        _sts.join();
    6.10          g1h->set_marking_complete();
    6.11 +        _sts.leave();
    6.12        }
    6.13  
    6.14        // Check if cleanup set the free_regions_coming flag. If it
     7.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Dec 01 13:42:41 2011 -0500
     7.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Dec 02 08:52:53 2011 -0500
     7.3 @@ -66,6 +66,18 @@
     7.4  // apply to TLAB allocation, which is not part of this interface: it
     7.5  // is done by clients of this interface.)
     7.6  
     7.7 +// Notes on implementation of parallelism in different tasks.
     7.8 +//
     7.9 +// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
    7.10 +// The number of GC workers is passed to heap_region_par_iterate_chunked().
    7.11 +// It does use run_task() which sets _n_workers in the task.
    7.12 +// G1ParTask executes g1_process_strong_roots() ->
    7.13 +// SharedHeap::process_strong_roots() which calls eventuall to
    7.14 +// CardTableModRefBS::par_non_clean_card_iterate_work() which uses
    7.15 +// SequentialSubTasksDone.  SharedHeap::process_strong_roots() also
    7.16 +// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
    7.17 +//
    7.18 +
    7.19  // Local to this file.
    7.20  
    7.21  class RefineCardTableEntryClosure: public CardTableEntryClosure {
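
The note above describes where each task gets its worker count from; the rule applied throughout this changeset (and spelled out again before ParRebuildRSTask below) is that _n_par_threads is set immediately before a parallel phase and reset to 0 immediately after it, so "possibly parallel" code running serially never sees a stale count. The bracketing looks like this (sketch assembled from the hunks in this file):

    int n_workers = workers()->active_workers();
    assert(n_workers > 0, "Active workers not properly set");
    set_par_threads(n_workers);      // tasks read this via n_par_threads()
    workers()->run_task(&task);
    set_par_threads(0);              // back to "serial" for any code that follows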
    7.22 @@ -176,8 +188,7 @@
    7.23    hr->set_next_young_region(_head);
    7.24    _head = hr;
    7.25  
    7.26 -  hr->set_young();
    7.27 -  double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length);
    7.28 +  _g1h->g1_policy()->set_region_eden(hr, (int) _length);
    7.29    ++_length;
    7.30  }
    7.31  
    7.32 @@ -190,7 +201,6 @@
    7.33      _survivor_tail = hr;
    7.34    }
    7.35    _survivor_head = hr;
    7.36 -
    7.37    ++_survivor_length;
    7.38  }
    7.39  
    7.40 @@ -315,16 +325,20 @@
    7.41    _g1h->g1_policy()->note_start_adding_survivor_regions();
    7.42    _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */);
    7.43  
    7.44 +  int young_index_in_cset = 0;
    7.45    for (HeapRegion* curr = _survivor_head;
    7.46         curr != NULL;
    7.47         curr = curr->get_next_young_region()) {
    7.48 -    _g1h->g1_policy()->set_region_survivors(curr);
    7.49 +    _g1h->g1_policy()->set_region_survivor(curr, young_index_in_cset);
    7.50  
    7.51      // The region is a non-empty survivor so let's add it to
    7.52      // the incremental collection set for the next evacuation
    7.53      // pause.
    7.54      _g1h->g1_policy()->add_region_to_incremental_cset_rhs(curr);
    7.55 -  }
    7.56 +    young_index_in_cset += 1;
    7.57 +  }
    7.58 +  assert((size_t) young_index_in_cset == _survivor_length,
    7.59 +         "post-condition");
    7.60    _g1h->g1_policy()->note_stop_adding_survivor_regions();
    7.61  
    7.62    _head   = _survivor_head;
    7.63 @@ -1154,6 +1168,7 @@
    7.64    void work(int i) {
    7.65      RebuildRSOutOfRegionClosure rebuild_rs(_g1, i);
    7.66      _g1->heap_region_par_iterate_chunked(&rebuild_rs, i,
    7.67 +                                          _g1->workers()->active_workers(),
    7.68                                           HeapRegion::RebuildRSClaimValue);
    7.69    }
    7.70  };
    7.71 @@ -1358,12 +1373,32 @@
    7.72      }
    7.73  
    7.74      // Rebuild remembered sets of all regions.
    7.75 -
    7.76      if (G1CollectedHeap::use_parallel_gc_threads()) {
    7.77 +      int n_workers =
    7.78 +        AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
    7.79 +                                       workers()->active_workers(),
    7.80 +                                       Threads::number_of_non_daemon_threads());
    7.81 +      assert(UseDynamicNumberOfGCThreads ||
    7.82 +             n_workers == workers()->total_workers(),
     7.83 +             "If not dynamic should be using all the workers");
    7.84 +      workers()->set_active_workers(n_workers);
    7.85 +      // Set parallel threads in the heap (_n_par_threads) only
    7.86 +      // before a parallel phase and always reset it to 0 after
    7.87 +      // the phase so that the number of parallel threads does
     7.88 +      // not get carried forward to a serial phase where there
    7.89 +      // may be code that is "possibly_parallel".
    7.90 +      set_par_threads(n_workers);
    7.91 +
    7.92        ParRebuildRSTask rebuild_rs_task(this);
    7.93        assert(check_heap_region_claim_values(
    7.94               HeapRegion::InitialClaimValue), "sanity check");
    7.95 -      set_par_threads(workers()->total_workers());
    7.96 +      assert(UseDynamicNumberOfGCThreads ||
    7.97 +             workers()->active_workers() == workers()->total_workers(),
    7.98 +        "Unless dynamic should use total workers");
    7.99 +      // Use the most recent number of active workers
   7.100 +      assert(workers()->active_workers() > 0,
   7.101 +        "Active workers not properly set");
   7.102 +      set_par_threads(workers()->active_workers());
   7.103        workers()->run_task(&rebuild_rs_task);
   7.104        set_par_threads(0);
   7.105        assert(check_heap_region_claim_values(
   7.106 @@ -2475,11 +2510,17 @@
   7.107  void
   7.108  G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl,
   7.109                                                   int worker,
   7.110 +                                                 int no_of_par_workers,
   7.111                                                   jint claim_value) {
   7.112    const size_t regions = n_regions();
   7.113 -  const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1);
   7.114 +  const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
   7.115 +                             no_of_par_workers :
   7.116 +                             1);
   7.117 +  assert(UseDynamicNumberOfGCThreads ||
   7.118 +         no_of_par_workers == workers()->total_workers(),
   7.119 +         "Non dynamic should use fixed number of workers");
   7.120    // try to spread out the starting points of the workers
   7.121 -  const size_t start_index = regions / worker_num * (size_t) worker;
   7.122 +  const size_t start_index = regions / max_workers * (size_t) worker;
   7.123  
   7.124    // each worker will actually look at all regions
   7.125    for (size_t count = 0; count < regions; ++count) {
   7.126 @@ -2576,10 +2617,10 @@
   7.127      _claim_value(claim_value), _failures(0), _sh_region(NULL) { }
   7.128    bool doHeapRegion(HeapRegion* r) {
   7.129      if (r->claim_value() != _claim_value) {
   7.130 -      gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), "
   7.131 +      gclog_or_tty->print_cr("Region " HR_FORMAT ", "
   7.132                               "claim value = %d, should be %d",
   7.133 -                             r->bottom(), r->end(), r->claim_value(),
   7.134 -                             _claim_value);
   7.135 +                             HR_FORMAT_PARAMS(r),
   7.136 +                             r->claim_value(), _claim_value);
   7.137        ++_failures;
   7.138      }
   7.139      if (!r->isHumongous()) {
   7.140 @@ -2588,9 +2629,9 @@
   7.141        _sh_region = r;
   7.142      } else if (r->continuesHumongous()) {
   7.143        if (r->humongous_start_region() != _sh_region) {
   7.144 -        gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), "
   7.145 +        gclog_or_tty->print_cr("Region " HR_FORMAT ", "
   7.146                                 "HS = "PTR_FORMAT", should be "PTR_FORMAT,
   7.147 -                               r->bottom(), r->end(),
   7.148 +                               HR_FORMAT_PARAMS(r),
   7.149                                 r->humongous_start_region(),
   7.150                                 _sh_region);
   7.151          ++_failures;
   7.152 @@ -2608,8 +2649,63 @@
   7.153    heap_region_iterate(&cl);
   7.154    return cl.failures() == 0;
   7.155  }
   7.156 +
   7.157 +class CheckClaimValuesInCSetHRClosure: public HeapRegionClosure {
   7.158 +  jint   _claim_value;
   7.159 +  size_t _failures;
   7.160 +
   7.161 +public:
   7.162 +  CheckClaimValuesInCSetHRClosure(jint claim_value) :
   7.163 +    _claim_value(claim_value),
   7.164 +    _failures(0) { }
   7.165 +
   7.166 +  size_t failures() {
   7.167 +    return _failures;
   7.168 +  }
   7.169 +
   7.170 +  bool doHeapRegion(HeapRegion* hr) {
   7.171 +    assert(hr->in_collection_set(), "how?");
   7.172 +    assert(!hr->isHumongous(), "H-region in CSet");
   7.173 +    if (hr->claim_value() != _claim_value) {
   7.174 +      gclog_or_tty->print_cr("CSet Region " HR_FORMAT ", "
   7.175 +                             "claim value = %d, should be %d",
   7.176 +                             HR_FORMAT_PARAMS(hr),
   7.177 +                             hr->claim_value(), _claim_value);
   7.178 +      _failures += 1;
   7.179 +    }
   7.180 +    return false;
   7.181 +  }
   7.182 +};
   7.183 +
   7.184 +bool G1CollectedHeap::check_cset_heap_region_claim_values(jint claim_value) {
   7.185 +  CheckClaimValuesInCSetHRClosure cl(claim_value);
   7.186 +  collection_set_iterate(&cl);
   7.187 +  return cl.failures() == 0;
   7.188 +}
   7.189  #endif // ASSERT
   7.190  
   7.191 +// We want the parallel threads to start their collection
   7.192 +// set iteration at different collection set regions to
   7.193 +// avoid contention.
   7.194 +// If we have:
   7.195 +//          n collection set regions
   7.196 +//          p threads
   7.197 +// Then thread t will start at region t * floor (n/p)
   7.198 +
   7.199 +HeapRegion* G1CollectedHeap::start_cset_region_for_worker(int worker_i) {
   7.200 +  HeapRegion* result = g1_policy()->collection_set();
   7.201 +  if (G1CollectedHeap::use_parallel_gc_threads()) {
   7.202 +    size_t cs_size = g1_policy()->cset_region_length();
   7.203 +    int n_workers = workers()->total_workers();
   7.204 +    size_t cs_spans = cs_size / n_workers;
   7.205 +    size_t ind      = cs_spans * worker_i;
   7.206 +    for (size_t i = 0; i < ind; i++) {
   7.207 +      result = result->next_in_collection_set();
   7.208 +    }
   7.209 +  }
   7.210 +  return result;
   7.211 +}
   7.212 +
   7.213  void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) {
   7.214    HeapRegion* r = g1_policy()->collection_set();
   7.215    while (r != NULL) {
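
Concretely, with n = 10 collection-set regions and p = 4 worker threads, floor(n/p) = 2, so workers 0 through 3 start their iteration at regions 0, 2, 4 and 6 and then walk the linked list from there (the last worker simply covers the remainder). Note that the stride is computed from workers()->total_workers(). A worked sketch of just the index arithmetic, with illustrative values:

    size_t cs_size   = 10;                     // regions in the collection set
    int    n_workers = 4;                      // threads iterating it
    size_t cs_spans  = cs_size / n_workers;    // floor(10 / 4) = 2
    for (int t = 0; t < n_workers; t++) {
      size_t start_index = cs_spans * t;       // 0, 2, 4, 6
    }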
   7.216 @@ -2918,6 +3014,7 @@
   7.217      HandleMark hm;
   7.218      VerifyRegionClosure blk(_allow_dirty, true, _vo);
   7.219      _g1h->heap_region_par_iterate_chunked(&blk, worker_i,
   7.220 +                                          _g1h->workers()->active_workers(),
   7.221                                            HeapRegion::ParVerifyClaimValue);
   7.222      if (blk.failures()) {
   7.223        _failures = true;
   7.224 @@ -2935,6 +3032,10 @@
   7.225    if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) {
   7.226      if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); }
   7.227      VerifyRootsClosure rootsCl(vo);
   7.228 +
   7.229 +    assert(Thread::current()->is_VM_thread(),
   7.230 +      "Expected to be executed serially by the VM thread at this point");
   7.231 +
   7.232      CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false);
   7.233  
   7.234      // We apply the relevant closures to all the oops in the
   7.235 @@ -2979,7 +3080,10 @@
   7.236               "sanity check");
   7.237  
   7.238        G1ParVerifyTask task(this, allow_dirty, vo);
   7.239 -      int n_workers = workers()->total_workers();
   7.240 +      assert(UseDynamicNumberOfGCThreads ||
   7.241 +        workers()->active_workers() == workers()->total_workers(),
   7.242 +        "If not dynamic should be using all the workers");
   7.243 +      int n_workers = workers()->active_workers();
   7.244        set_par_threads(n_workers);
   7.245        workers()->run_task(&task);
   7.246        set_par_threads(0);
   7.247 @@ -2987,6 +3091,8 @@
   7.248          failures = true;
   7.249        }
   7.250  
   7.251 +      // Checks that the expected amount of parallel work was done.
   7.252 +      // The implication is that n_workers is > 0.
   7.253        assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue),
   7.254               "sanity check");
   7.255  
   7.256 @@ -3210,8 +3316,6 @@
   7.257    }
   7.258  }
   7.259  
   7.260 -// <NEW PREDICTION>
   7.261 -
   7.262  double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr,
   7.263                                                         bool young) {
   7.264    return _g1_policy->predict_region_elapsed_time_ms(hr, young);
   7.265 @@ -3251,7 +3355,7 @@
   7.266  void
   7.267  G1CollectedHeap::setup_surviving_young_words() {
   7.268    guarantee( _surviving_young_words == NULL, "pre-condition" );
   7.269 -  size_t array_length = g1_policy()->young_cset_length();
   7.270 +  size_t array_length = g1_policy()->young_cset_region_length();
   7.271    _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length);
   7.272    if (_surviving_young_words == NULL) {
   7.273      vm_exit_out_of_memory(sizeof(size_t) * array_length,
   7.274 @@ -3268,7 +3372,7 @@
   7.275  void
   7.276  G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
   7.277    MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
   7.278 -  size_t array_length = g1_policy()->young_cset_length();
   7.279 +  size_t array_length = g1_policy()->young_cset_region_length();
   7.280    for (size_t i = 0; i < array_length; ++i)
   7.281      _surviving_young_words[i] += surv_young_words[i];
   7.282  }
   7.283 @@ -3280,8 +3384,6 @@
   7.284    _surviving_young_words = NULL;
   7.285  }
   7.286  
   7.287 -// </NEW PREDICTION>
   7.288 -
   7.289  #ifdef ASSERT
   7.290  class VerifyCSetClosure: public HeapRegionClosure {
   7.291  public:
   7.292 @@ -3404,6 +3506,10 @@
   7.293      assert(check_young_list_well_formed(),
   7.294        "young list should be well formed");
   7.295  
   7.296 +    // Don't dynamically change the number of GC threads this early.  A value of
   7.297 +    // 0 is used to indicate serial work.  When parallel work is done,
   7.298 +    // it will be set.
   7.299 +
   7.300      { // Call to jvmpi::post_class_unload_events must occur outside of active GC
   7.301        IsGCActiveMark x;
   7.302  
   7.303 @@ -3617,7 +3723,8 @@
   7.304          double end_time_sec = os::elapsedTime();
   7.305          double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
   7.306          g1_policy()->record_pause_time_ms(pause_time_ms);
   7.307 -        g1_policy()->record_collection_pause_end();
   7.308 +        int active_gc_threads = workers()->active_workers();
   7.309 +        g1_policy()->record_collection_pause_end(active_gc_threads);
   7.310  
   7.311          MemoryService::track_memory_usage();
   7.312  
   7.313 @@ -4158,7 +4265,7 @@
   7.314    // non-young regions (where the age is -1)
   7.315    // We also add a few elements at the beginning and at the end in
   7.316    // an attempt to eliminate cache contention
   7.317 -  size_t real_length = 1 + _g1h->g1_policy()->young_cset_length();
   7.318 +  size_t real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
   7.319    size_t array_length = PADDING_ELEM_NUM +
   7.320                          real_length +
   7.321                          PADDING_ELEM_NUM;
   7.322 @@ -4564,13 +4671,13 @@
   7.323    }
   7.324  
   7.325  public:
   7.326 -  G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues)
   7.327 +  G1ParTask(G1CollectedHeap* g1h,
   7.328 +            RefToScanQueueSet *task_queues)
   7.329      : AbstractGangTask("G1 collection"),
   7.330        _g1h(g1h),
   7.331        _queues(task_queues),
   7.332 -      _terminator(workers, _queues),
   7.333 -      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true),
   7.334 -      _n_workers(workers)
   7.335 +      _terminator(0, _queues),
   7.336 +      _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
   7.337    {}
   7.338  
   7.339    RefToScanQueueSet* queues() { return _queues; }
   7.340 @@ -4579,6 +4686,20 @@
   7.341      return queues()->queue(i);
   7.342    }
   7.343  
   7.344 +  ParallelTaskTerminator* terminator() { return &_terminator; }
   7.345 +
   7.346 +  virtual void set_for_termination(int active_workers) {
   7.347 +    // This task calls set_n_termination() in par_non_clean_card_iterate_work()
   7.348 +    // in the young space (_par_seq_tasks) in the G1 heap
   7.349 +    // for SequentialSubTasksDone.
   7.350 +    // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap
   7.351 +    // both of which need setting by set_n_termination().
   7.352 +    _g1h->SharedHeap::set_n_termination(active_workers);
   7.353 +    _g1h->set_n_termination(active_workers);
   7.354 +    terminator()->reset_for_reuse(active_workers);
   7.355 +    _n_workers = active_workers;
   7.356 +  }
   7.357 +
   7.358    void work(int i) {
   7.359      if (i >= _n_workers) return;  // no work needed this round
   7.360  
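The set_for_termination()/work(i) pairing above sizes the termination protocol to the number of active workers and lets any extra gang threads return immediately. A minimal sketch of that pattern outside HotSpot, using std::thread and hypothetical names:

#include <cstdio>
#include <thread>
#include <vector>

struct GangTaskSketch {
  int n_workers = 0;                          // set just before dispatch
  void set_for_termination(int active) { n_workers = active; }
  void work(int i) {
    if (i >= n_workers) return;               // no work needed this round
    printf("worker %d participating\n", i);
  }
};

int main() {
  GangTaskSketch task;
  const int total_workers  = 8;               // gang size (hypothetical)
  const int active_workers = 3;               // chosen per pause (hypothetical)
  task.set_for_termination(active_workers);
  std::vector<std::thread> gang;
  for (int i = 0; i < total_workers; i++) {
    gang.emplace_back([&task, i] { task.work(i); });
  }
  for (std::thread& t : gang) { t.join(); }
  return 0;
}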
   7.361 @@ -4863,12 +4984,12 @@
   7.362  private:
   7.363    G1CollectedHeap*   _g1h;
   7.364    RefToScanQueueSet* _queues;
   7.365 -  WorkGang*          _workers;
   7.366 +  FlexibleWorkGang*  _workers;
   7.367    int                _active_workers;
   7.368  
   7.369  public:
   7.370    G1STWRefProcTaskExecutor(G1CollectedHeap* g1h,
   7.371 -                        WorkGang* workers,
   7.372 +                        FlexibleWorkGang* workers,
   7.373                          RefToScanQueueSet *task_queues,
   7.374                          int n_workers) :
   7.375      _g1h(g1h),
   7.376 @@ -5124,11 +5245,13 @@
   7.377    // referents points to another object which is also referenced by an
   7.378    // object discovered by the STW ref processor.
   7.379  
   7.380 -  int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
   7.381 -                        workers()->total_workers() : 1);
   7.382 -
   7.383 -  set_par_threads(n_workers);
   7.384 -  G1ParPreserveCMReferentsTask keep_cm_referents(this, n_workers, _task_queues);
   7.385 +  int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
   7.386 +                        workers()->active_workers() : 1);
   7.387 +
   7.388 +  assert(active_workers == workers()->active_workers(),
   7.389 +         "Need to reset active_workers");
   7.390 +  set_par_threads(active_workers);
   7.391 +  G1ParPreserveCMReferentsTask keep_cm_referents(this, active_workers, _task_queues);
   7.392  
   7.393    if (G1CollectedHeap::use_parallel_gc_threads()) {
   7.394      workers()->run_task(&keep_cm_referents);
   7.395 @@ -5194,7 +5317,6 @@
   7.396                                        NULL);
   7.397    } else {
   7.398      // Parallel reference processing
   7.399 -    int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
   7.400      assert(rp->num_q() == active_workers, "sanity");
   7.401      assert(active_workers <= rp->max_num_q(), "sanity");
   7.402  
   7.403 @@ -5227,7 +5349,9 @@
   7.404    } else {
   7.405      // Parallel reference enqueuing
   7.406  
   7.407 -    int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
   7.408 +    int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1);
   7.409 +    assert(active_workers == workers()->active_workers(),
   7.410 +           "Need to reset active_workers");
   7.411      assert(rp->num_q() == active_workers, "sanity");
   7.412      assert(active_workers <= rp->max_num_q(), "sanity");
   7.413  
   7.414 @@ -5254,9 +5378,24 @@
   7.415    concurrent_g1_refine()->set_use_cache(false);
   7.416    concurrent_g1_refine()->clear_hot_cache_claimed_index();
   7.417  
   7.418 -  int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1);
   7.419 -  set_par_threads(n_workers);
   7.420 -  G1ParTask g1_par_task(this, n_workers, _task_queues);
   7.421 +  int n_workers;
   7.422 +  if (G1CollectedHeap::use_parallel_gc_threads()) {
   7.423 +    n_workers =
   7.424 +      AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
   7.425 +                                     workers()->active_workers(),
   7.426 +                                     Threads::number_of_non_daemon_threads());
   7.427 +    assert(UseDynamicNumberOfGCThreads ||
   7.428 +           n_workers == workers()->total_workers(),
    7.429 +           "If not dynamic should be using all the workers");
   7.430 +    set_par_threads(n_workers);
   7.431 +  } else {
   7.432 +    assert(n_par_threads() == 0,
   7.433 +           "Should be the original non-parallel value");
   7.434 +    n_workers = 1;
   7.435 +  }
   7.436 +  workers()->set_active_workers(n_workers);
   7.437 +
   7.438 +  G1ParTask g1_par_task(this, _task_queues);
   7.439  
   7.440    init_for_evac_failure(NULL);
   7.441  
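The n_workers value above is delegated to AdaptiveSizePolicy::calc_active_workers, whose heuristic is not part of this changeset. Purely as an assumed, simplified stand-in, a policy of that shape might clamp the request between one worker and the gang size while tracking application demand:

#include <algorithm>
#include <cstdio>

// Assumed stand-in only, NOT the real AdaptiveSizePolicy heuristic:
// keep at least one worker, never exceed the gang size, otherwise
// follow the larger of the previous active count and app demand.
static int calc_active_workers_sketch(int total_workers,
                                      int prev_active,
                                      int non_daemon_threads) {
  int wanted = std::max(prev_active, non_daemon_threads);
  return std::max(1, std::min(total_workers, wanted));
}

int main() {
  printf("%d\n", calc_active_workers_sketch(13, 4, 7));   // prints 7
  printf("%d\n", calc_active_workers_sketch(13, 4, 40));  // prints 13
  return 0;
}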
   7.442 @@ -5269,6 +5408,10 @@
   7.443      // The individual threads will set their evac-failure closures.
   7.444      StrongRootsScope srs(this);
   7.445      if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr();
    7.446 +    // These tasks use SharedHeap::_process_strong_tasks
   7.447 +    assert(UseDynamicNumberOfGCThreads ||
   7.448 +           workers()->active_workers() == workers()->total_workers(),
    7.449 +           "If not dynamic should be using all the workers");
   7.450      workers()->run_task(&g1_par_task);
   7.451    } else {
   7.452      StrongRootsScope srs(this);
   7.453 @@ -5277,6 +5420,7 @@
   7.454  
   7.455    double par_time = (os::elapsedTime() - start_par) * 1000.0;
   7.456    g1_policy()->record_par_time(par_time);
   7.457 +
   7.458    set_par_threads(0);
   7.459  
   7.460    // Process any discovered reference objects - we have
   7.461 @@ -5304,8 +5448,11 @@
   7.462  
   7.463    finalize_for_evac_failure();
   7.464  
   7.465 -  // Must do this before removing self-forwarding pointers, which clears
   7.466 -  // the per-region evac-failure flags.
   7.467 +  // Must do this before clearing the per-region evac-failure flags
   7.468 +  // (which is currently done when we free the collection set).
   7.469 +  // We also only do this if marking is actually in progress and so
   7.470 +  // have to do this before we set the mark_in_progress flag at the
   7.471 +  // end of an initial mark pause.
   7.472    concurrent_mark()->complete_marking_in_collection_set();
   7.473  
   7.474    if (evacuation_failed()) {
   7.475 @@ -5567,7 +5714,6 @@
   7.476  
   7.477    while (cur != NULL) {
   7.478      assert(!is_on_master_free_list(cur), "sanity");
   7.479 -
   7.480      if (non_young) {
   7.481        if (cur->is_young()) {
   7.482          double end_sec = os::elapsedTime();
   7.483 @@ -5578,12 +5724,14 @@
   7.484          non_young = false;
   7.485        }
   7.486      } else {
   7.487 -      double end_sec = os::elapsedTime();
   7.488 -      double elapsed_ms = (end_sec - start_sec) * 1000.0;
   7.489 -      young_time_ms += elapsed_ms;
   7.490 -
   7.491 -      start_sec = os::elapsedTime();
   7.492 -      non_young = true;
   7.493 +      if (!cur->is_young()) {
   7.494 +        double end_sec = os::elapsedTime();
   7.495 +        double elapsed_ms = (end_sec - start_sec) * 1000.0;
   7.496 +        young_time_ms += elapsed_ms;
   7.497 +
   7.498 +        start_sec = os::elapsedTime();
   7.499 +        non_young = true;
   7.500 +      }
   7.501      }
   7.502  
   7.503      rs_lengths += cur->rem_set()->occupied();
   7.504 @@ -5595,8 +5743,8 @@
   7.505  
   7.506      if (cur->is_young()) {
   7.507        int index = cur->young_index_in_cset();
   7.508 -      guarantee( index != -1, "invariant" );
   7.509 -      guarantee( (size_t)index < policy->young_cset_length(), "invariant" );
   7.510 +      assert(index != -1, "invariant");
   7.511 +      assert((size_t) index < policy->young_cset_region_length(), "invariant");
   7.512        size_t words_survived = _surviving_young_words[index];
   7.513        cur->record_surv_words_in_group(words_survived);
   7.514  
   7.515 @@ -5607,7 +5755,7 @@
   7.516        cur->set_next_young_region(NULL);
   7.517      } else {
   7.518        int index = cur->young_index_in_cset();
   7.519 -      guarantee( index == -1, "invariant" );
   7.520 +      assert(index == -1, "invariant");
   7.521      }
   7.522  
   7.523      assert( (cur->is_young() && cur->young_index_in_cset() > -1) ||
   7.524 @@ -5615,13 +5763,26 @@
   7.525              "invariant" );
   7.526  
   7.527      if (!cur->evacuation_failed()) {
   7.528 +      MemRegion used_mr = cur->used_region();
   7.529 +
   7.530        // And the region is empty.
   7.531 -      assert(!cur->is_empty(), "Should not have empty regions in a CS.");
   7.532 +      assert(!used_mr.is_empty(), "Should not have empty regions in a CS.");
   7.533 +
   7.534 +      // If marking is in progress then clear any objects marked in
   7.535 +      // the current region. Note mark_in_progress() returns false,
   7.536 +      // even during an initial mark pause, until the set_marking_started()
   7.537 +      // call which takes place later in the pause.
   7.538 +      if (mark_in_progress()) {
   7.539 +        assert(!g1_policy()->during_initial_mark_pause(), "sanity");
   7.540 +        _cm->nextMarkBitMap()->clearRange(used_mr);
   7.541 +      }
   7.542 +
   7.543        free_region(cur, &pre_used, &local_free_list, false /* par */);
   7.544      } else {
   7.545        cur->uninstall_surv_rate_group();
   7.546 -      if (cur->is_young())
   7.547 +      if (cur->is_young()) {
   7.548          cur->set_young_index_in_cset(-1);
   7.549 +      }
   7.550        cur->set_not_young();
   7.551        cur->set_evacuation_failed(false);
   7.552        // The region is now considered to be old.
   7.553 @@ -5635,10 +5796,12 @@
   7.554  
   7.555    double end_sec = os::elapsedTime();
   7.556    double elapsed_ms = (end_sec - start_sec) * 1000.0;
   7.557 -  if (non_young)
   7.558 +
   7.559 +  if (non_young) {
   7.560      non_young_time_ms += elapsed_ms;
   7.561 -  else
   7.562 +  } else {
   7.563      young_time_ms += elapsed_ms;
   7.564 +  }
   7.565  
   7.566    update_sets_after_freeing_regions(pre_used, &local_free_list,
   7.567                                      NULL /* old_proxy_set */,
   7.568 @@ -5722,7 +5885,6 @@
   7.569    assert(heap_lock_held_for_gc(),
   7.570                "the heap lock should already be held by or for this thread");
   7.571    _young_list->push_region(hr);
   7.572 -  g1_policy()->set_region_short_lived(hr);
   7.573  }
   7.574  
   7.575  class NoYoungRegionsClosure: public HeapRegionClosure {
   7.576 @@ -5880,7 +6042,6 @@
   7.577      HeapRegion* new_alloc_region = new_region(word_size,
   7.578                                                false /* do_expand */);
   7.579      if (new_alloc_region != NULL) {
   7.580 -      g1_policy()->update_region_num(true /* next_is_young */);
   7.581        set_region_short_lived_locked(new_alloc_region);
   7.582        _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full);
   7.583        return new_alloc_region;
   7.584 @@ -5908,6 +6069,21 @@
   7.585    return _g1h->new_mutator_alloc_region(word_size, force);
   7.586  }
   7.587  
   7.588 +void G1CollectedHeap::set_par_threads() {
   7.589 +  // Don't change the number of workers.  Use the value previously set
   7.590 +  // in the workgroup.
   7.591 +  int n_workers = workers()->active_workers();
    7.592 +  assert(UseDynamicNumberOfGCThreads ||
    7.593 +         n_workers == workers()->total_workers(),
    7.594 +         "Otherwise should be using the total number of workers");
   7.595 +  if (n_workers == 0) {
   7.596 +    assert(false, "Should have been set in prior evacuation pause.");
   7.597 +    n_workers = ParallelGCThreads;
   7.598 +    workers()->set_active_workers(n_workers);
   7.599 +  }
   7.600 +  set_par_threads(n_workers);
   7.601 +}
   7.602 +
   7.603  void MutatorAllocRegion::retire_region(HeapRegion* alloc_region,
   7.604                                         size_t allocated_bytes) {
   7.605    _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);
     8.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Dec 01 13:42:41 2011 -0500
     8.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Dec 02 08:52:53 2011 -0500
     8.3 @@ -987,6 +987,16 @@
     8.4  
     8.5    void set_par_threads(int t) {
     8.6      SharedHeap::set_par_threads(t);
     8.7 +    // Done in SharedHeap but oddly there are
      8.8 +    // two _process_strong_tasks instances in a G1CollectedHeap
     8.9 +    // so do it here too.
    8.10 +    _process_strong_tasks->set_n_threads(t);
    8.11 +  }
    8.12 +
    8.13 +  // Set _n_par_threads according to a policy TBD.
    8.14 +  void set_par_threads();
    8.15 +
    8.16 +  void set_n_termination(int t) {
    8.17      _process_strong_tasks->set_n_threads(t);
    8.18    }
    8.19  
    8.20 @@ -1276,6 +1286,7 @@
    8.21    // i.e., that a closure never attempt to abort a traversal.
    8.22    void heap_region_par_iterate_chunked(HeapRegionClosure* blk,
    8.23                                         int worker,
    8.24 +                                       int no_of_par_workers,
    8.25                                         jint claim_value);
    8.26  
    8.27    // It resets all the region claim values to the default.
    8.28 @@ -1283,8 +1294,17 @@
    8.29  
    8.30  #ifdef ASSERT
    8.31    bool check_heap_region_claim_values(jint claim_value);
    8.32 +
    8.33 +  // Same as the routine above but only checks regions in the
    8.34 +  // current collection set.
    8.35 +  bool check_cset_heap_region_claim_values(jint claim_value);
    8.36  #endif // ASSERT
    8.37  
    8.38 +  // Given the id of a worker, calculate a suitable
    8.39 +  // starting region for iterating over the current
    8.40 +  // collection set.
    8.41 +  HeapRegion* start_cset_region_for_worker(int worker_i);
    8.42 +
    8.43    // Iterate over the regions (if any) in the current collection set.
    8.44    void collection_set_iterate(HeapRegionClosure* blk);
    8.45  
    8.46 @@ -1610,16 +1630,12 @@
    8.47  public:
    8.48    void stop_conc_gc_threads();
    8.49  
    8.50 -  // <NEW PREDICTION>
    8.51 -
    8.52    double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
    8.53    void check_if_region_is_too_expensive(double predicted_time_ms);
    8.54    size_t pending_card_num();
    8.55    size_t max_pending_card_num();
    8.56    size_t cards_scanned();
    8.57  
    8.58 -  // </NEW PREDICTION>
    8.59 -
    8.60  protected:
    8.61    size_t _max_heap_capacity;
    8.62  };
     9.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Dec 01 13:42:41 2011 -0500
     9.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Dec 02 08:52:53 2011 -0500
     9.3 @@ -36,10 +36,6 @@
     9.4  #include "runtime/mutexLocker.hpp"
     9.5  #include "utilities/debug.hpp"
     9.6  
     9.7 -#define PREDICTIONS_VERBOSE 0
     9.8 -
     9.9 -// <NEW PREDICTION>
    9.10 -
    9.11  // Different defaults for different number of GC threads
    9.12  // They were chosen by running GCOld and SPECjbb on debris with different
    9.13  //   numbers of GC threads and choosing them based on the results
    9.14 @@ -80,8 +76,6 @@
    9.15    1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30
    9.16  };
    9.17  
    9.18 -// </NEW PREDICTION>
    9.19 -
    9.20  // Help class for avoiding interleaved logging
    9.21  class LineBuffer: public StackObj {
    9.22  
    9.23 @@ -137,10 +131,6 @@
    9.24    _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
    9.25                          ? ParallelGCThreads : 1),
    9.26  
    9.27 -  _n_pauses(0),
    9.28 -  _recent_rs_scan_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.29 -  _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.30 -  _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.31    _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.32    _all_pause_times_ms(new NumberSeq()),
    9.33    _stop_world_start(0.0),
    9.34 @@ -148,11 +138,10 @@
    9.35    _all_yield_times_ms(new NumberSeq()),
    9.36    _using_new_ratio_calculations(false),
    9.37  
    9.38 -  _all_mod_union_times_ms(new NumberSeq()),
    9.39 -
    9.40    _summary(new Summary()),
    9.41  
    9.42    _cur_clear_ct_time_ms(0.0),
    9.43 +  _mark_closure_time_ms(0.0),
    9.44  
    9.45    _cur_ref_proc_time_ms(0.0),
    9.46    _cur_ref_enq_time_ms(0.0),
    9.47 @@ -165,11 +154,6 @@
    9.48    _num_cc_clears(0L),
    9.49  #endif
    9.50  
    9.51 -  _region_num_young(0),
    9.52 -  _region_num_tenured(0),
    9.53 -  _prev_region_num_young(0),
    9.54 -  _prev_region_num_tenured(0),
    9.55 -
    9.56    _aux_num(10),
    9.57    _all_aux_times_ms(new NumberSeq[_aux_num]),
    9.58    _cur_aux_start_times_ms(new double[_aux_num]),
    9.59 @@ -179,8 +163,6 @@
    9.60    _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.61    _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.62  
    9.63 -  // <NEW PREDICTION>
    9.64 -
    9.65    _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
    9.66    _prev_collection_pause_end_ms(0.0),
    9.67    _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
    9.68 @@ -199,13 +181,10 @@
    9.69                                           new TruncatedSeq(TruncatedSeqLength)),
    9.70  
    9.71    _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
    9.72 -  _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)),
    9.73    _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)),
    9.74  
    9.75    _pause_time_target_ms((double) MaxGCPauseMillis),
    9.76  
    9.77 -  // </NEW PREDICTION>
    9.78 -
    9.79    _full_young_gcs(true),
    9.80    _full_young_pause_num(0),
    9.81    _partial_young_pause_num(0),
    9.82 @@ -221,16 +200,10 @@
    9.83  
    9.84     _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.85  
    9.86 -  _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.87 -  _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)),
    9.88 -
    9.89    _recent_avg_pause_time_ratio(0.0),
    9.90  
    9.91    _all_full_gc_times_ms(new NumberSeq()),
    9.92  
    9.93 -  // G1PausesBtwnConcMark defaults to -1
    9.94 -  // so the hack is to do the cast  QQQ FIXME
    9.95 -  _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
    9.96    _initiate_conc_mark_if_possible(false),
    9.97    _during_initial_mark_pause(false),
    9.98    _should_revert_to_full_young_gcs(false),
    9.99 @@ -242,22 +215,21 @@
   9.100  
   9.101    _prev_collection_pause_used_at_end_bytes(0),
   9.102  
   9.103 +  _eden_cset_region_length(0),
   9.104 +  _survivor_cset_region_length(0),
   9.105 +  _old_cset_region_length(0),
   9.106 +
   9.107    _collection_set(NULL),
   9.108 -  _collection_set_size(0),
   9.109    _collection_set_bytes_used_before(0),
   9.110  
   9.111    // Incremental CSet attributes
   9.112    _inc_cset_build_state(Inactive),
   9.113    _inc_cset_head(NULL),
   9.114    _inc_cset_tail(NULL),
   9.115 -  _inc_cset_size(0),
   9.116 -  _inc_cset_young_index(0),
   9.117    _inc_cset_bytes_used_before(0),
   9.118    _inc_cset_max_finger(NULL),
   9.119 -  _inc_cset_recorded_young_bytes(0),
   9.120    _inc_cset_recorded_rs_lengths(0),
   9.121    _inc_cset_predicted_elapsed_time_ms(0.0),
   9.122 -  _inc_cset_predicted_bytes_to_copy(0),
   9.123  
   9.124  #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
   9.125  #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
   9.126 @@ -325,8 +297,6 @@
   9.127    // start conservatively
   9.128    _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis;
   9.129  
   9.130 -  // <NEW PREDICTION>
   9.131 -
   9.132    int index;
   9.133    if (ParallelGCThreads == 0)
   9.134      index = 0;
   9.135 @@ -348,8 +318,6 @@
   9.136    _non_young_other_cost_per_region_ms_seq->add(
   9.137                             non_young_other_cost_per_region_ms_defaults[index]);
   9.138  
   9.139 -  // </NEW PREDICTION>
   9.140 -
   9.141    // Below, we might need to calculate the pause time target based on
   9.142    // the pause interval. When we do so we are going to give G1 maximum
   9.143    // flexibility and allow it to do pauses when it needs to. So, we'll
   9.144 @@ -908,9 +876,6 @@
   9.145  
   9.146    record_survivor_regions(0, NULL, NULL);
   9.147  
   9.148 -  _prev_region_num_young   = _region_num_young;
   9.149 -  _prev_region_num_tenured = _region_num_tenured;
   9.150 -
   9.151    _free_regions_at_end_of_collection = _g1->free_regions();
   9.152    // Reset survivors SurvRateGroup.
   9.153    _survivor_surv_rate_group->reset();
   9.154 @@ -982,10 +947,9 @@
   9.155      _cur_aux_times_set[i] = false;
   9.156    }
   9.157  
   9.158 -  // These are initialized to zero here and they are set during
   9.159 +  // This is initialized to zero here and is set during
   9.160    // the evacuation pause if marking is in progress.
   9.161    _cur_satb_drain_time_ms = 0.0;
   9.162 -  _last_satb_drain_processed_buffers = 0;
   9.163  
   9.164    _last_young_gc_full = false;
   9.165  
   9.166 @@ -996,10 +960,6 @@
   9.167    assert( verify_young_ages(), "region age verification" );
   9.168  }
   9.169  
   9.170 -void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) {
   9.171 -  _mark_closure_time_ms = mark_closure_time_ms;
   9.172 -}
   9.173 -
   9.174  void G1CollectorPolicy::record_concurrent_mark_init_end(double
   9.175                                                     mark_init_elapsed_time_ms) {
   9.176    _during_marking = true;
   9.177 @@ -1060,7 +1020,7 @@
   9.178    double total = 0.0;
   9.179    LineBuffer buf(level);
   9.180    buf.append("[%s (ms):", str);
   9.181 -  for (uint i = 0; i < ParallelGCThreads; ++i) {
   9.182 +  for (uint i = 0; i < no_of_gc_threads(); ++i) {
   9.183      double val = data[i];
   9.184      if (val < min)
   9.185        min = val;
   9.186 @@ -1070,7 +1030,7 @@
   9.187      buf.append("  %3.1lf", val);
   9.188    }
   9.189    buf.append_and_print_cr("");
   9.190 -  double avg = total / (double) ParallelGCThreads;
   9.191 +  double avg = total / (double) no_of_gc_threads();
   9.192    buf.append_and_print_cr(" Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf]",
   9.193      avg, min, max, max - min);
   9.194  }
   9.195 @@ -1082,7 +1042,7 @@
   9.196    double total = 0.0;
   9.197    LineBuffer buf(level);
   9.198    buf.append("[%s :", str);
   9.199 -  for (uint i = 0; i < ParallelGCThreads; ++i) {
   9.200 +  for (uint i = 0; i < no_of_gc_threads(); ++i) {
   9.201      double val = data[i];
   9.202      if (val < min)
   9.203        min = val;
   9.204 @@ -1092,7 +1052,7 @@
   9.205      buf.append(" %d", (int) val);
   9.206    }
   9.207    buf.append_and_print_cr("");
   9.208 -  double avg = total / (double) ParallelGCThreads;
   9.209 +  double avg = total / (double) no_of_gc_threads();
   9.210    buf.append_and_print_cr(" Sum: %d, Avg: %d, Min: %d, Max: %d, Diff: %d]",
   9.211      (int)total, (int)avg, (int)min, (int)max, (int)max - (int)min);
   9.212  }
   9.213 @@ -1112,10 +1072,10 @@
   9.214  double G1CollectorPolicy::avg_value(double* data) {
   9.215    if (G1CollectedHeap::use_parallel_gc_threads()) {
   9.216      double ret = 0.0;
   9.217 -    for (uint i = 0; i < ParallelGCThreads; ++i) {
   9.218 +    for (uint i = 0; i < no_of_gc_threads(); ++i) {
   9.219        ret += data[i];
   9.220      }
   9.221 -    return ret / (double) ParallelGCThreads;
   9.222 +    return ret / (double) no_of_gc_threads();
   9.223    } else {
   9.224      return data[0];
   9.225    }
   9.226 @@ -1124,7 +1084,7 @@
   9.227  double G1CollectorPolicy::max_value(double* data) {
   9.228    if (G1CollectedHeap::use_parallel_gc_threads()) {
   9.229      double ret = data[0];
   9.230 -    for (uint i = 1; i < ParallelGCThreads; ++i) {
   9.231 +    for (uint i = 1; i < no_of_gc_threads(); ++i) {
   9.232        if (data[i] > ret) {
   9.233          ret = data[i];
   9.234        }
   9.235 @@ -1138,7 +1098,7 @@
   9.236  double G1CollectorPolicy::sum_of_values(double* data) {
   9.237    if (G1CollectedHeap::use_parallel_gc_threads()) {
   9.238      double sum = 0.0;
   9.239 -    for (uint i = 0; i < ParallelGCThreads; i++) {
   9.240 +    for (uint i = 0; i < no_of_gc_threads(); i++) {
   9.241        sum += data[i];
   9.242      }
   9.243      return sum;
   9.244 @@ -1151,7 +1111,7 @@
   9.245    double ret = data1[0] + data2[0];
   9.246  
   9.247    if (G1CollectedHeap::use_parallel_gc_threads()) {
   9.248 -    for (uint i = 1; i < ParallelGCThreads; ++i) {
   9.249 +    for (uint i = 1; i < no_of_gc_threads(); ++i) {
   9.250        double data = data1[i] + data2[i];
   9.251        if (data > ret) {
   9.252          ret = data;
   9.253 @@ -1164,16 +1124,19 @@
   9.254  // Anything below that is considered to be zero
   9.255  #define MIN_TIMER_GRANULARITY 0.0000001
   9.256  
   9.257 -void G1CollectorPolicy::record_collection_pause_end() {
   9.258 +void G1CollectorPolicy::record_collection_pause_end(int no_of_gc_threads) {
   9.259    double end_time_sec = os::elapsedTime();
   9.260    double elapsed_ms = _last_pause_time_ms;
   9.261    bool parallel = G1CollectedHeap::use_parallel_gc_threads();
   9.262 +  assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(),
   9.263 +         "otherwise, the subtraction below does not make sense");
   9.264    size_t rs_size =
   9.265 -    _cur_collection_pause_used_regions_at_start - collection_set_size();
   9.266 +            _cur_collection_pause_used_regions_at_start - cset_region_length();
   9.267    size_t cur_used_bytes = _g1->used();
   9.268    assert(cur_used_bytes == _g1->recalculate_used(), "It should!");
   9.269    bool last_pause_included_initial_mark = false;
   9.270    bool update_stats = !_g1->evacuation_failed();
   9.271 +  set_no_of_gc_threads(no_of_gc_threads);
   9.272  
   9.273  #ifndef PRODUCT
   9.274    if (G1YoungSurvRateVerbose) {
   9.275 @@ -1226,10 +1189,6 @@
   9.276    _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0,
   9.277                            end_time_sec, false);
   9.278  
   9.279 -  guarantee(_cur_collection_pause_used_regions_at_start >=
   9.280 -            collection_set_size(),
   9.281 -            "Negative RS size?");
   9.282 -
   9.283    // This assert is exempted when we're doing parallel collection pauses,
   9.284    // because the fragmentation caused by the parallel GC allocation buffers
   9.285    // can lead to more memory being used during collection than was used
   9.286 @@ -1253,8 +1212,6 @@
   9.287      (double)surviving_bytes/
   9.288      (double)_collection_set_bytes_used_before;
   9.289  
   9.290 -  _n_pauses++;
   9.291 -
   9.292    // These values are used to update the summary information that is
   9.293    // displayed when TraceGen0Time is enabled, and are output as part
   9.294    // of the PrintGCDetails output, in the non-parallel case.
   9.295 @@ -1291,14 +1248,15 @@
   9.296    // current value of "other time"
   9.297    other_time_ms -= _cur_clear_ct_time_ms;
   9.298  
   9.299 +  // Subtract the time spent completing marking in the collection
   9.300 +  // set. Note if marking is not in progress during the pause
   9.301 +  // the value of _mark_closure_time_ms will be zero.
   9.302 +  other_time_ms -= _mark_closure_time_ms;
   9.303 +
   9.304    // TraceGen0Time and TraceGen1Time summary info updating.
   9.305    _all_pause_times_ms->add(elapsed_ms);
   9.306  
   9.307    if (update_stats) {
   9.308 -    _recent_rs_scan_times_ms->add(scan_rs_time);
   9.309 -    _recent_pause_times_ms->add(elapsed_ms);
   9.310 -    _recent_rs_sizes->add(rs_size);
   9.311 -
   9.312      _summary->record_total_time_ms(elapsed_ms);
   9.313      _summary->record_other_time_ms(other_time_ms);
   9.314  
   9.315 @@ -1342,9 +1300,6 @@
   9.316             || surviving_bytes <= _collection_set_bytes_used_before,
   9.317             "Or else negative collection!");
   9.318  
   9.319 -    _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before);
   9.320 -    _recent_CS_bytes_surviving->add(surviving_bytes);
   9.321 -
   9.322      // this is where we update the allocation rate of the application
   9.323      double app_time_ms =
   9.324        (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms);
   9.325 @@ -1354,13 +1309,17 @@
   9.326        // We'll just set it to something (arbitrarily) small.
   9.327        app_time_ms = 1.0;
   9.328      }
   9.329 -    size_t regions_allocated =
   9.330 -      (_region_num_young - _prev_region_num_young) +
   9.331 -      (_region_num_tenured - _prev_region_num_tenured);
   9.332 +    // We maintain the invariant that all objects allocated by mutator
   9.333 +    // threads will be allocated out of eden regions. So, we can use
   9.334 +    // the eden region number allocated since the previous GC to
   9.335 +    // calculate the application's allocate rate. The only exception
   9.336 +    // to that is humongous objects that are allocated separately. But
   9.337 +    // given that humongous object allocations do not really affect
   9.338 +    // either the pause's duration nor when the next pause will take
   9.339 +    // place we can safely ignore them here.
   9.340 +    size_t regions_allocated = eden_cset_region_length();
   9.341      double alloc_rate_ms = (double) regions_allocated / app_time_ms;
   9.342      _alloc_rate_ms_seq->add(alloc_rate_ms);
   9.343 -    _prev_region_num_young   = _region_num_young;
   9.344 -    _prev_region_num_tenured = _region_num_tenured;
   9.345  
   9.346      double interval_ms =
   9.347        (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0;
   9.348 @@ -1398,33 +1357,6 @@
   9.349      }
   9.350    }
   9.351  
   9.352 -
   9.353 -  if (G1PolicyVerbose > 1) {
   9.354 -    gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
   9.355 -  }
   9.356 -
   9.357 -  if (G1PolicyVerbose > 1) {
   9.358 -    gclog_or_tty->print_cr("      ET: %10.6f ms           (avg: %10.6f ms)\n"
   9.359 -                           "       ET-RS:  %10.6f ms      (avg: %10.6f ms)\n"
   9.360 -                           "      |RS|: " SIZE_FORMAT,
   9.361 -                           elapsed_ms, recent_avg_time_for_pauses_ms(),
   9.362 -                           scan_rs_time, recent_avg_time_for_rs_scan_ms(),
   9.363 -                           rs_size);
   9.364 -
   9.365 -    gclog_or_tty->print_cr("       Used at start: " SIZE_FORMAT"K"
   9.366 -                           "       At end " SIZE_FORMAT "K\n"
   9.367 -                           "       garbage      : " SIZE_FORMAT "K"
   9.368 -                           "       of     " SIZE_FORMAT "K\n"
   9.369 -                           "       survival     : %6.2f%%  (%6.2f%% avg)",
   9.370 -                           _cur_collection_pause_used_at_start_bytes/K,
   9.371 -                           _g1->used()/K, freed_bytes/K,
   9.372 -                           _collection_set_bytes_used_before/K,
   9.373 -                           survival_fraction*100.0,
   9.374 -                           recent_avg_survival_fraction()*100.0);
   9.375 -    gclog_or_tty->print_cr("       Recent %% gc pause time: %6.2f",
   9.376 -                           recent_avg_pause_time_ratio() * 100.0);
   9.377 -  }
   9.378 -
   9.379    // PrintGCDetails output
   9.380    if (PrintGCDetails) {
   9.381      bool print_marking_info =
   9.382 @@ -1436,7 +1368,6 @@
   9.383  
   9.384      if (print_marking_info) {
   9.385        print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
   9.386 -      print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers);
   9.387      }
   9.388  
   9.389      if (parallel) {
   9.390 @@ -1478,6 +1409,9 @@
   9.391        print_stats(1, "Scan RS", scan_rs_time);
   9.392        print_stats(1, "Object Copying", obj_copy_time);
   9.393      }
   9.394 +    if (print_marking_info) {
   9.395 +      print_stats(1, "Complete CSet Marking", _mark_closure_time_ms);
   9.396 +    }
   9.397      print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
   9.398  #ifndef PRODUCT
   9.399      print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms);
   9.400 @@ -1489,9 +1423,14 @@
   9.401      }
   9.402  #endif
   9.403      print_stats(1, "Other", other_time_ms);
   9.404 -    print_stats(2, "Choose CSet", _recorded_young_cset_choice_time_ms);
   9.405 +    print_stats(2, "Choose CSet",
   9.406 +                   (_recorded_young_cset_choice_time_ms +
   9.407 +                    _recorded_non_young_cset_choice_time_ms));
   9.408      print_stats(2, "Ref Proc", _cur_ref_proc_time_ms);
   9.409      print_stats(2, "Ref Enq", _cur_ref_enq_time_ms);
   9.410 +    print_stats(2, "Free CSet",
   9.411 +                   (_recorded_young_free_cset_time_ms +
   9.412 +                    _recorded_non_young_free_cset_time_ms));
   9.413  
   9.414      for (int i = 0; i < _aux_num; ++i) {
   9.415        if (_cur_aux_times_set[i]) {
   9.416 @@ -1576,8 +1515,6 @@
   9.417    _short_lived_surv_rate_group->start_adding_regions();
   9.418    // do that for any other surv rate groupsx
   9.419  
   9.420 -  // <NEW PREDICTION>
   9.421 -
   9.422    if (update_stats) {
   9.423      double pause_time_ms = elapsed_ms;
   9.424  
   9.425 @@ -1631,21 +1568,21 @@
   9.426         _mark_closure_time_ms + termination_time);
   9.427  
   9.428      double young_other_time_ms = 0.0;
   9.429 -    if (_recorded_young_regions > 0) {
   9.430 +    if (young_cset_region_length() > 0) {
   9.431        young_other_time_ms =
   9.432          _recorded_young_cset_choice_time_ms +
   9.433          _recorded_young_free_cset_time_ms;
   9.434        _young_other_cost_per_region_ms_seq->add(young_other_time_ms /
   9.435 -                                             (double) _recorded_young_regions);
   9.436 +                                          (double) young_cset_region_length());
   9.437      }
   9.438      double non_young_other_time_ms = 0.0;
   9.439 -    if (_recorded_non_young_regions > 0) {
   9.440 +    if (old_cset_region_length() > 0) {
   9.441        non_young_other_time_ms =
   9.442          _recorded_non_young_cset_choice_time_ms +
   9.443          _recorded_non_young_free_cset_time_ms;
   9.444  
   9.445        _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms /
   9.446 -                                         (double) _recorded_non_young_regions);
   9.447 +                                            (double) old_cset_region_length());
   9.448      }
   9.449  
   9.450      double constant_other_time_ms = all_other_time_ms -
   9.451 @@ -1659,7 +1596,6 @@
   9.452      }
   9.453  
   9.454      _pending_cards_seq->add((double) _pending_cards);
   9.455 -    _scanned_cards_seq->add((double) cards_scanned);
   9.456      _rs_lengths_seq->add((double) _max_rs_lengths);
   9.457  
   9.458      double expensive_region_limit_ms =
   9.459 @@ -1670,49 +1606,6 @@
   9.460        expensive_region_limit_ms = (double) MaxGCPauseMillis;
   9.461      }
   9.462      _expensive_region_limit_ms = expensive_region_limit_ms;
   9.463 -
   9.464 -    if (PREDICTIONS_VERBOSE) {
   9.465 -      gclog_or_tty->print_cr("");
   9.466 -      gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d "
   9.467 -                    "REGIONS %d %d %d "
   9.468 -                    "PENDING_CARDS %d %d "
   9.469 -                    "CARDS_SCANNED %d %d "
   9.470 -                    "RS_LENGTHS %d %d "
   9.471 -                    "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf "
   9.472 -                    "SURVIVAL_RATIO %1.6lf %1.6lf "
   9.473 -                    "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf "
   9.474 -                    "OTHER_YOUNG %1.6lf %1.6lf "
   9.475 -                    "OTHER_NON_YOUNG %1.6lf %1.6lf "
   9.476 -                    "VTIME_DIFF %1.6lf TERMINATION %1.6lf "
   9.477 -                    "ELAPSED %1.6lf %1.6lf ",
   9.478 -                    _cur_collection_start_sec,
   9.479 -                    (!_last_young_gc_full) ? 2 :
   9.480 -                    (last_pause_included_initial_mark) ? 1 : 0,
   9.481 -                    _recorded_region_num,
   9.482 -                    _recorded_young_regions,
   9.483 -                    _recorded_non_young_regions,
   9.484 -                    _predicted_pending_cards, _pending_cards,
   9.485 -                    _predicted_cards_scanned, cards_scanned,
   9.486 -                    _predicted_rs_lengths, _max_rs_lengths,
   9.487 -                    _predicted_rs_update_time_ms, update_rs_time,
   9.488 -                    _predicted_rs_scan_time_ms, scan_rs_time,
   9.489 -                    _predicted_survival_ratio, survival_ratio,
   9.490 -                    _predicted_object_copy_time_ms, obj_copy_time,
   9.491 -                    _predicted_constant_other_time_ms, constant_other_time_ms,
   9.492 -                    _predicted_young_other_time_ms, young_other_time_ms,
   9.493 -                    _predicted_non_young_other_time_ms,
   9.494 -                    non_young_other_time_ms,
   9.495 -                    _vtime_diff_ms, termination_time,
   9.496 -                    _predicted_pause_time_ms, elapsed_ms);
   9.497 -    }
   9.498 -
   9.499 -    if (G1PolicyVerbose > 0) {
   9.500 -      gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms",
   9.501 -                    _predicted_pause_time_ms,
   9.502 -                    (_within_target) ? "within" : "outside",
   9.503 -                    elapsed_ms);
   9.504 -    }
   9.505 -
   9.506    }
   9.507  
   9.508    _in_marking_window = new_in_marking_window;
   9.509 @@ -1723,7 +1616,6 @@
   9.510    // Note that _mmu_tracker->max_gc_time() returns the time in seconds.
   9.511    double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0;
   9.512    adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms);
   9.513 -  // </NEW PREDICTION>
   9.514  
   9.515    assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end.");
   9.516  }
   9.517 @@ -1768,8 +1660,6 @@
   9.518    }
   9.519  }
   9.520  
   9.521 -// <NEW PREDICTION>
   9.522 -
   9.523  void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time,
   9.524                                                       double update_rs_processed_buffers,
   9.525                                                       double goal_ms) {
   9.526 @@ -1905,98 +1795,17 @@
   9.527  }
   9.528  
   9.529  void
   9.530 -G1CollectorPolicy::start_recording_regions() {
   9.531 -  _recorded_rs_lengths            = 0;
   9.532 -  _recorded_young_regions         = 0;
   9.533 -  _recorded_non_young_regions     = 0;
   9.534 -
   9.535 -#if PREDICTIONS_VERBOSE
   9.536 -  _recorded_marked_bytes          = 0;
   9.537 -  _recorded_young_bytes           = 0;
   9.538 -  _predicted_bytes_to_copy        = 0;
   9.539 -  _predicted_rs_lengths           = 0;
   9.540 -  _predicted_cards_scanned        = 0;
   9.541 -#endif // PREDICTIONS_VERBOSE
   9.542 -}
   9.543 -
   9.544 -void
   9.545 -G1CollectorPolicy::record_cset_region_info(HeapRegion* hr, bool young) {
   9.546 -#if PREDICTIONS_VERBOSE
   9.547 -  if (!young) {
   9.548 -    _recorded_marked_bytes += hr->max_live_bytes();
   9.549 -  }
   9.550 -  _predicted_bytes_to_copy += predict_bytes_to_copy(hr);
   9.551 -#endif // PREDICTIONS_VERBOSE
   9.552 -
   9.553 -  size_t rs_length = hr->rem_set()->occupied();
   9.554 -  _recorded_rs_lengths += rs_length;
   9.555 -}
   9.556 -
   9.557 -void
   9.558 -G1CollectorPolicy::record_non_young_cset_region(HeapRegion* hr) {
   9.559 -  assert(!hr->is_young(), "should not call this");
   9.560 -  ++_recorded_non_young_regions;
   9.561 -  record_cset_region_info(hr, false);
   9.562 -}
   9.563 -
   9.564 -void
   9.565 -G1CollectorPolicy::set_recorded_young_regions(size_t n_regions) {
   9.566 -  _recorded_young_regions = n_regions;
   9.567 -}
   9.568 -
   9.569 -void G1CollectorPolicy::set_recorded_young_bytes(size_t bytes) {
   9.570 -#if PREDICTIONS_VERBOSE
   9.571 -  _recorded_young_bytes = bytes;
   9.572 -#endif // PREDICTIONS_VERBOSE
   9.573 +G1CollectorPolicy::init_cset_region_lengths(size_t eden_cset_region_length,
   9.574 +                                          size_t survivor_cset_region_length) {
   9.575 +  _eden_cset_region_length     = eden_cset_region_length;
   9.576 +  _survivor_cset_region_length = survivor_cset_region_length;
   9.577 +  _old_cset_region_length      = 0;
   9.578  }
   9.579  
   9.580  void G1CollectorPolicy::set_recorded_rs_lengths(size_t rs_lengths) {
   9.581    _recorded_rs_lengths = rs_lengths;
   9.582  }
   9.583  
   9.584 -void G1CollectorPolicy::set_predicted_bytes_to_copy(size_t bytes) {
   9.585 -  _predicted_bytes_to_copy = bytes;
   9.586 -}
   9.587 -
   9.588 -void
   9.589 -G1CollectorPolicy::end_recording_regions() {
   9.590 -  // The _predicted_pause_time_ms field is referenced in code
   9.591 -  // not under PREDICTIONS_VERBOSE. Let's initialize it.
   9.592 -  _predicted_pause_time_ms = -1.0;
   9.593 -
   9.594 -#if PREDICTIONS_VERBOSE
   9.595 -  _predicted_pending_cards = predict_pending_cards();
   9.596 -  _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff();
   9.597 -  if (full_young_gcs())
   9.598 -    _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths);
   9.599 -  else
   9.600 -    _predicted_cards_scanned +=
   9.601 -      predict_non_young_card_num(_predicted_rs_lengths);
   9.602 -  _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions;
   9.603 -
   9.604 -  _predicted_rs_update_time_ms =
   9.605 -    predict_rs_update_time_ms(_g1->pending_card_num());
   9.606 -  _predicted_rs_scan_time_ms =
   9.607 -    predict_rs_scan_time_ms(_predicted_cards_scanned);
   9.608 -  _predicted_object_copy_time_ms =
   9.609 -    predict_object_copy_time_ms(_predicted_bytes_to_copy);
   9.610 -  _predicted_constant_other_time_ms =
   9.611 -    predict_constant_other_time_ms();
   9.612 -  _predicted_young_other_time_ms =
   9.613 -    predict_young_other_time_ms(_recorded_young_regions);
   9.614 -  _predicted_non_young_other_time_ms =
   9.615 -    predict_non_young_other_time_ms(_recorded_non_young_regions);
   9.616 -
   9.617 -  _predicted_pause_time_ms =
   9.618 -    _predicted_rs_update_time_ms +
   9.619 -    _predicted_rs_scan_time_ms +
   9.620 -    _predicted_object_copy_time_ms +
   9.621 -    _predicted_constant_other_time_ms +
   9.622 -    _predicted_young_other_time_ms +
   9.623 -    _predicted_non_young_other_time_ms;
   9.624 -#endif // PREDICTIONS_VERBOSE
   9.625 -}
   9.626 -
   9.627  void G1CollectorPolicy::check_if_region_is_too_expensive(double
   9.628                                                             predicted_time_ms) {
   9.629    // I don't think we need to do this when in young GC mode since
   9.630 @@ -2013,9 +1822,6 @@
   9.631    }
   9.632  }
   9.633  
   9.634 -// </NEW PREDICTION>
   9.635 -
   9.636 -
   9.637  void G1CollectorPolicy::update_recent_gc_times(double end_time_sec,
   9.638                                                 double elapsed_ms) {
   9.639    _recent_gc_times_ms->add(elapsed_ms);
   9.640 @@ -2023,99 +1829,6 @@
   9.641    _prev_collection_pause_end_ms = end_time_sec * 1000.0;
   9.642  }
   9.643  
   9.644 -double G1CollectorPolicy::recent_avg_time_for_pauses_ms() {
   9.645 -  if (_recent_pause_times_ms->num() == 0) {
   9.646 -    return (double) MaxGCPauseMillis;
   9.647 -  }
   9.648 -  return _recent_pause_times_ms->avg();
   9.649 -}
   9.650 -
   9.651 -double G1CollectorPolicy::recent_avg_time_for_rs_scan_ms() {
   9.652 -  if (_recent_rs_scan_times_ms->num() == 0) {
   9.653 -    return (double)MaxGCPauseMillis/3.0;
   9.654 -  }
   9.655 -  return _recent_rs_scan_times_ms->avg();
   9.656 -}
   9.657 -
   9.658 -int G1CollectorPolicy::number_of_recent_gcs() {
   9.659 -  assert(_recent_rs_scan_times_ms->num() ==
   9.660 -         _recent_pause_times_ms->num(), "Sequence out of sync");
   9.661 -  assert(_recent_pause_times_ms->num() ==
   9.662 -         _recent_CS_bytes_used_before->num(), "Sequence out of sync");
   9.663 -  assert(_recent_CS_bytes_used_before->num() ==
   9.664 -         _recent_CS_bytes_surviving->num(), "Sequence out of sync");
   9.665 -
   9.666 -  return _recent_pause_times_ms->num();
   9.667 -}
   9.668 -
   9.669 -double G1CollectorPolicy::recent_avg_survival_fraction() {
   9.670 -  return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving,
   9.671 -                                           _recent_CS_bytes_used_before);
   9.672 -}
   9.673 -
   9.674 -double G1CollectorPolicy::last_survival_fraction() {
   9.675 -  return last_survival_fraction_work(_recent_CS_bytes_surviving,
   9.676 -                                     _recent_CS_bytes_used_before);
   9.677 -}
   9.678 -
   9.679 -double
   9.680 -G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving,
   9.681 -                                                     TruncatedSeq* before) {
   9.682 -  assert(surviving->num() == before->num(), "Sequence out of sync");
   9.683 -  if (before->sum() > 0.0) {
   9.684 -      double recent_survival_rate = surviving->sum() / before->sum();
   9.685 -      // We exempt parallel collection from this check because Alloc Buffer
   9.686 -      // fragmentation can produce negative collections.
   9.687 -      // Further, we're now always doing parallel collection.  But I'm still
   9.688 -      // leaving this here as a placeholder for a more precise assertion later.
   9.689 -      // (DLD, 10/05.)
   9.690 -      assert((true || G1CollectedHeap::use_parallel_gc_threads()) ||
   9.691 -             _g1->evacuation_failed() ||
   9.692 -             recent_survival_rate <= 1.0, "Or bad frac");
   9.693 -      return recent_survival_rate;
   9.694 -  } else {
   9.695 -    return 1.0; // Be conservative.
   9.696 -  }
   9.697 -}
   9.698 -
   9.699 -double
   9.700 -G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving,
   9.701 -                                               TruncatedSeq* before) {
   9.702 -  assert(surviving->num() == before->num(), "Sequence out of sync");
   9.703 -  if (surviving->num() > 0 && before->last() > 0.0) {
   9.704 -    double last_survival_rate = surviving->last() / before->last();
   9.705 -    // We exempt parallel collection from this check because Alloc Buffer
   9.706 -    // fragmentation can produce negative collections.
   9.707 -    // Further, we're now always doing parallel collection.  But I'm still
   9.708 -    // leaving this here as a placeholder for a more precise assertion later.
   9.709 -    // (DLD, 10/05.)
   9.710 -    assert((true || G1CollectedHeap::use_parallel_gc_threads()) ||
   9.711 -           last_survival_rate <= 1.0, "Or bad frac");
   9.712 -    return last_survival_rate;
   9.713 -  } else {
   9.714 -    return 1.0;
   9.715 -  }
   9.716 -}
   9.717 -
   9.718 -static const int survival_min_obs = 5;
   9.719 -static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 };
   9.720 -static const double min_survival_rate = 0.1;
   9.721 -
   9.722 -double
   9.723 -G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg,
   9.724 -                                                           double latest) {
   9.725 -  double res = avg;
   9.726 -  if (number_of_recent_gcs() < survival_min_obs) {
   9.727 -    res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]);
   9.728 -  }
   9.729 -  res = MAX2(res, latest);
   9.730 -  res = MAX2(res, min_survival_rate);
   9.731 -  // In the parallel case, LAB fragmentation can produce "negative
   9.732 -  // collections"; so can evac failure.  Cap at 1.0
   9.733 -  res = MIN2(res, 1.0);
   9.734 -  return res;
   9.735 -}
   9.736 -
   9.737  size_t G1CollectorPolicy::expansion_amount() {
   9.738    double recent_gc_overhead = recent_avg_pause_time_ratio() * 100.0;
   9.739    double threshold = _gc_overhead_perc;
   9.740 @@ -2331,15 +2044,6 @@
   9.741          print_summary_sd(0, buffer, &_all_aux_times_ms[i]);
   9.742        }
   9.743      }
   9.744 -
   9.745 -    size_t all_region_num = _region_num_young + _region_num_tenured;
   9.746 -    gclog_or_tty->print_cr("   New Regions %8d, Young %8d (%6.2lf%%), "
   9.747 -               "Tenured %8d (%6.2lf%%)",
   9.748 -               all_region_num,
   9.749 -               _region_num_young,
   9.750 -               (double) _region_num_young / (double) all_region_num * 100.0,
   9.751 -               _region_num_tenured,
   9.752 -               (double) _region_num_tenured / (double) all_region_num * 100.0);
   9.753    }
   9.754    if (TraceGen1Time) {
   9.755      if (_all_full_gc_times_ms->num() > 0) {
   9.756 @@ -2361,14 +2065,6 @@
   9.757  #endif // PRODUCT
   9.758  }
   9.759  
   9.760 -void G1CollectorPolicy::update_region_num(bool young) {
   9.761 -  if (young) {
   9.762 -    ++_region_num_young;
   9.763 -  } else {
   9.764 -    ++_region_num_tenured;
   9.765 -  }
   9.766 -}
   9.767 -
   9.768  #ifndef PRODUCT
   9.769  // for debugging, bit of a hack...
   9.770  static char*
   9.771 @@ -2617,6 +2313,7 @@
   9.772      ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i);
   9.773      // Back to zero for the claim value.
   9.774      _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i,
   9.775 +                                         _g1->workers()->active_workers(),
   9.776                                           HeapRegion::InitialClaimValue);
   9.777      jint regions_added = parKnownGarbageCl.marked_regions_added();
   9.778      _hrSorted->incNumMarkedHeapRegions(regions_added);
   9.779 @@ -2628,7 +2325,7 @@
   9.780  };
   9.781  
   9.782  void
   9.783 -G1CollectorPolicy::record_concurrent_mark_cleanup_end() {
   9.784 +G1CollectorPolicy::record_concurrent_mark_cleanup_end(int no_of_gc_threads) {
   9.785    double start_sec;
   9.786    if (G1PrintParCleanupStats) {
   9.787      start_sec = os::elapsedTime();
   9.788 @@ -2644,10 +2341,27 @@
   9.789  
   9.790    if (G1CollectedHeap::use_parallel_gc_threads()) {
   9.791      const size_t OverpartitionFactor = 4;
   9.792 -    const size_t MinWorkUnit = 8;
   9.793 -    const size_t WorkUnit =
   9.794 -      MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
   9.795 -           MinWorkUnit);
   9.796 +    size_t WorkUnit;
   9.797 +    // The use of MinChunkSize = 8 in the original code
   9.798 +    // causes some assertion failures when the total number of
    9.799 +    // regions is less than 8.  The code here tries to fix that.
   9.800 +    // Should the original code also be fixed?
   9.801 +    if (no_of_gc_threads > 0) {
   9.802 +      const size_t MinWorkUnit =
   9.803 +        MAX2(_g1->n_regions() / no_of_gc_threads, (size_t) 1U);
   9.804 +      WorkUnit =
   9.805 +        MAX2(_g1->n_regions() / (no_of_gc_threads * OverpartitionFactor),
   9.806 +             MinWorkUnit);
   9.807 +    } else {
   9.808 +      assert(no_of_gc_threads > 0,
   9.809 +        "The active gc workers should be greater than 0");
   9.810 +      // In a product build do something reasonable to avoid a crash.
   9.811 +      const size_t MinWorkUnit =
   9.812 +        MAX2(_g1->n_regions() / ParallelGCThreads, (size_t) 1U);
   9.813 +      WorkUnit =
   9.814 +        MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor),
   9.815 +             MinWorkUnit);
   9.816 +    }
   9.817      _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(),
   9.818                                                               WorkUnit);
   9.819      ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser,
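A small numeric sketch of the WorkUnit arithmetic above, with hypothetical region and thread counts:

#include <algorithm>
#include <cstdio>

// Mirrors the chunk-size computation above (hypothetical inputs).
static size_t work_unit(size_t n_regions, size_t gc_threads) {
  const size_t OverpartitionFactor = 4;
  const size_t MinWorkUnit = std::max(n_regions / gc_threads, (size_t)1);
  return std::max(n_regions / (gc_threads * OverpartitionFactor), MinWorkUnit);
}

int main() {
  printf("%zu\n", work_unit(1000, 8));  // 125 regions per claimed chunk
  printf("%zu\n", work_unit(6, 8));     // 1, so a heap with fewer regions
                                        // than threads no longer trips the
                                        // old MinWorkUnit == 8 assumption
  return 0;
}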
   9.820 @@ -2682,8 +2396,7 @@
   9.821  }
   9.822  
   9.823  // Add the heap region at the head of the non-incremental collection set
   9.824 -void G1CollectorPolicy::
   9.825 -add_to_collection_set(HeapRegion* hr) {
   9.826 +void G1CollectorPolicy::add_old_region_to_cset(HeapRegion* hr) {
   9.827    assert(_inc_cset_build_state == Active, "Precondition");
   9.828    assert(!hr->is_young(), "non-incremental add of young region");
   9.829  
   9.830 @@ -2694,9 +2407,11 @@
   9.831    hr->set_in_collection_set(true);
   9.832    hr->set_next_in_collection_set(_collection_set);
   9.833    _collection_set = hr;
   9.834 -  _collection_set_size++;
   9.835    _collection_set_bytes_used_before += hr->used();
   9.836    _g1->register_region_with_in_cset_fast_test(hr);
   9.837 +  size_t rs_length = hr->rem_set()->occupied();
   9.838 +  _recorded_rs_lengths += rs_length;
   9.839 +  _old_cset_region_length += 1;
   9.840  }
   9.841  
   9.842  // Initialize the per-collection-set information
   9.843 @@ -2705,16 +2420,11 @@
   9.844  
   9.845    _inc_cset_head = NULL;
   9.846    _inc_cset_tail = NULL;
   9.847 -  _inc_cset_size = 0;
   9.848    _inc_cset_bytes_used_before = 0;
   9.849  
   9.850 -  _inc_cset_young_index = 0;
   9.851 -
   9.852    _inc_cset_max_finger = 0;
   9.853 -  _inc_cset_recorded_young_bytes = 0;
   9.854    _inc_cset_recorded_rs_lengths = 0;
   9.855    _inc_cset_predicted_elapsed_time_ms = 0;
   9.856 -  _inc_cset_predicted_bytes_to_copy = 0;
   9.857    _inc_cset_build_state = Active;
   9.858  }
   9.859  
   9.860 @@ -2745,20 +2455,6 @@
   9.861    // rset sampling code
   9.862    hr->set_recorded_rs_length(rs_length);
   9.863    hr->set_predicted_elapsed_time_ms(region_elapsed_time_ms);
   9.864 -
   9.865 -#if PREDICTIONS_VERBOSE
   9.866 -  size_t bytes_to_copy = predict_bytes_to_copy(hr);
   9.867 -  _inc_cset_predicted_bytes_to_copy += bytes_to_copy;
   9.868 -
   9.869 -  // Record the number of bytes used in this region
   9.870 -  _inc_cset_recorded_young_bytes += used_bytes;
   9.871 -
   9.872 -  // Cache the values we have added to the aggregated informtion
   9.873 -  // in the heap region in case we have to remove this region from
   9.874 -  // the incremental collection set, or it is updated by the
   9.875 -  // rset sampling code
   9.876 -  hr->set_predicted_bytes_to_copy(bytes_to_copy);
   9.877 -#endif // PREDICTIONS_VERBOSE
   9.878  }
   9.879  
   9.880  void G1CollectorPolicy::remove_from_incremental_cset_info(HeapRegion* hr) {
   9.881 @@ -2784,17 +2480,6 @@
   9.882    // Clear the values cached in the heap region
   9.883    hr->set_recorded_rs_length(0);
   9.884    hr->set_predicted_elapsed_time_ms(0);
   9.885 -
   9.886 -#if PREDICTIONS_VERBOSE
   9.887 -  size_t old_predicted_bytes_to_copy = hr->predicted_bytes_to_copy();
   9.888 -  _inc_cset_predicted_bytes_to_copy -= old_predicted_bytes_to_copy;
   9.889 -
   9.890 -  // Subtract the number of bytes used in this region
   9.891 -  _inc_cset_recorded_young_bytes -= used_bytes;
   9.892 -
   9.893 -  // Clear the values cached in the heap region
   9.894 -  hr->set_predicted_bytes_to_copy(0);
   9.895 -#endif // PREDICTIONS_VERBOSE
   9.896  }
   9.897  
   9.898  void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr, size_t new_rs_length) {
   9.899 @@ -2806,8 +2491,8 @@
   9.900  }
   9.901  
   9.902  void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) {
   9.903 -  assert( hr->is_young(), "invariant");
   9.904 -  assert( hr->young_index_in_cset() == -1, "invariant" );
   9.905 +  assert(hr->is_young(), "invariant");
   9.906 +  assert(hr->young_index_in_cset() > -1, "should have already been set");
   9.907    assert(_inc_cset_build_state == Active, "Precondition");
   9.908  
   9.909    // We need to clear and set the cached recorded/cached collection set
   9.910 @@ -2827,11 +2512,7 @@
   9.911    hr->set_in_collection_set(true);
   9.912    assert( hr->next_in_collection_set() == NULL, "invariant");
   9.913  
   9.914 -  _inc_cset_size++;
   9.915    _g1->register_region_with_in_cset_fast_test(hr);
   9.916 -
   9.917 -  hr->set_young_index_in_cset((int) _inc_cset_young_index);
   9.918 -  ++_inc_cset_young_index;
   9.919  }
   9.920  
   9.921  // Add the region at the RHS of the incremental cset
   9.922 @@ -2899,8 +2580,6 @@
   9.923  
   9.924    YoungList* young_list = _g1->young_list();
   9.925  
   9.926 -  start_recording_regions();
   9.927 -
   9.928    guarantee(target_pause_time_ms > 0.0,
   9.929              err_msg("target_pause_time_ms = %1.6lf should be positive",
   9.930                      target_pause_time_ms));
   9.931 @@ -2923,7 +2602,6 @@
   9.932    if (time_remaining_ms < threshold) {
   9.933      double prev_time_remaining_ms = time_remaining_ms;
   9.934      time_remaining_ms = 0.50 * target_pause_time_ms;
   9.935 -    _within_target = false;
   9.936      ergo_verbose3(ErgoCSetConstruction,
   9.937                    "adjust remaining time",
   9.938                    ergo_format_reason("remaining time lower than threshold")
   9.939 @@ -2931,8 +2609,6 @@
   9.940                    ergo_format_ms("threshold")
   9.941                    ergo_format_ms("adjusted remaining time"),
   9.942                    prev_time_remaining_ms, threshold, time_remaining_ms);
   9.943 -  } else {
   9.944 -    _within_target = true;
   9.945    }
   9.946  
   9.947    size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes;
   9.948 @@ -2941,8 +2617,6 @@
   9.949    double young_start_time_sec = os::elapsedTime();
   9.950  
   9.951    _collection_set_bytes_used_before = 0;
   9.952 -  _collection_set_size = 0;
   9.953 -  _young_cset_length  = 0;
   9.954    _last_young_gc_full = full_young_gcs() ? true : false;
   9.955  
   9.956    if (_last_young_gc_full) {
   9.957 @@ -2955,9 +2629,9 @@
   9.958    // pause are appended to the RHS of the young list, i.e.
   9.959    //   [Newly Young Regions ++ Survivors from last pause].
   9.960  
   9.961 -  size_t survivor_region_num = young_list->survivor_length();
   9.962 -  size_t eden_region_num = young_list->length() - survivor_region_num;
   9.963 -  size_t old_region_num = 0;
   9.964 +  size_t survivor_region_length = young_list->survivor_length();
   9.965 +  size_t eden_region_length = young_list->length() - survivor_region_length;
   9.966 +  init_cset_region_lengths(eden_region_length, survivor_region_length);
   9.967    hr = young_list->first_survivor_region();
   9.968    while (hr != NULL) {
   9.969      assert(hr->is_survivor(), "badly formed young list");
   9.970 @@ -2971,9 +2645,7 @@
   9.971    if (_g1->mark_in_progress())
   9.972      _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger);
   9.973  
   9.974 -  _young_cset_length = _inc_cset_young_index;
   9.975    _collection_set = _inc_cset_head;
   9.976 -  _collection_set_size = _inc_cset_size;
   9.977    _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
   9.978    time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms;
   9.979    predicted_pause_time_ms += _inc_cset_predicted_elapsed_time_ms;
   9.980 @@ -2983,19 +2655,12 @@
   9.981                  ergo_format_region("eden")
   9.982                  ergo_format_region("survivors")
   9.983                  ergo_format_ms("predicted young region time"),
   9.984 -                eden_region_num, survivor_region_num,
   9.985 +                eden_region_length, survivor_region_length,
   9.986                  _inc_cset_predicted_elapsed_time_ms);
   9.987  
   9.988    // The number of recorded young regions is the incremental
   9.989    // collection set's current size
   9.990 -  set_recorded_young_regions(_inc_cset_size);
   9.991    set_recorded_rs_lengths(_inc_cset_recorded_rs_lengths);
   9.992 -  set_recorded_young_bytes(_inc_cset_recorded_young_bytes);
   9.993 -#if PREDICTIONS_VERBOSE
   9.994 -  set_predicted_bytes_to_copy(_inc_cset_predicted_bytes_to_copy);
   9.995 -#endif // PREDICTIONS_VERBOSE
   9.996 -
   9.997 -  assert(_inc_cset_size == young_list->length(), "Invariant");
   9.998  
   9.999    double young_end_time_sec = os::elapsedTime();
  9.1000    _recorded_young_cset_choice_time_ms =
  9.1001 @@ -3009,9 +2674,16 @@
  9.1002      NumberSeq seq;
  9.1003      double avg_prediction = 100000000000000000.0; // something very large
  9.1004  
  9.1005 -    size_t prev_collection_set_size = _collection_set_size;
  9.1006      double prev_predicted_pause_time_ms = predicted_pause_time_ms;
  9.1007      do {
  9.1008 +      // Note that add_old_region_to_cset() increments the
  9.1009 +      // _old_cset_region_length field and cset_region_length() returns the
  9.1010 +      // sum of _eden_cset_region_length, _survivor_cset_region_length, and
  9.1011 +      // _old_cset_region_length. So, as old regions are added to the
  9.1012 +      // CSet, _old_cset_region_length will be incremented and
  9.1013 +      // cset_region_length(), which is used below, will always reflect
  9.1014 +      // the total number of regions added up to this point to the CSet.
  9.1015 +
  9.1016        hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms,
  9.1017                                                        avg_prediction);
  9.1018        if (hr != NULL) {
  9.1019 @@ -3019,8 +2691,7 @@
  9.1020          double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
  9.1021          time_remaining_ms -= predicted_time_ms;
  9.1022          predicted_pause_time_ms += predicted_time_ms;
  9.1023 -        add_to_collection_set(hr);
  9.1024 -        record_non_young_cset_region(hr);
  9.1025 +        add_old_region_to_cset(hr);
  9.1026          seq.add(predicted_time_ms);
  9.1027          avg_prediction = seq.avg() + seq.sd();
  9.1028        }
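
The invariant spelled out in the comment in the hunk above can be sketched independently of the policy class; the struct and field names below are illustrative stand-ins, not the HotSpot types:

    #include <cassert>
    #include <cstddef>

    // Stand-in for the three CSet counters: eden and survivor are fixed when
    // the young part of the CSet is chosen; old grows as
    // add_old_region_to_cset() is called inside the loop.
    struct CSetLengths {
      size_t eden, survivor, old_regions;
      size_t young() const { return eden + survivor; }
      size_t total() const { return young() + old_regions; }  // cset_region_length()
    };

    int main() {
      CSetLengths cs = {20, 4, 0};
      for (int i = 0; i < 3; ++i) {
        cs.old_regions += 1;                      // one old region added
        assert(cs.total() == cs.young() + cs.old_regions);
      }
      assert(cs.total() == 27);
      return 0;
    }
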
  9.1029 @@ -3041,13 +2712,13 @@
  9.1030              should_continue = false;
  9.1031            }
  9.1032          } else {
  9.1033 -          if (_collection_set_size >= _young_list_fixed_length) {
  9.1034 +          if (cset_region_length() >= _young_list_fixed_length) {
  9.1035              ergo_verbose2(ErgoCSetConstruction,
  9.1036                            "stop adding old regions to CSet",
  9.1037                            ergo_format_reason("CSet length reached target")
  9.1038                            ergo_format_region("CSet")
  9.1039                            ergo_format_region("young target"),
  9.1040 -                          _collection_set_size, _young_list_fixed_length);
  9.1041 +                          cset_region_length(), _young_list_fixed_length);
  9.1042              should_continue = false;
  9.1043            }
  9.1044          }
  9.1045 @@ -3055,23 +2726,21 @@
  9.1046      } while (should_continue);
  9.1047  
  9.1048      if (!adaptive_young_list_length() &&
  9.1049 -        _collection_set_size < _young_list_fixed_length) {
  9.1050 +                             cset_region_length() < _young_list_fixed_length) {
  9.1051        ergo_verbose2(ErgoCSetConstruction,
  9.1052                      "request partially-young GCs end",
  9.1053                      ergo_format_reason("CSet length lower than target")
  9.1054                      ergo_format_region("CSet")
  9.1055                      ergo_format_region("young target"),
  9.1056 -                    _collection_set_size, _young_list_fixed_length);
  9.1057 +                    cset_region_length(), _young_list_fixed_length);
  9.1058        _should_revert_to_full_young_gcs  = true;
  9.1059      }
  9.1060  
  9.1061 -    old_region_num = _collection_set_size - prev_collection_set_size;
  9.1062 -
  9.1063      ergo_verbose2(ErgoCSetConstruction | ErgoHigh,
  9.1064                    "add old regions to CSet",
  9.1065                    ergo_format_region("old")
  9.1066                    ergo_format_ms("predicted old region time"),
  9.1067 -                  old_region_num,
  9.1068 +                  old_cset_region_length(),
  9.1069                    predicted_pause_time_ms - prev_predicted_pause_time_ms);
  9.1070    }
  9.1071  
  9.1072 @@ -3079,8 +2748,6 @@
  9.1073  
  9.1074    count_CS_bytes_used();
  9.1075  
  9.1076 -  end_recording_regions();
  9.1077 -
  9.1078    ergo_verbose5(ErgoCSetConstruction,
  9.1079                  "finish choosing CSet",
  9.1080                  ergo_format_region("eden")
  9.1081 @@ -3088,7 +2755,8 @@
  9.1082                  ergo_format_region("old")
  9.1083                  ergo_format_ms("predicted pause time")
  9.1084                  ergo_format_ms("target pause time"),
  9.1085 -                eden_region_num, survivor_region_num, old_region_num,
  9.1086 +                eden_region_length, survivor_region_length,
  9.1087 +                old_cset_region_length(),
  9.1088                  predicted_pause_time_ms, target_pause_time_ms);
  9.1089  
  9.1090    double non_young_end_time_sec = os::elapsedTime();
    10.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Dec 01 13:42:41 2011 -0500
    10.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Dec 02 08:52:53 2011 -0500
    10.3 @@ -85,13 +85,13 @@
    10.4  
    10.5  class G1CollectorPolicy: public CollectorPolicy {
    10.6  private:
    10.7 -  // The number of pauses during the execution.
    10.8 -  long _n_pauses;
    10.9 -
   10.10    // either equal to the number of parallel threads, if ParallelGCThreads
   10.11    // has been set, or 1 otherwise
   10.12    int _parallel_gc_threads;
   10.13  
   10.14 +  // The number of GC threads currently active.
   10.15 +  uintx _no_of_gc_threads;
   10.16 +
   10.17    enum SomePrivateConstants {
   10.18      NumPrevPausesForHeuristics = 10
   10.19    };
   10.20 @@ -127,18 +127,9 @@
   10.21    jlong  _num_cc_clears;                // number of times the card count cache has been cleared
   10.22  #endif
   10.23  
   10.24 -  // Statistics for recent GC pauses.  See below for how indexed.
   10.25 -  TruncatedSeq* _recent_rs_scan_times_ms;
   10.26 -
   10.27    // These exclude marking times.
   10.28 -  TruncatedSeq* _recent_pause_times_ms;
   10.29    TruncatedSeq* _recent_gc_times_ms;
   10.30  
   10.31 -  TruncatedSeq* _recent_CS_bytes_used_before;
   10.32 -  TruncatedSeq* _recent_CS_bytes_surviving;
   10.33 -
   10.34 -  TruncatedSeq* _recent_rs_sizes;
   10.35 -
   10.36    TruncatedSeq* _concurrent_mark_remark_times_ms;
   10.37    TruncatedSeq* _concurrent_mark_cleanup_times_ms;
   10.38  
   10.39 @@ -150,13 +141,6 @@
   10.40    NumberSeq* _all_stop_world_times_ms;
   10.41    NumberSeq* _all_yield_times_ms;
   10.42  
   10.43 -  size_t     _region_num_young;
   10.44 -  size_t     _region_num_tenured;
   10.45 -  size_t     _prev_region_num_young;
   10.46 -  size_t     _prev_region_num_tenured;
   10.47 -
   10.48 -  NumberSeq* _all_mod_union_times_ms;
   10.49 -
   10.50    int        _aux_num;
   10.51    NumberSeq* _all_aux_times_ms;
   10.52    double*    _cur_aux_start_times_ms;
   10.53 @@ -194,7 +178,6 @@
   10.54    // locker is active. This should be >= _young_list_target_length;
   10.55    size_t _young_list_max_length;
   10.56  
   10.57 -  size_t _young_cset_length;
   10.58    bool   _last_young_gc_full;
   10.59  
   10.60    unsigned              _full_young_pause_num;
   10.61 @@ -217,8 +200,6 @@
   10.62      return _during_marking;
   10.63    }
   10.64  
   10.65 -  // <NEW PREDICTION>
   10.66 -
   10.67  private:
   10.68    enum PredictionConstants {
   10.69      TruncatedSeqLength = 10
   10.70 @@ -240,47 +221,32 @@
   10.71    TruncatedSeq* _non_young_other_cost_per_region_ms_seq;
   10.72  
   10.73    TruncatedSeq* _pending_cards_seq;
   10.74 -  TruncatedSeq* _scanned_cards_seq;
   10.75    TruncatedSeq* _rs_lengths_seq;
   10.76  
   10.77    TruncatedSeq* _cost_per_byte_ms_during_cm_seq;
   10.78  
   10.79    TruncatedSeq* _young_gc_eff_seq;
   10.80  
   10.81 -  TruncatedSeq* _max_conc_overhead_seq;
   10.82 -
   10.83    bool   _using_new_ratio_calculations;
   10.84    size_t _min_desired_young_length; // as set on the command line or default calculations
   10.85    size_t _max_desired_young_length; // as set on the command line or default calculations
   10.86  
   10.87 -  size_t _recorded_young_regions;
   10.88 -  size_t _recorded_non_young_regions;
   10.89 -  size_t _recorded_region_num;
   10.90 +  size_t _eden_cset_region_length;
   10.91 +  size_t _survivor_cset_region_length;
   10.92 +  size_t _old_cset_region_length;
   10.93 +
   10.94 +  void init_cset_region_lengths(size_t eden_cset_region_length,
   10.95 +                                size_t survivor_cset_region_length);
   10.96 +
   10.97 +  size_t eden_cset_region_length()     { return _eden_cset_region_length;     }
   10.98 +  size_t survivor_cset_region_length() { return _survivor_cset_region_length; }
   10.99 +  size_t old_cset_region_length()      { return _old_cset_region_length;      }
  10.100  
  10.101    size_t _free_regions_at_end_of_collection;
  10.102  
  10.103    size_t _recorded_rs_lengths;
  10.104    size_t _max_rs_lengths;
  10.105  
  10.106 -  size_t _recorded_marked_bytes;
  10.107 -  size_t _recorded_young_bytes;
  10.108 -
  10.109 -  size_t _predicted_pending_cards;
  10.110 -  size_t _predicted_cards_scanned;
  10.111 -  size_t _predicted_rs_lengths;
  10.112 -  size_t _predicted_bytes_to_copy;
  10.113 -
  10.114 -  double _predicted_survival_ratio;
  10.115 -  double _predicted_rs_update_time_ms;
  10.116 -  double _predicted_rs_scan_time_ms;
  10.117 -  double _predicted_object_copy_time_ms;
  10.118 -  double _predicted_constant_other_time_ms;
  10.119 -  double _predicted_young_other_time_ms;
  10.120 -  double _predicted_non_young_other_time_ms;
  10.121 -  double _predicted_pause_time_ms;
  10.122 -
  10.123 -  double _vtime_diff_ms;
  10.124 -
  10.125    double _recorded_young_free_cset_time_ms;
  10.126    double _recorded_non_young_free_cset_time_ms;
  10.127  
  10.128 @@ -317,21 +283,28 @@
  10.129                                      double update_rs_processed_buffers,
  10.130                                      double goal_ms);
  10.131  
  10.132 +  uintx no_of_gc_threads() { return _no_of_gc_threads; }
  10.133 +  void set_no_of_gc_threads(uintx v) { _no_of_gc_threads = v; }
  10.134 +
  10.135    double _pause_time_target_ms;
  10.136    double _recorded_young_cset_choice_time_ms;
  10.137    double _recorded_non_young_cset_choice_time_ms;
  10.138 -  bool   _within_target;
  10.139    size_t _pending_cards;
  10.140    size_t _max_pending_cards;
  10.141  
  10.142  public:
  10.143 +  // Accessors
  10.144  
  10.145 -  void set_region_short_lived(HeapRegion* hr) {
  10.146 +  void set_region_eden(HeapRegion* hr, int young_index_in_cset) {
  10.147 +    hr->set_young();
  10.148      hr->install_surv_rate_group(_short_lived_surv_rate_group);
  10.149 +    hr->set_young_index_in_cset(young_index_in_cset);
  10.150    }
  10.151  
  10.152 -  void set_region_survivors(HeapRegion* hr) {
  10.153 +  void set_region_survivor(HeapRegion* hr, int young_index_in_cset) {
  10.154 +    assert(hr->is_young() && hr->is_survivor(), "pre-condition");
  10.155      hr->install_surv_rate_group(_survivor_surv_rate_group);
  10.156 +    hr->set_young_index_in_cset(young_index_in_cset);
  10.157    }
  10.158  
  10.159  #ifndef PRODUCT
  10.160 @@ -343,10 +316,6 @@
  10.161                  seq->davg() * confidence_factor(seq->num()));
  10.162    }
  10.163  
  10.164 -  size_t young_cset_length() {
  10.165 -    return _young_cset_length;
  10.166 -  }
  10.167 -
  10.168    void record_max_rs_lengths(size_t rs_lengths) {
  10.169      _max_rs_lengths = rs_lengths;
  10.170    }
  10.171 @@ -465,20 +434,12 @@
  10.172    size_t predict_bytes_to_copy(HeapRegion* hr);
  10.173    double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
  10.174  
  10.175 -  void start_recording_regions();
  10.176 -  void record_cset_region_info(HeapRegion* hr, bool young);
  10.177 -  void record_non_young_cset_region(HeapRegion* hr);
  10.178 +  void set_recorded_rs_lengths(size_t rs_lengths);
  10.179  
  10.180 -  void set_recorded_young_regions(size_t n_regions);
  10.181 -  void set_recorded_young_bytes(size_t bytes);
  10.182 -  void set_recorded_rs_lengths(size_t rs_lengths);
  10.183 -  void set_predicted_bytes_to_copy(size_t bytes);
  10.184 -
  10.185 -  void end_recording_regions();
  10.186 -
  10.187 -  void record_vtime_diff_ms(double vtime_diff_ms) {
  10.188 -    _vtime_diff_ms = vtime_diff_ms;
  10.189 -  }
  10.190 +  size_t cset_region_length()       { return young_cset_region_length() +
  10.191 +                                             old_cset_region_length(); }
  10.192 +  size_t young_cset_region_length() { return eden_cset_region_length() +
  10.193 +                                             survivor_cset_region_length(); }
  10.194  
  10.195    void record_young_free_cset_time_ms(double time_ms) {
  10.196      _recorded_young_free_cset_time_ms = time_ms;
  10.197 @@ -494,8 +455,6 @@
  10.198  
  10.199    double predict_survivor_regions_evac_time();
  10.200  
  10.201 -  // </NEW PREDICTION>
  10.202 -
  10.203    void cset_regions_freed() {
  10.204      bool propagate = _last_young_gc_full && !_in_marking_window;
  10.205      _short_lived_surv_rate_group->all_surviving_words_recorded(propagate);
  10.206 @@ -575,8 +534,6 @@
  10.207    double sum_of_values (double* data);
  10.208    double max_sum (double* data1, double* data2);
  10.209  
  10.210 -  int _last_satb_drain_processed_buffers;
  10.211 -  int _last_update_rs_processed_buffers;
  10.212    double _last_pause_time_ms;
  10.213  
  10.214    size_t _bytes_in_collection_set_before_gc;
  10.215 @@ -596,10 +553,6 @@
  10.216    // set at the start of the pause.
  10.217    HeapRegion* _collection_set;
  10.218  
  10.219 -  // The number of regions in the collection set. Set from the incrementally
  10.220 -  // built collection set at the start of an evacuation pause.
  10.221 -  size_t _collection_set_size;
  10.222 -
  10.223    // The number of bytes in the collection set before the pause. Set from
  10.224    // the incrementally built collection set at the start of an evacuation
  10.225    // pause.
  10.226 @@ -622,16 +575,6 @@
  10.227    // The tail of the incrementally built collection set.
  10.228    HeapRegion* _inc_cset_tail;
  10.229  
  10.230 -  // The number of regions in the incrementally built collection set.
  10.231 -  // Used to set _collection_set_size at the start of an evacuation
  10.232 -  // pause.
  10.233 -  size_t _inc_cset_size;
  10.234 -
  10.235 -  // Used as the index in the surving young words structure
  10.236 -  // which tracks the amount of space, for each young region,
  10.237 -  // that survives the pause.
  10.238 -  size_t _inc_cset_young_index;
  10.239 -
  10.240    // The number of bytes in the incrementally built collection set.
  10.241    // Used to set _collection_set_bytes_used_before at the start of
  10.242    // an evacuation pause.
  10.243 @@ -640,11 +583,6 @@
  10.244    // Used to record the highest end of heap region in collection set
  10.245    HeapWord* _inc_cset_max_finger;
  10.246  
  10.247 -  // The number of recorded used bytes in the young regions
  10.248 -  // of the collection set. This is the sum of the used() bytes
  10.249 -  // of retired young regions in the collection set.
  10.250 -  size_t _inc_cset_recorded_young_bytes;
  10.251 -
  10.252    // The RSet lengths recorded for regions in the collection set
  10.253    // (updated by the periodic sampling of the regions in the
  10.254    // young list/collection set).
  10.255 @@ -655,68 +593,9 @@
  10.256    // regions in the young list/collection set).
  10.257    double _inc_cset_predicted_elapsed_time_ms;
  10.258  
  10.259 -  // The predicted bytes to copy for the regions in the collection
  10.260 -  // set (updated by the periodic sampling of the regions in the
  10.261 -  // young list/collection set).
  10.262 -  size_t _inc_cset_predicted_bytes_to_copy;
  10.263 -
  10.264    // Stash a pointer to the g1 heap.
  10.265    G1CollectedHeap* _g1;
  10.266  
  10.267 -  // The average time in ms per collection pause, averaged over recent pauses.
  10.268 -  double recent_avg_time_for_pauses_ms();
  10.269 -
  10.270 -  // The average time in ms for RS scanning, per pause, averaged
  10.271 -  // over recent pauses. (Note the RS scanning time for a pause
  10.272 -  // is itself an average of the RS scanning time for each worker
  10.273 -  // thread.)
  10.274 -  double recent_avg_time_for_rs_scan_ms();
  10.275 -
  10.276 -  // The number of "recent" GCs recorded in the number sequences
  10.277 -  int number_of_recent_gcs();
  10.278 -
  10.279 -  // The average survival ratio, computed by the total number of bytes
  10.280 -  // suriviving / total number of bytes before collection over the last
  10.281 -  // several recent pauses.
  10.282 -  double recent_avg_survival_fraction();
  10.283 -  // The survival fraction of the most recent pause; if there have been no
  10.284 -  // pauses, returns 1.0.
  10.285 -  double last_survival_fraction();
  10.286 -
  10.287 -  // Returns a "conservative" estimate of the recent survival rate, i.e.,
  10.288 -  // one that may be higher than "recent_avg_survival_fraction".
  10.289 -  // This is conservative in several ways:
  10.290 -  //   If there have been few pauses, it will assume a potential high
  10.291 -  //     variance, and err on the side of caution.
  10.292 -  //   It puts a lower bound (currently 0.1) on the value it will return.
  10.293 -  //   To try to detect phase changes, if the most recent pause ("latest") has a
  10.294 -  //     higher-than average ("avg") survival rate, it returns that rate.
  10.295 -  // "work" version is a utility function; young is restricted to young regions.
  10.296 -  double conservative_avg_survival_fraction_work(double avg,
  10.297 -                                                 double latest);
  10.298 -
  10.299 -  // The arguments are the two sequences that keep track of the number of bytes
  10.300 -  //   surviving and the total number of bytes before collection, resp.,
  10.301 -  //   over the last evereal recent pauses
  10.302 -  // Returns the survival rate for the category in the most recent pause.
  10.303 -  // If there have been no pauses, returns 1.0.
  10.304 -  double last_survival_fraction_work(TruncatedSeq* surviving,
  10.305 -                                     TruncatedSeq* before);
  10.306 -
  10.307 -  // The arguments are the two sequences that keep track of the number of bytes
  10.308 -  //   surviving and the total number of bytes before collection, resp.,
  10.309 -  //   over the last several recent pauses
  10.310 -  // Returns the average survival ration over the last several recent pauses
  10.311 -  // If there have been no pauses, return 1.0
  10.312 -  double recent_avg_survival_fraction_work(TruncatedSeq* surviving,
  10.313 -                                           TruncatedSeq* before);
  10.314 -
  10.315 -  double conservative_avg_survival_fraction() {
  10.316 -    double avg = recent_avg_survival_fraction();
  10.317 -    double latest = last_survival_fraction();
  10.318 -    return conservative_avg_survival_fraction_work(avg, latest);
  10.319 -  }
  10.320 -
  10.321    // The ratio of gc time to elapsed time, computed over recent pauses.
  10.322    double _recent_avg_pause_time_ratio;
  10.323  
  10.324 @@ -724,9 +603,6 @@
  10.325      return _recent_avg_pause_time_ratio;
  10.326    }
  10.327  
  10.328 -  // Number of pauses between concurrent marking.
  10.329 -  size_t _pauses_btwn_concurrent_mark;
  10.330 -
  10.331    // At the end of a pause we check the heap occupancy and we decide
  10.332    // whether we will start a marking cycle during the next pause. If
  10.333    // we decide that we want to do that, we will set this parameter to
  10.334 @@ -849,9 +725,6 @@
  10.335  
  10.336    GenRemSet::Name  rem_set_name()     { return GenRemSet::CardTable; }
  10.337  
  10.338 -  // The number of collection pauses so far.
  10.339 -  long n_pauses() const { return _n_pauses; }
  10.340 -
  10.341    // Update the heuristic info to record a collection pause of the given
  10.342    // start time, where the given number of bytes were used at the start.
  10.343    // This may involve changing the desired size of a collection set.
  10.344 @@ -864,19 +737,21 @@
  10.345    void record_concurrent_mark_init_end(double
  10.346                                             mark_init_elapsed_time_ms);
  10.347  
  10.348 -  void record_mark_closure_time(double mark_closure_time_ms);
  10.349 +  void record_mark_closure_time(double mark_closure_time_ms) {
  10.350 +    _mark_closure_time_ms = mark_closure_time_ms;
  10.351 +  }
  10.352  
  10.353    void record_concurrent_mark_remark_start();
  10.354    void record_concurrent_mark_remark_end();
  10.355  
  10.356    void record_concurrent_mark_cleanup_start();
  10.357 -  void record_concurrent_mark_cleanup_end();
  10.358 +  void record_concurrent_mark_cleanup_end(int no_of_gc_threads);
  10.359    void record_concurrent_mark_cleanup_completed();
  10.360  
  10.361    void record_concurrent_pause();
  10.362    void record_concurrent_pause_end();
  10.363  
  10.364 -  void record_collection_pause_end();
  10.365 +  void record_collection_pause_end(int no_of_gc_threads);
  10.366    void print_heap_transition();
  10.367  
  10.368    // Record the fact that a full collection occurred.
  10.369 @@ -900,15 +775,6 @@
  10.370      _cur_satb_drain_time_ms = ms;
  10.371    }
  10.372  
  10.373 -  void record_satb_drain_processed_buffers(int processed_buffers) {
  10.374 -    assert(_g1->mark_in_progress(), "shouldn't be here otherwise");
  10.375 -    _last_satb_drain_processed_buffers = processed_buffers;
  10.376 -  }
  10.377 -
  10.378 -  void record_mod_union_time(double ms) {
  10.379 -    _all_mod_union_times_ms->add(ms);
  10.380 -  }
  10.381 -
  10.382    void record_update_rs_time(int thread, double ms) {
  10.383      _par_last_update_rs_times_ms[thread] = ms;
  10.384    }
  10.385 @@ -1009,11 +875,8 @@
  10.386  
  10.387    void clear_collection_set() { _collection_set = NULL; }
  10.388  
  10.389 -  // The number of elements in the current collection set.
  10.390 -  size_t collection_set_size() { return _collection_set_size; }
  10.391 -
  10.392 -  // Add "hr" to the CS.
  10.393 -  void add_to_collection_set(HeapRegion* hr);
  10.394 +  // Add old region "hr" to the CSet.
  10.395 +  void add_old_region_to_cset(HeapRegion* hr);
  10.396  
  10.397    // Incremental CSet Support
  10.398  
  10.399 @@ -1023,9 +886,6 @@
  10.400    // The tail of the incrementally built collection set.
  10.401    HeapRegion* inc_set_tail() { return _inc_cset_tail; }
  10.402  
  10.403 -  // The number of elements in the incrementally built collection set.
  10.404 -  size_t inc_cset_size() { return _inc_cset_size; }
  10.405 -
  10.406    // Initialize incremental collection set info.
  10.407    void start_incremental_cset_building();
  10.408  
  10.409 @@ -1125,8 +985,6 @@
  10.410      return _young_list_max_length;
  10.411    }
  10.412  
  10.413 -  void update_region_num(bool young);
  10.414 -
  10.415    bool full_young_gcs() {
  10.416      return _full_young_gcs;
  10.417    }
    11.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Dec 01 13:42:41 2011 -0500
    11.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Dec 02 08:52:53 2011 -0500
    11.3 @@ -209,29 +209,9 @@
    11.4    size_t cards_looked_up() { return _cards;}
    11.5  };
    11.6  
    11.7 -// We want the parallel threads to start their scanning at
    11.8 -// different collection set regions to avoid contention.
    11.9 -// If we have:
   11.10 -//          n collection set regions
   11.11 -//          p threads
   11.12 -// Then thread t will start at region t * floor (n/p)
   11.13 -
   11.14 -HeapRegion* G1RemSet::calculateStartRegion(int worker_i) {
   11.15 -  HeapRegion* result = _g1p->collection_set();
   11.16 -  if (ParallelGCThreads > 0) {
   11.17 -    size_t cs_size = _g1p->collection_set_size();
   11.18 -    int n_workers = _g1->workers()->total_workers();
   11.19 -    size_t cs_spans = cs_size / n_workers;
   11.20 -    size_t ind      = cs_spans * worker_i;
   11.21 -    for (size_t i = 0; i < ind; i++)
   11.22 -      result = result->next_in_collection_set();
   11.23 -  }
   11.24 -  return result;
   11.25 -}
   11.26 -
   11.27  void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) {
   11.28    double rs_time_start = os::elapsedTime();
   11.29 -  HeapRegion *startRegion = calculateStartRegion(worker_i);
   11.30 +  HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i);
   11.31  
   11.32    ScanRSClosure scanRScl(oc, worker_i);
   11.33  
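
For reference, the spreading that the removed calculateStartRegion() performed is plain index arithmetic; a small worked sketch with made-up sizes (not the new start_cset_region_for_worker() implementation):

    #include <cstdio>

    // Worker t started scanning the collection set at region t * floor(n / p),
    // so the p workers begin at different regions and avoid contention.
    static size_t start_index(size_t worker, size_t n_regions, size_t n_workers) {
      return worker * (n_regions / n_workers);
    }

    int main() {
      // 10 CSet regions, 4 workers: start indices 0, 2, 4, 6.
      for (size_t t = 0; t < 4; ++t) {
        printf("worker %zu starts at region %zu\n", t, start_index(t, 10, 4));
      }
      return 0;
    }
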
   11.34 @@ -430,8 +410,10 @@
   11.35    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   11.36    dcqs.concatenate_logs();
   11.37  
   11.38 -  if (ParallelGCThreads > 0) {
   11.39 -    _seq_task->set_n_threads((int)n_workers());
   11.40 +  if (G1CollectedHeap::use_parallel_gc_threads()) {
   11.41 +    // Don't set the number of workers here.  It will be set
   11.42 +    // when the task is run
   11.43 +    // _seq_task->set_n_termination((int)n_workers());
   11.44    }
   11.45    guarantee( _cards_scanned == NULL, "invariant" );
   11.46    _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
   11.47 @@ -578,7 +560,10 @@
   11.48  void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm,
   11.49                                  int worker_num, int claim_val) {
   11.50    ScrubRSClosure scrub_cl(region_bm, card_bm);
   11.51 -  _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val);
   11.52 +  _g1->heap_region_par_iterate_chunked(&scrub_cl,
   11.53 +                                       worker_num,
   11.54 +                                       (int) n_workers(),
   11.55 +                                       claim_val);
   11.56  }
   11.57  
   11.58  
    12.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Dec 01 13:42:41 2011 -0500
    12.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Dec 02 08:52:53 2011 -0500
    12.3 @@ -104,8 +104,6 @@
    12.4    void scanRS(OopsInHeapRegionClosure* oc, int worker_i);
    12.5    void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i);
    12.6  
    12.7 -  HeapRegion* calculateStartRegion(int i);
    12.8 -
    12.9    CardTableModRefBS* ct_bs() { return _ct_bs; }
   12.10    size_t cardsScanned() { return _total_cards_scanned; }
   12.11  
    13.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Dec 01 13:42:41 2011 -0500
    13.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Dec 02 08:52:53 2011 -0500
    13.3 @@ -39,10 +39,6 @@
    13.4    develop(intx, G1MarkingOverheadPercent, 0,                                \
    13.5            "Overhead of concurrent marking")                                 \
    13.6                                                                              \
    13.7 -                                                                            \
    13.8 -  develop(intx, G1PolicyVerbose, 0,                                         \
    13.9 -          "The verbosity level on G1 policy decisions")                     \
   13.10 -                                                                            \
   13.11    develop(intx, G1MarkingVerboseLevel, 0,                                   \
   13.12            "Level (0-4) of verboseness of the marking code")                 \
   13.13                                                                              \
   13.14 @@ -58,9 +54,6 @@
   13.15    develop(bool, G1TraceMarkStackOverflow, false,                            \
   13.16            "If true, extra debugging code for CM restart for ovflw.")        \
   13.17                                                                              \
   13.18 -  develop(intx, G1PausesBtwnConcMark, -1,                                   \
   13.19 -          "If positive, fixed number of pauses between conc markings")      \
   13.20 -                                                                            \
   13.21    diagnostic(bool, G1SummarizeConcMark, false,                              \
   13.22            "Summarize concurrent mark info")                                 \
   13.23                                                                              \
    14.1 --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Dec 01 13:42:41 2011 -0500
    14.2 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Dec 02 08:52:53 2011 -0500
    14.3 @@ -367,12 +367,13 @@
    14.4    static void setup_heap_region_size(uintx min_heap_size);
    14.5  
    14.6    enum ClaimValues {
    14.7 -    InitialClaimValue     = 0,
    14.8 -    FinalCountClaimValue  = 1,
    14.9 -    NoteEndClaimValue     = 2,
   14.10 -    ScrubRemSetClaimValue = 3,
   14.11 -    ParVerifyClaimValue   = 4,
   14.12 -    RebuildRSClaimValue   = 5
   14.13 +    InitialClaimValue          = 0,
   14.14 +    FinalCountClaimValue       = 1,
   14.15 +    NoteEndClaimValue          = 2,
   14.16 +    ScrubRemSetClaimValue      = 3,
   14.17 +    ParVerifyClaimValue        = 4,
   14.18 +    RebuildRSClaimValue        = 5,
   14.19 +    CompleteMarkCSetClaimValue = 6
   14.20    };
   14.21  
   14.22    inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
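
The new CompleteMarkCSetClaimValue participates in the same claim protocol as the existing values: during a parallel iteration each worker tries to move a region's claim field from the iteration's starting value to the task's value, and only the winner processes the region. A minimal sketch of that idea using std::atomic (an illustration of the protocol, not the HeapRegion implementation):

    #include <atomic>
    #include <cstdio>

    struct Region {
      std::atomic<int> claim{0};   // 0 plays the role of InitialClaimValue
    };

    // Succeeds for exactly one caller per (from, to) transition.
    static bool try_claim(Region& r, int from, int to) {
      int expected = from;
      return r.claim.compare_exchange_strong(expected, to);
    }

    int main() {
      Region r;
      // The first claim wins, the retry is rejected, so the region is
      // processed exactly once for this task's claim value (6 here).
      printf("%d %d\n", (int)try_claim(r, 0, 6), (int)try_claim(r, 0, 6));
      return 0;
    }
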
   14.23 @@ -416,7 +417,7 @@
   14.24  
   14.25    void add_to_marked_bytes(size_t incr_bytes) {
   14.26      _next_marked_bytes = _next_marked_bytes + incr_bytes;
   14.27 -    guarantee( _next_marked_bytes <= used(), "invariant" );
   14.28 +    assert(_next_marked_bytes <= used(), "invariant" );
   14.29    }
   14.30  
   14.31    void zero_marked_bytes()      {
    15.1 --- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Thu Dec 01 13:42:41 2011 -0500
    15.2 +++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Fri Dec 02 08:52:53 2011 -0500
    15.3 @@ -33,6 +33,7 @@
    15.4  #include "runtime/java.hpp"
    15.5  #include "runtime/mutexLocker.hpp"
    15.6  #include "runtime/virtualspace.hpp"
    15.7 +#include "runtime/vmThread.hpp"
    15.8  
    15.9  void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
   15.10                                                               OopsInGenClosure* cl,
   15.11 @@ -42,6 +43,11 @@
   15.12    assert((n_threads == 1 && ParallelGCThreads == 0) ||
   15.13           n_threads <= (int)ParallelGCThreads,
   15.14           "# worker threads != # requested!");
   15.15 +  assert(!Thread::current()->is_VM_thread() || (n_threads == 1), "There is only 1 VM thread");
   15.16 +  assert(UseDynamicNumberOfGCThreads ||
   15.17 +         !FLAG_IS_DEFAULT(ParallelGCThreads) ||
   15.18 +         n_threads == (int)ParallelGCThreads,
   15.19 +         "# worker threads != # requested!");
   15.20    // Make sure the LNC array is valid for the space.
   15.21    jbyte**   lowest_non_clean;
   15.22    uintptr_t lowest_non_clean_base_chunk_index;
   15.23 @@ -52,6 +58,8 @@
   15.24  
   15.25    int n_strides = n_threads * ParGCStridesPerThread;
   15.26    SequentialSubTasksDone* pst = sp->par_seq_tasks();
   15.27 +  // Sets the condition for completion of the subtask (how many threads
   15.28 +  // need to finish in order to be done).
   15.29    pst->set_n_threads(n_threads);
   15.30    pst->set_n_tasks(n_strides);
   15.31  
    16.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Dec 01 13:42:41 2011 -0500
    16.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Dec 02 08:52:53 2011 -0500
    16.3 @@ -305,7 +305,7 @@
    16.4  
    16.5    inline ParScanThreadState& thread_state(int i);
    16.6  
    16.7 -  void reset(bool promotion_failed);
    16.8 +  void reset(int active_workers, bool promotion_failed);
    16.9    void flush();
   16.10  
   16.11    #if TASKQUEUE_STATS
   16.12 @@ -322,6 +322,9 @@
   16.13    ParallelTaskTerminator& _term;
   16.14    ParNewGeneration&       _gen;
   16.15    Generation&             _next_gen;
   16.16 + public:
   16.17 +  bool is_valid(int id) const { return id < length(); }
   16.18 +  ParallelTaskTerminator* terminator() { return &_term; }
   16.19  };
   16.20  
   16.21  
   16.22 @@ -351,9 +354,9 @@
   16.23  }
   16.24  
   16.25  
   16.26 -void ParScanThreadStateSet::reset(bool promotion_failed)
   16.27 +void ParScanThreadStateSet::reset(int active_threads, bool promotion_failed)
   16.28  {
   16.29 -  _term.reset_for_reuse();
   16.30 +  _term.reset_for_reuse(active_threads);
   16.31    if (promotion_failed) {
   16.32      for (int i = 0; i < length(); ++i) {
   16.33        thread_state(i).print_and_clear_promotion_failure_size();
   16.34 @@ -569,6 +572,24 @@
   16.35      _state_set(state_set)
   16.36    {}
   16.37  
   16.38 +// Reset the terminator for the given number of
   16.39 +// active threads.
   16.40 +void ParNewGenTask::set_for_termination(int active_workers) {
   16.41 +  _state_set->reset(active_workers, _gen->promotion_failed());
   16.42 +  // Should the heap be passed in?  There's only 1 for now so
   16.43 +  // grab it instead.
   16.44 +  GenCollectedHeap* gch = GenCollectedHeap::heap();
   16.45 +  gch->set_n_termination(active_workers);
   16.46 +}
   16.47 +
   16.48 +// The "i" passed to this method is the part of the work for
   16.49 +// this thread.  It is not the worker ID.  The "i" is derived
   16.50 +// from _started_workers which is incremented in internal_note_start()
   16.51 +// called in GangWorker loop() and which is called under the
   16.52 +// protection of the gang monitor and is
   16.53 +// called after a task is started.  So "i" is based on
   16.54 +// first-come-first-served.
   16.55 +
   16.56  void ParNewGenTask::work(int i) {
   16.57    GenCollectedHeap* gch = GenCollectedHeap::heap();
   16.58    // Since this is being done in a separate thread, need new resource
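
A simplified picture of how the new set_for_termination() hook is meant to be driven, assuming the gang sizes the task once before dispatching work to its active workers (the class names below are illustrative; this is not the FlexibleWorkGang code):

    #include <cstdio>

    class Task {
    public:
      virtual ~Task() {}
      virtual void set_for_termination(int active_workers) {}
      virtual void work(int i) = 0;
    };

    // The gang calls the hook once with the active worker count, then hands
    // out work ids 0..active_workers-1 on a first-come-first-served basis.
    static void run_task(Task* t, int active_workers) {
      t->set_for_termination(active_workers);
      for (int i = 0; i < active_workers; ++i) {
        t->work(i);
      }
    }

    class DemoTask : public Task {
    public:
      virtual void set_for_termination(int n) { printf("terminator sized for %d\n", n); }
      virtual void work(int i) { printf("work chunk %d\n", i); }
    };

    int main() {
      DemoTask t;
      run_task(&t, 3);
      return 0;
    }
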
   16.59 @@ -581,6 +602,8 @@
   16.60    Generation* old_gen = gch->next_gen(_gen);
   16.61  
   16.62    ParScanThreadState& par_scan_state = _state_set->thread_state(i);
   16.63 +  assert(_state_set->is_valid(i), "Should not have been called");
   16.64 +
   16.65    par_scan_state.set_young_old_boundary(_young_old_boundary);
   16.66  
   16.67    par_scan_state.start_strong_roots();
   16.68 @@ -733,7 +756,9 @@
   16.69  
   16.70  private:
   16.71    virtual void work(int i);
   16.72 -
   16.73 +  virtual void set_for_termination(int active_workers) {
   16.74 +    _state_set.terminator()->reset_for_reuse(active_workers);
   16.75 +  }
   16.76  private:
   16.77    ParNewGeneration&      _gen;
   16.78    ProcessTask&           _task;
   16.79 @@ -789,18 +814,20 @@
   16.80    GenCollectedHeap* gch = GenCollectedHeap::heap();
   16.81    assert(gch->kind() == CollectedHeap::GenCollectedHeap,
   16.82           "not a generational heap");
   16.83 -  WorkGang* workers = gch->workers();
   16.84 +  FlexibleWorkGang* workers = gch->workers();
   16.85    assert(workers != NULL, "Need parallel worker threads.");
   16.86 +  _state_set.reset(workers->active_workers(), _generation.promotion_failed());
   16.87    ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(),
   16.88                                   _generation.reserved().end(), _state_set);
   16.89    workers->run_task(&rp_task);
   16.90 -  _state_set.reset(_generation.promotion_failed());
   16.91 +  _state_set.reset(0 /* bad value in debug if not reset */,
   16.92 +                   _generation.promotion_failed());
   16.93  }
   16.94  
   16.95  void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
   16.96  {
   16.97    GenCollectedHeap* gch = GenCollectedHeap::heap();
   16.98 -  WorkGang* workers = gch->workers();
   16.99 +  FlexibleWorkGang* workers = gch->workers();
  16.100    assert(workers != NULL, "Need parallel worker threads.");
  16.101    ParNewRefEnqueueTaskProxy enq_task(task);
  16.102    workers->run_task(&enq_task);
  16.103 @@ -856,7 +883,13 @@
  16.104    assert(gch->kind() == CollectedHeap::GenCollectedHeap,
  16.105      "not a CMS generational heap");
  16.106    AdaptiveSizePolicy* size_policy = gch->gen_policy()->size_policy();
  16.107 -  WorkGang* workers = gch->workers();
  16.108 +  FlexibleWorkGang* workers = gch->workers();
  16.109 +  assert(workers != NULL, "Need workgang for parallel work");
  16.110 +  int active_workers =
  16.111 +      AdaptiveSizePolicy::calc_active_workers(workers->total_workers(),
  16.112 +                                   workers->active_workers(),
  16.113 +                                   Threads::number_of_non_daemon_threads());
  16.114 +  workers->set_active_workers(active_workers);
  16.115    _next_gen = gch->next_gen(this);
  16.116    assert(_next_gen != NULL,
  16.117      "This must be the youngest gen, and not the only gen");
  16.118 @@ -894,13 +927,19 @@
  16.119  
  16.120    gch->save_marks();
  16.121    assert(workers != NULL, "Need parallel worker threads.");
  16.122 -  ParallelTaskTerminator _term(workers->total_workers(), task_queues());
  16.123 -  ParScanThreadStateSet thread_state_set(workers->total_workers(),
  16.124 +  int n_workers = active_workers;
  16.125 +
  16.126 +  // Set the correct parallelism (number of queues) in the reference processor
  16.127 +  ref_processor()->set_active_mt_degree(n_workers);
  16.128 +
  16.129 +  // Always set the terminator for the active number of workers
  16.130 +  // because only those workers go through the termination protocol.
  16.131 +  ParallelTaskTerminator _term(n_workers, task_queues());
  16.132 +  ParScanThreadStateSet thread_state_set(workers->active_workers(),
  16.133                                           *to(), *this, *_next_gen, *task_queues(),
  16.134                                           _overflow_stacks, desired_plab_sz(), _term);
  16.135  
  16.136    ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set);
  16.137 -  int n_workers = workers->total_workers();
  16.138    gch->set_par_threads(n_workers);
  16.139    gch->rem_set()->prepare_for_younger_refs_iterate(true);
  16.140    // It turns out that even when we're using 1 thread, doing the work in a
  16.141 @@ -914,7 +953,8 @@
  16.142      GenCollectedHeap::StrongRootsScope srs(gch);
  16.143      tsk.work(0);
  16.144    }
  16.145 -  thread_state_set.reset(promotion_failed());
  16.146 +  thread_state_set.reset(0 /* Bad value in debug if not reset */,
  16.147 +                         promotion_failed());
  16.148  
  16.149    // Process (weak) reference objects found during scavenge.
  16.150    ReferenceProcessor* rp = ref_processor();
  16.151 @@ -927,6 +967,8 @@
  16.152    EvacuateFollowersClosureGeneral evacuate_followers(gch, _level,
  16.153      &scan_without_gc_barrier, &scan_with_gc_barrier);
  16.154    rp->setup_policy(clear_all_soft_refs);
  16.155 +  // Can the mt_degree be set later (at run_task() time would be best)?
  16.156 +  rp->set_active_mt_degree(active_workers);
  16.157    if (rp->processing_is_mt()) {
  16.158      ParNewRefProcTaskExecutor task_executor(*this, thread_state_set);
  16.159      rp->process_discovered_references(&is_alive, &keep_alive,
    17.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Dec 01 13:42:41 2011 -0500
    17.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Fri Dec 02 08:52:53 2011 -0500
    17.3 @@ -240,6 +240,10 @@
    17.4    HeapWord* young_old_boundary() { return _young_old_boundary; }
    17.5  
    17.6    void work(int i);
    17.7 +
    17.8 +  // Reset the terminator in ParScanThreadStateSet for
    17.9 +  // "active_workers" threads.
   17.10 +  virtual void set_for_termination(int active_workers);
   17.11  };
   17.12  
   17.13  class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
    18.1 --- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Thu Dec 01 13:42:41 2011 -0500
    18.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp	Fri Dec 02 08:52:53 2011 -0500
    18.3 @@ -223,7 +223,8 @@
    18.4                                                      MutableSpace* sp,
    18.5                                                      HeapWord* space_top,
    18.6                                                      PSPromotionManager* pm,
    18.7 -                                                    uint stripe_number) {
    18.8 +                                                    uint stripe_number,
    18.9 +                                                    uint stripe_total) {
   18.10    int ssize = 128; // Naked constant!  Work unit = 64k.
   18.11    int dirty_card_count = 0;
   18.12  
   18.13 @@ -231,7 +232,11 @@
   18.14    jbyte* start_card = byte_for(sp->bottom());
   18.15    jbyte* end_card   = byte_for(sp_top - 1) + 1;
   18.16    oop* last_scanned = NULL; // Prevent scanning objects more than once
   18.17 -  for (jbyte* slice = start_card; slice < end_card; slice += ssize*ParallelGCThreads) {
   18.18 +  // The width of the stripe ssize*stripe_total must be
   18.19 +  // consistent with the number of stripes so that the complete slice
   18.20 +  // is covered.
   18.21 +  size_t slice_width = ssize * stripe_total;
   18.22 +  for (jbyte* slice = start_card; slice < end_card; slice += slice_width) {
   18.23      jbyte* worker_start_card = slice + stripe_number * ssize;
   18.24      if (worker_start_card >= end_card)
   18.25        return; // We're done.
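
The slice/stripe arithmetic above is easier to see with concrete numbers: ssize = 128 card entries is 64 KiB of heap at the usual 512-byte card size, and a full slice is ssize * stripe_total cards, so the stripes tile the space with no gaps or overlap. A standalone sketch with made-up values:

    #include <cstdio>

    int main() {
      const int ssize        = 128;   // cards per stripe, as in the hunk above
      const int stripe_total = 4;     // number of parallel stripes
      const int n_cards      = 1024;  // made-up space size, in cards

      // Worker `stripe_number` visits [slice + stripe_number*ssize, +ssize)
      // for every slice; together the stripes cover each card exactly once.
      for (int stripe_number = 0; stripe_number < stripe_total; ++stripe_number) {
        printf("stripe %d:", stripe_number);
        for (int slice = 0; slice < n_cards; slice += ssize * stripe_total) {
          int start = slice + stripe_number * ssize;
          if (start >= n_cards) break;
          printf(" [%d,%d)", start, start + ssize);
        }
        printf("\n");
      }
      return 0;
    }
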
    19.1 --- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp	Thu Dec 01 13:42:41 2011 -0500
    19.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp	Fri Dec 02 08:52:53 2011 -0500
    19.3 @@ -69,7 +69,8 @@
    19.4                                    MutableSpace* sp,
    19.5                                    HeapWord* space_top,
    19.6                                    PSPromotionManager* pm,
    19.7 -                                  uint stripe_number);
    19.8 +                                  uint stripe_number,
    19.9 +                                  uint stripe_total);
   19.10  
   19.11    // Verification
   19.12    static void verify_all_young_refs_imprecise();
    20.1 --- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp	Thu Dec 01 13:42:41 2011 -0500
    20.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp	Fri Dec 02 08:52:53 2011 -0500
    20.3 @@ -25,6 +25,7 @@
    20.4  #include "precompiled.hpp"
    20.5  #include "gc_implementation/parallelScavenge/gcTaskManager.hpp"
    20.6  #include "gc_implementation/parallelScavenge/gcTaskThread.hpp"
    20.7 +#include "gc_implementation/shared/adaptiveSizePolicy.hpp"
    20.8  #include "memory/allocation.hpp"
    20.9  #include "memory/allocation.inline.hpp"
   20.10  #include "runtime/mutex.hpp"
   20.11 @@ -181,6 +182,7 @@
   20.12    }
   20.13    set_insert_end(task);
   20.14    increment_length();
   20.15 +  verify_length();
   20.16    if (TraceGCTaskQueue) {
   20.17      print("after:");
   20.18    }
   20.19 @@ -192,7 +194,7 @@
   20.20      tty->print_cr("[" INTPTR_FORMAT "]"
   20.21                    " GCTaskQueue::enqueue(list: "
   20.22                    INTPTR_FORMAT ")",
   20.23 -                  this);
   20.24 +                  this, list);
   20.25      print("before:");
   20.26      list->print("list:");
   20.27    }
   20.28 @@ -211,14 +213,15 @@
   20.29      list->remove_end()->set_older(insert_end());
   20.30      insert_end()->set_newer(list->remove_end());
   20.31      set_insert_end(list->insert_end());
   20.32 +    set_length(length() + list_length);
   20.33      // empty the argument list.
   20.34    }
   20.35 -  set_length(length() + list_length);
   20.36    list->initialize();
   20.37    if (TraceGCTaskQueue) {
   20.38      print("after:");
   20.39      list->print("list:");
   20.40    }
   20.41 +  verify_length();
   20.42  }
   20.43  
   20.44  // Dequeue one task.
   20.45 @@ -288,6 +291,7 @@
   20.46    decrement_length();
   20.47    assert(result->newer() == NULL, "shouldn't be on queue");
   20.48    assert(result->older() == NULL, "shouldn't be on queue");
   20.49 +  verify_length();
   20.50    return result;
   20.51  }
   20.52  
   20.53 @@ -311,22 +315,40 @@
   20.54    result->set_newer(NULL);
   20.55    result->set_older(NULL);
   20.56    decrement_length();
   20.57 +  verify_length();
   20.58    return result;
   20.59  }
   20.60  
   20.61  NOT_PRODUCT(
   20.62 +// Count the elements in the queue and verify the length against
   20.63 +// that count.
   20.64 +void GCTaskQueue::verify_length() const {
   20.65 +  uint count = 0;
   20.66 +  for (GCTask* element = insert_end();
   20.67 +       element != NULL;
   20.68 +       element = element->older()) {
   20.69 +
   20.70 +    count++;
   20.71 +  }
   20.72 +  assert(count == length(), "Length does not match queue");
   20.73 +}
   20.74 +
   20.75  void GCTaskQueue::print(const char* message) const {
   20.76    tty->print_cr("[" INTPTR_FORMAT "] GCTaskQueue:"
   20.77                  "  insert_end: " INTPTR_FORMAT
   20.78                  "  remove_end: " INTPTR_FORMAT
   20.79 +                "  length:       %d"
   20.80                  "  %s",
   20.81 -                this, insert_end(), remove_end(), message);
   20.82 +                this, insert_end(), remove_end(), length(), message);
   20.83 +  uint count = 0;
   20.84    for (GCTask* element = insert_end();
   20.85         element != NULL;
   20.86         element = element->older()) {
   20.87      element->print("    ");
   20.88 +    count++;
   20.89      tty->cr();
   20.90    }
   20.91 +  tty->print("Total tasks: %d", count);
   20.92  }
   20.93  )
   20.94  
   20.95 @@ -351,12 +373,16 @@
   20.96  //
   20.97  GCTaskManager::GCTaskManager(uint workers) :
   20.98    _workers(workers),
   20.99 +  _active_workers(0),
  20.100 +  _idle_workers(0),
  20.101    _ndc(NULL) {
  20.102    initialize();
  20.103  }
  20.104  
  20.105  GCTaskManager::GCTaskManager(uint workers, NotifyDoneClosure* ndc) :
  20.106    _workers(workers),
  20.107 +  _active_workers(0),
  20.108 +  _idle_workers(0),
  20.109    _ndc(ndc) {
  20.110    initialize();
  20.111  }
  20.112 @@ -373,6 +399,7 @@
  20.113    GCTaskQueue* unsynchronized_queue = GCTaskQueue::create_on_c_heap();
  20.114    _queue = SynchronizedGCTaskQueue::create(unsynchronized_queue, lock());
  20.115    _noop_task = NoopGCTask::create_on_c_heap();
  20.116 +  _idle_inactive_task = WaitForBarrierGCTask::create_on_c_heap();
  20.117    _resource_flag = NEW_C_HEAP_ARRAY(bool, workers());
  20.118    {
  20.119      // Set up worker threads.
  20.120 @@ -418,6 +445,8 @@
  20.121    assert(queue()->is_empty(), "still have queued work");
  20.122    NoopGCTask::destroy(_noop_task);
  20.123    _noop_task = NULL;
  20.124 +  WaitForBarrierGCTask::destroy(_idle_inactive_task);
  20.125 +  _idle_inactive_task = NULL;
  20.126    if (_thread != NULL) {
  20.127      for (uint i = 0; i < workers(); i += 1) {
  20.128        GCTaskThread::destroy(thread(i));
  20.129 @@ -442,6 +471,86 @@
  20.130    }
  20.131  }
  20.132  
  20.133 +void GCTaskManager::set_active_gang() {
  20.134 +  _active_workers =
  20.135 +    AdaptiveSizePolicy::calc_active_workers(workers(),
  20.136 +                                 active_workers(),
  20.137 +                                 Threads::number_of_non_daemon_threads());
  20.138 +
  20.139 +  assert(!all_workers_active() || active_workers() == ParallelGCThreads,
  20.140 +         err_msg("all_workers_active() is  incorrect: "
  20.141 +                 "active %d  ParallelGCThreads %d", active_workers(),
  20.142 +                 ParallelGCThreads));
  20.143 +  if (TraceDynamicGCThreads) {
  20.144 +    gclog_or_tty->print_cr("GCTaskManager::set_active_gang(): "
  20.145 +                           "all_workers_active()  %d  workers %d  "
  20.146 +                           "active  %d  ParallelGCThreads %d ",
  20.147 +                           all_workers_active(), workers(),  active_workers(),
  20.148 +                           ParallelGCThreads);
  20.149 +  }
  20.150 +}
  20.151 +
  20.152 +// Create IdleGCTasks for inactive workers.
  20.153 +// Creates tasks in a ResourceArea and assumes
  20.154 +// an appropriate ResourceMark.
  20.155 +void GCTaskManager::task_idle_workers() {
  20.156 +  {
  20.157 +    int more_inactive_workers = 0;
  20.158 +    {
  20.159 +      // Stop any idle tasks from exiting their IdleGCTask's
  20.160 +      // and get the count for additional IdleGCTask's under
  20.161 +      // the GCTaskManager's monitor so that the "more_inactive_workers"
  20.162 +      // count is correct.
  20.163 +      MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
  20.164 +      _idle_inactive_task->set_should_wait(true);
   20.165 +      // active_workers is the number of workers being requested.
   20.166 +      // idle_workers is the number currently idle.  If all the
   20.167 +      // workers are being requested to be active but some are
   20.168 +      // already idle, reduce the number of active_workers to be
   20.169 +      // consistent with the number of idle_workers.  The idle_workers
   20.170 +      // are stuck in idle tasks and will no longer be released
   20.171 +      // (since a new GC is starting).  Try later to release enough
   20.172 +      // idle_workers to allow the desired number of active_workers.
  20.173 +      more_inactive_workers =
  20.174 +        workers() - active_workers() - idle_workers();
  20.175 +      if (more_inactive_workers < 0) {
  20.176 +        int reduced_active_workers = active_workers() + more_inactive_workers;
  20.177 +        set_active_workers(reduced_active_workers);
  20.178 +        more_inactive_workers = 0;
  20.179 +      }
  20.180 +      if (TraceDynamicGCThreads) {
  20.181 +        gclog_or_tty->print_cr("JT: %d  workers %d  active  %d  "
  20.182 +                                "idle %d  more %d",
  20.183 +                                Threads::number_of_non_daemon_threads(),
  20.184 +                                workers(),
  20.185 +                                active_workers(),
  20.186 +                                idle_workers(),
  20.187 +                                more_inactive_workers);
  20.188 +      }
  20.189 +    }
  20.190 +    GCTaskQueue* q = GCTaskQueue::create();
  20.191 +    for(uint i = 0; i < (uint) more_inactive_workers; i++) {
  20.192 +      q->enqueue(IdleGCTask::create_on_c_heap());
  20.193 +      increment_idle_workers();
  20.194 +    }
  20.195 +    assert(workers() == active_workers() + idle_workers(),
  20.196 +      "total workers should equal active + inactive");
  20.197 +    add_list(q);
  20.198 +    // GCTaskQueue* q was created in a ResourceArea so a
  20.199 +    // destroy() call is not needed.
  20.200 +  }
  20.201 +}
  20.202 +
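As a worked example of the accounting in task_idle_workers() above (numbers purely illustrative): with workers() == 8, a requested active_workers() of 6 and idle_workers() == 4,

    more_inactive_workers  = 8 - 6 - 4 = -2   // negative: too few free workers
    reduced_active_workers = 6 + (-2)  =  4   // set_active_workers(4)
    more_inactive_workers  = 0                // no new IdleGCTasks are enqueued

which leaves workers() == active_workers() + idle_workers() (8 == 4 + 4), the invariant asserted before add_list(q) is called.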
  20.203 +void  GCTaskManager::release_idle_workers() {
  20.204 +  {
  20.205 +    MutexLockerEx ml(monitor(),
  20.206 +      Mutex::_no_safepoint_check_flag);
  20.207 +    _idle_inactive_task->set_should_wait(false);
  20.208 +    monitor()->notify_all();
  20.209 +  // Release monitor
  20.210 +  }
  20.211 +}
  20.212 +
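For orientation, a minimal sketch of how a collection pause is expected to drive these new entry points; it condenses the psParallelCompact.cpp hunk later in this changeset, and everything around the three calls is elided:

    // Sketch only -- names are the ones introduced by this patch.
    GCTaskManager* mgr = heap->gc_task_manager();
    mgr->set_active_gang();                        // pick the worker count for this GC
    mgr->task_idle_workers();                      // park surplus workers in IdleGCTasks
    heap->set_par_threads(mgr->active_workers());

    // ... enqueue the real GC tasks and run them with execute_and_wait() ...

    mgr->release_idle_workers();                   // let the parked workers leave the barrier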
  20.213  void GCTaskManager::print_task_time_stamps() {
  20.214    for(uint i=0; i<ParallelGCThreads; i++) {
  20.215      GCTaskThread* t = thread(i);
  20.216 @@ -510,6 +619,13 @@
  20.217    // Release monitor().
  20.218  }
  20.219  
  20.220 +// GC workers wait in get_task() for new work to be added
  20.221 +// to the GCTaskManager's queue.  When new work is added,
  20.222 +// a notify is sent to the waiting GC workers which then
  20.223 +// compete to get tasks.  If a GC worker wakes up and there
  20.224 +// is no work on the queue, it is given a noop_task to execute
  20.225 +// and then loops to find more work.
  20.226 +
  20.227  GCTask* GCTaskManager::get_task(uint which) {
  20.228    GCTask* result = NULL;
  20.229    // Grab the queue lock.
  20.230 @@ -558,8 +674,10 @@
  20.231                    which, result, GCTask::Kind::to_string(result->kind()));
  20.232      tty->print_cr("     %s", result->name());
  20.233    }
  20.234 -  increment_busy_workers();
  20.235 -  increment_delivered_tasks();
  20.236 +  if (!result->is_idle_task()) {
  20.237 +    increment_busy_workers();
  20.238 +    increment_delivered_tasks();
  20.239 +  }
  20.240    return result;
  20.241    // Release monitor().
  20.242  }
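To make the comment before get_task() concrete, a rough sketch of the wait/notify shape it describes; access control is ignored, the real method's extra bookkeeping (blocking workers, trace output) is omitted, and dequeue_or_noop() is a hypothetical helper standing in for the real dequeue path:

    // Illustrative only -- not a replacement for GCTaskManager::get_task().
    GCTask* get_task_sketch(GCTaskManager* mgr, uint which) {
      MutexLockerEx ml(mgr->monitor(), Mutex::_no_safepoint_check_flag);
      while (mgr->queue()->is_empty()) {
        // add_list() notifies this monitor when new work arrives
        mgr->monitor()->wait(Mutex::_no_safepoint_check_flag, 0);
      }
      GCTask* result = dequeue_or_noop(mgr);  // real task, or the shared NoopGCTask
      if (!result->is_idle_task()) {          // mirrors the change in the hunk above
        mgr->increment_busy_workers();
        mgr->increment_delivered_tasks();
      }
      return result;
    }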
  20.243 @@ -622,6 +740,7 @@
  20.244  
  20.245  uint GCTaskManager::decrement_busy_workers() {
  20.246    assert(queue()->own_lock(), "don't own the lock");
  20.247 +  assert(_busy_workers > 0, "About to make a mistake");
  20.248    _busy_workers -= 1;
  20.249    return _busy_workers;
  20.250  }
  20.251 @@ -643,11 +762,28 @@
  20.252    set_resource_flag(which, false);
  20.253  }
  20.254  
  20.255 +// "list" contains tasks that are ready to execute.  Those
  20.256 +// tasks are added to the GCTaskManager's queue of tasks and
  20.257 +// then the GC workers are notified that there is new work to
  20.258 +// do.
  20.259 +//
  20.260 +// Typically different types of tasks can be added to the "list".
  20.261 +// For example in PSScavenge OldToYoungRootsTask, SerialOldToYoungRootsTask,
  20.262 +// ScavengeRootsTask, and StealTask tasks are all added to the list
  20.263 +// and then the GC workers are notified of new work.  The tasks are
  20.264 +// handed out in the order in which they are added to the list
  20.265 +// (although execution is not necessarily in that order).  As long
  20.266 +// as any tasks are running the GCTaskManager will wait for execution
  20.267 +// to complete.  GC workers that execute a stealing task remain in
  20.268 +// the stealing task until all stealing tasks have completed.  The load
   20.269 +// balancing afforded by the stealing tasks works best if the stealing
  20.270 +// tasks are added last to the list.
  20.271 +
  20.272  void GCTaskManager::execute_and_wait(GCTaskQueue* list) {
  20.273    WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create();
  20.274    list->enqueue(fin);
  20.275    add_list(list);
  20.276 -  fin->wait_for();
  20.277 +  fin->wait_for(true /* reset */);
  20.278    // We have to release the barrier tasks!
  20.279    WaitForBarrierGCTask::destroy(fin);
  20.280  }
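A small usage sketch of the ordering the comment above recommends: per-thread work first, stealing tasks last, then execute_and_wait() blocks until the whole list has been processed. The task classes named are the ones used elsewhere in this changeset; active_gc_threads and terminator are assumed to be set up as in the enqueue_* helpers of psParallelCompact.cpp:

    GCTaskQueue* q = GCTaskQueue::create();                     // ResourceArea allocation
    for (uint j = 0; j < active_gc_threads; j++) {
      q->enqueue(new DrainStacksCompactionTask(j));             // the "real" work, added first
    }
    for (uint j = 0; j < active_gc_threads; j++) {
      q->enqueue(new StealRegionCompactionTask(&terminator));   // stealing tasks added last
    }
    gc_task_manager()->execute_and_wait(q);                     // appends the barrier task and waits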
  20.281 @@ -692,6 +828,72 @@
  20.282  }
  20.283  
  20.284  //
  20.285 +// IdleGCTask
  20.286 +//
  20.287 +
  20.288 +IdleGCTask* IdleGCTask::create() {
  20.289 +  IdleGCTask* result = new IdleGCTask(false);
  20.290 +  return result;
  20.291 +}
  20.292 +
  20.293 +IdleGCTask* IdleGCTask::create_on_c_heap() {
  20.294 +  IdleGCTask* result = new(ResourceObj::C_HEAP) IdleGCTask(true);
  20.295 +  return result;
  20.296 +}
  20.297 +
  20.298 +void IdleGCTask::do_it(GCTaskManager* manager, uint which) {
  20.299 +  WaitForBarrierGCTask* wait_for_task = manager->idle_inactive_task();
  20.300 +  if (TraceGCTaskManager) {
  20.301 +    tty->print_cr("[" INTPTR_FORMAT "]"
   20.302 +                  " IdleGCTask::do_it()"
  20.303 +      "  should_wait: %s",
  20.304 +      this, wait_for_task->should_wait() ? "true" : "false");
  20.305 +  }
  20.306 +  MutexLockerEx ml(manager->monitor(), Mutex::_no_safepoint_check_flag);
  20.307 +  if (TraceDynamicGCThreads) {
  20.308 +    gclog_or_tty->print_cr("--- idle %d", which);
  20.309 +  }
  20.310 +  // Increment has to be done when the idle tasks are created.
  20.311 +  // manager->increment_idle_workers();
  20.312 +  manager->monitor()->notify_all();
  20.313 +  while (wait_for_task->should_wait()) {
  20.314 +    if (TraceGCTaskManager) {
  20.315 +      tty->print_cr("[" INTPTR_FORMAT "]"
  20.316 +                    " IdleGCTask::do_it()"
  20.317 +        "  [" INTPTR_FORMAT "] (%s)->wait()",
  20.318 +        this, manager->monitor(), manager->monitor()->name());
  20.319 +    }
  20.320 +    manager->monitor()->wait(Mutex::_no_safepoint_check_flag, 0);
  20.321 +  }
  20.322 +  manager->decrement_idle_workers();
  20.323 +  if (TraceDynamicGCThreads) {
  20.324 +    gclog_or_tty->print_cr("--- release %d", which);
  20.325 +  }
  20.326 +  if (TraceGCTaskManager) {
  20.327 +    tty->print_cr("[" INTPTR_FORMAT "]"
  20.328 +                  " IdleGCTask::do_it() returns"
  20.329 +      "  should_wait: %s",
  20.330 +      this, wait_for_task->should_wait() ? "true" : "false");
  20.331 +  }
  20.332 +  // Release monitor().
  20.333 +}
  20.334 +
  20.335 +void IdleGCTask::destroy(IdleGCTask* that) {
  20.336 +  if (that != NULL) {
  20.337 +    that->destruct();
  20.338 +    if (that->is_c_heap_obj()) {
  20.339 +      FreeHeap(that);
  20.340 +    }
  20.341 +  }
  20.342 +}
  20.343 +
  20.344 +void IdleGCTask::destruct() {
   20.345 +  // This has to know its superclass structure, just like the constructor.
  20.346 +  this->GCTask::destruct();
  20.347 +  // Nothing else to do.
  20.348 +}
  20.349 +
  20.350 +//
  20.351  // BarrierGCTask
  20.352  //
  20.353  
  20.354 @@ -768,7 +970,8 @@
  20.355  }
  20.356  
  20.357  WaitForBarrierGCTask* WaitForBarrierGCTask::create_on_c_heap() {
  20.358 -  WaitForBarrierGCTask* result = new WaitForBarrierGCTask(true);
  20.359 +  WaitForBarrierGCTask* result =
  20.360 +    new (ResourceObj::C_HEAP) WaitForBarrierGCTask(true);
  20.361    return result;
  20.362  }
  20.363  
  20.364 @@ -849,7 +1052,7 @@
  20.365    }
  20.366  }
  20.367  
  20.368 -void WaitForBarrierGCTask::wait_for() {
  20.369 +void WaitForBarrierGCTask::wait_for(bool reset) {
  20.370    if (TraceGCTaskManager) {
  20.371      tty->print_cr("[" INTPTR_FORMAT "]"
  20.372                    " WaitForBarrierGCTask::wait_for()"
  20.373 @@ -869,7 +1072,9 @@
  20.374        monitor()->wait(Mutex::_no_safepoint_check_flag, 0);
  20.375      }
  20.376      // Reset the flag in case someone reuses this task.
  20.377 -    set_should_wait(true);
  20.378 +    if (reset) {
  20.379 +      set_should_wait(true);
  20.380 +    }
  20.381      if (TraceGCTaskManager) {
  20.382        tty->print_cr("[" INTPTR_FORMAT "]"
  20.383                      " WaitForBarrierGCTask::wait_for() returns"
    21.1 --- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp	Thu Dec 01 13:42:41 2011 -0500
    21.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp	Fri Dec 02 08:52:53 2011 -0500
    21.3 @@ -45,6 +45,7 @@
    21.4  class ReleasingBarrierGCTask;
    21.5  class NotifyingBarrierGCTask;
    21.6  class WaitForBarrierGCTask;
    21.7 +class IdleGCTask;
    21.8  // A free list of Monitor*'s.
    21.9  class MonitorSupply;
   21.10  
   21.11 @@ -64,7 +65,8 @@
   21.12        unknown_task,
   21.13        ordinary_task,
   21.14        barrier_task,
   21.15 -      noop_task
   21.16 +      noop_task,
   21.17 +      idle_task
   21.18      };
   21.19      static const char* to_string(kind value);
   21.20    };
   21.21 @@ -108,6 +110,9 @@
   21.22    bool is_noop_task() const {
   21.23      return kind()==Kind::noop_task;
   21.24    }
   21.25 +  bool is_idle_task() const {
   21.26 +    return kind()==Kind::idle_task;
   21.27 +  }
   21.28    void print(const char* message) const PRODUCT_RETURN;
   21.29  protected:
   21.30    // Constructors: Only create subclasses.
   21.31 @@ -153,6 +158,7 @@
   21.32      assert(((insert_end() == NULL && remove_end() == NULL) ||
   21.33              (insert_end() != NULL && remove_end() != NULL)),
   21.34             "insert_end and remove_end don't match");
   21.35 +    assert((insert_end() != NULL) || (_length == 0), "Not empty");
   21.36      return insert_end() == NULL;
   21.37    }
   21.38    uint length() const {
   21.39 @@ -204,6 +210,8 @@
   21.40    GCTask* remove();                     // Remove from remove end.
   21.41    GCTask* remove(GCTask* task);         // Remove from the middle.
   21.42    void print(const char* message) const PRODUCT_RETURN;
   21.43 +  // Debug support
   21.44 +  void verify_length() const PRODUCT_RETURN;
   21.45  };
   21.46  
   21.47  // A GCTaskQueue that can be synchronized.
   21.48 @@ -285,12 +293,76 @@
   21.49    }
   21.50  };
   21.51  
   21.52 +// Dynamic number of GC threads
   21.53 +//
   21.54 +//  GC threads wait in get_task() for work (i.e., a task) to perform.
   21.55 +// When the number of GC threads was static, the number of tasks
   21.56 +// created to do a job was equal to or greater than the maximum
   21.57 +// number of GC threads (ParallelGCThreads).  The job might be divided
   21.58 +// into a number of tasks greater than the number of GC threads for
   21.59 +// load balancing (i.e., over partitioning).  The last task to be
   21.60 +// executed by a GC thread in a job is a work stealing task.  A
   21.61 +// GC  thread that gets a work stealing task continues to execute
    21.62 +// that task until the job is done.  In the static number of GC threads
   21.63 +// case, tasks are added to a queue (FIFO).  The work stealing tasks are
   21.64 +// the last to be added.  Once the tasks are added, the GC threads grab
   21.65 +// a task and go.  A single thread can do all the non-work stealing tasks
    21.66 +// and then execute a work stealing task and wait for all the other GC threads
   21.67 +// to execute their work stealing task.
   21.68 +//  In the dynamic number of GC threads implementation, idle-tasks are
   21.69 +// created to occupy the non-participating or "inactive" threads.  An
   21.70 +// idle-task makes the GC thread wait on a barrier that is part of the
    21.71 +// GCTaskManager.  The GC threads that have been "idled" in an IdleGCTask
   21.72 +// are released once all the active GC threads have finished their work
   21.73 +// stealing tasks.  The GCTaskManager does not wait for all the "idled"
   21.74 +// GC threads to resume execution. When those GC threads do resume
    21.75 +// execution in the course of the thread scheduling, they call get_task()
   21.76 +// as all the other GC threads do.  Because all the "idled" threads are
   21.77 +// not required to execute in order to finish a job, it is possible for
   21.78 +// a GC thread to still be "idled" when the next job is started.  Such
   21.79 +// a thread stays "idled" for the next job.  This can result in a new
    21.80 +// job not having all the expected active workers.  For example if one
   21.81 +// job requests 4 active workers out of a total of 10 workers so the
   21.82 +// remaining 6 are "idled", if the next job requests 6 active workers
   21.83 +// but all 6 of the "idled" workers are still idle, then the next job
   21.84 +// will only get 4 active workers.
   21.85 +//  The implementation for the parallel old compaction phase has an
   21.86 +// added complication.  In the static case parold partitions the chunks
   21.87 +// ready to be filled into stacks, one for each GC thread.  A GC thread
   21.88 +// executing a draining task (drains the stack of ready chunks)
    21.89 +// claims a stack according to its id (the unique ordinal value assigned
   21.90 +// to each GC thread).  In the dynamic case not all GC threads will
   21.91 +// actively participate so stacks with ready to fill chunks can only be
   21.92 +// given to the active threads.  An initial implementation chose stacks
    21.93 +// numbered 1-n to get the ready chunks and required that GC threads
   21.94 +// 1-n be the active workers.  This was undesirable because it required
   21.95 +// certain threads to participate.  In the final implementation a
    21.96 +// list of stacks equal in number to the active workers is filled
   21.97 +// with ready chunks.  GC threads that participate get a stack from
   21.98 +// the task (DrainStacksCompactionTask), empty the stack, and then add it to a
   21.99 +// recycling list at the end of the task.  If the same GC thread gets
  21.100 +// a second task, it gets a second stack to drain and returns it.  The
   21.101 +// stacks are added to a recycling list so that stealing tasks later
   21.102 +// in the job can get a stack from the recycling list.  Stealing tasks
   21.103 +// use the stacks in their work in a way similar to the draining tasks.
  21.104 +// A thread is not guaranteed to get anything but a stealing task and
  21.105 +// a thread that only gets a stealing task has to get a stack. A failed
  21.106 +// implementation tried to have the GC threads keep the stack they used
  21.107 +// during a draining task for later use in the stealing task but that didn't
  21.108 +// work because as noted a thread is not guaranteed to get a draining task.
  21.109 +//
  21.110 +// For PSScavenge and ParCompactionManager the GC threads are
  21.111 +// held in the GCTaskThread** _thread array in GCTaskManager.
  21.112 +
  21.113 +
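To make the stack handoff described above concrete, a compressed sketch of what the draining and stealing compaction tasks do with the shared region-stack list when not all workers are active; it condenses the pcTasks.cpp hunks later in this changeset (tracing, asserts and the all-workers-active fast path omitted), with cm being the thread's ParCompactionManager:

    // Draining task: use the stack filled for this task, then recycle its index.
    uint idx = stack_index();                                    // fixed at enqueue time
    cm->set_region_stack(ParCompactionManager::region_list(idx));
    cm->set_region_stack_index(idx);
    cm->drain_region_stacks();
    ParCompactionManager::push_recycled_stack_index(idx);        // hand the stack back
    cm->set_region_stack(NULL);

    // Stealing task: a thread that never drained picks up a recycled index.
    uint steal_idx = ParCompactionManager::pop_recycled_stack_index();
    cm->set_region_stack(ParCompactionManager::region_list(steal_idx));
    cm->set_region_stack_index(steal_idx);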
  21.114  class GCTaskManager : public CHeapObj {
  21.115   friend class ParCompactionManager;
  21.116   friend class PSParallelCompact;
  21.117   friend class PSScavenge;
  21.118   friend class PSRefProcTaskExecutor;
  21.119   friend class RefProcTaskExecutor;
  21.120 + friend class GCTaskThread;
  21.121 + friend class IdleGCTask;
  21.122  private:
  21.123    // Instance state.
  21.124    NotifyDoneClosure*        _ndc;               // Notify on completion.
  21.125 @@ -298,6 +370,7 @@
  21.126    Monitor*                  _monitor;           // Notification of changes.
  21.127    SynchronizedGCTaskQueue*  _queue;             // Queue of tasks.
  21.128    GCTaskThread**            _thread;            // Array of worker threads.
  21.129 +  uint                      _active_workers;    // Number of active workers.
  21.130    uint                      _busy_workers;      // Number of busy workers.
  21.131    uint                      _blocking_worker;   // The worker that's blocking.
  21.132    bool*                     _resource_flag;     // Array of flag per threads.
  21.133 @@ -307,6 +380,8 @@
  21.134    uint                      _emptied_queue;     // Times we emptied the queue.
  21.135    NoopGCTask*               _noop_task;         // The NoopGCTask instance.
  21.136    uint                      _noop_tasks;        // Count of noop tasks.
  21.137 +  WaitForBarrierGCTask*     _idle_inactive_task;// Task for inactive workers
  21.138 +  volatile uint             _idle_workers;      // Number of idled workers
  21.139  public:
  21.140    // Factory create and destroy methods.
  21.141    static GCTaskManager* create(uint workers) {
  21.142 @@ -324,6 +399,9 @@
  21.143    uint busy_workers() const {
  21.144      return _busy_workers;
  21.145    }
  21.146 +  volatile uint idle_workers() const {
  21.147 +    return _idle_workers;
  21.148 +  }
  21.149    //     Pun between Monitor* and Mutex*
  21.150    Monitor* monitor() const {
  21.151      return _monitor;
  21.152 @@ -331,6 +409,9 @@
  21.153    Monitor * lock() const {
  21.154      return _monitor;
  21.155    }
  21.156 +  WaitForBarrierGCTask* idle_inactive_task() {
  21.157 +    return _idle_inactive_task;
  21.158 +  }
  21.159    // Methods.
  21.160    //     Add the argument task to be run.
  21.161    void add_task(GCTask* task);
  21.162 @@ -350,6 +431,10 @@
  21.163    bool should_release_resources(uint which); // Predicate.
  21.164    //     Note the release of resources by the argument worker.
  21.165    void note_release(uint which);
  21.166 +  //     Create IdleGCTasks for inactive workers and start workers
  21.167 +  void task_idle_workers();
  21.168 +  //     Release the workers in IdleGCTasks
  21.169 +  void release_idle_workers();
  21.170    // Constants.
  21.171    //     A sentinel worker identifier.
  21.172    static uint sentinel_worker() {
  21.173 @@ -375,6 +460,15 @@
  21.174    uint workers() const {
  21.175      return _workers;
  21.176    }
  21.177 +  void set_active_workers(uint v) {
  21.178 +    assert(v <= _workers, "Trying to set more workers active than there are");
  21.179 +    _active_workers = MIN2(v, _workers);
  21.180 +    assert(v != 0, "Trying to set active workers to 0");
  21.181 +    _active_workers = MAX2(1U, _active_workers);
  21.182 +  }
  21.183 +  // Sets the number of threads that will be used in a collection
  21.184 +  void set_active_gang();
  21.185 +
  21.186    NotifyDoneClosure* notify_done_closure() const {
  21.187      return _ndc;
  21.188    }
  21.189 @@ -457,8 +551,21 @@
  21.190    void reset_noop_tasks() {
  21.191      _noop_tasks = 0;
  21.192    }
  21.193 +  void increment_idle_workers() {
  21.194 +    _idle_workers++;
  21.195 +  }
  21.196 +  void decrement_idle_workers() {
  21.197 +    _idle_workers--;
  21.198 +  }
  21.199    // Other methods.
  21.200    void initialize();
  21.201 +
  21.202 + public:
  21.203 +  // Return true if all workers are currently active.
  21.204 +  bool all_workers_active() { return workers() == active_workers(); }
  21.205 +  uint active_workers() const {
  21.206 +    return _active_workers;
  21.207 +  }
  21.208  };
  21.209  
  21.210  //
  21.211 @@ -475,6 +582,8 @@
  21.212    static NoopGCTask* create();
  21.213    static NoopGCTask* create_on_c_heap();
  21.214    static void destroy(NoopGCTask* that);
  21.215 +
  21.216 +  virtual char* name() { return (char *)"noop task"; }
  21.217    // Methods from GCTask.
  21.218    void do_it(GCTaskManager* manager, uint which) {
  21.219      // Nothing to do.
  21.220 @@ -518,6 +627,8 @@
  21.221    }
  21.222    // Destructor-like method.
  21.223    void destruct();
  21.224 +
  21.225 +  virtual char* name() { return (char *)"barrier task"; }
  21.226    // Methods.
  21.227    //     Wait for this to be the only task running.
  21.228    void do_it_internal(GCTaskManager* manager, uint which);
  21.229 @@ -586,11 +697,13 @@
  21.230  // the BarrierGCTask is done.
  21.231  // This may cover many of the uses of NotifyingBarrierGCTasks.
  21.232  class WaitForBarrierGCTask : public BarrierGCTask {
  21.233 +  friend class GCTaskManager;
  21.234 +  friend class IdleGCTask;
  21.235  private:
  21.236    // Instance state.
  21.237 -  Monitor*   _monitor;                  // Guard and notify changes.
  21.238 -  bool       _should_wait;              // true=>wait, false=>proceed.
  21.239 -  const bool _is_c_heap_obj;            // Was allocated on the heap.
  21.240 +  Monitor*      _monitor;                  // Guard and notify changes.
  21.241 +  volatile bool _should_wait;              // true=>wait, false=>proceed.
  21.242 +  const bool    _is_c_heap_obj;            // Was allocated on the heap.
  21.243  public:
  21.244    virtual char* name() { return (char *) "waitfor-barrier-task"; }
  21.245  
  21.246 @@ -600,7 +713,10 @@
  21.247    static void destroy(WaitForBarrierGCTask* that);
  21.248    // Methods.
  21.249    void     do_it(GCTaskManager* manager, uint which);
  21.250 -  void     wait_for();
  21.251 +  void     wait_for(bool reset);
  21.252 +  void set_should_wait(bool value) {
  21.253 +    _should_wait = value;
  21.254 +  }
  21.255  protected:
  21.256    // Constructor.  Clients use factory, but there might be subclasses.
  21.257    WaitForBarrierGCTask(bool on_c_heap);
  21.258 @@ -613,14 +729,38 @@
  21.259    bool should_wait() const {
  21.260      return _should_wait;
  21.261    }
  21.262 -  void set_should_wait(bool value) {
  21.263 -    _should_wait = value;
  21.264 -  }
  21.265    bool is_c_heap_obj() {
  21.266      return _is_c_heap_obj;
  21.267    }
  21.268  };
  21.269  
   21.270 +// Task that is used to idle a GC worker when fewer than
  21.271 +// the maximum workers are wanted.
  21.272 +class IdleGCTask : public GCTask {
  21.273 +  const bool    _is_c_heap_obj;            // Was allocated on the heap.
  21.274 + public:
  21.275 +  bool is_c_heap_obj() {
  21.276 +    return _is_c_heap_obj;
  21.277 +  }
  21.278 +  // Factory create and destroy methods.
  21.279 +  static IdleGCTask* create();
  21.280 +  static IdleGCTask* create_on_c_heap();
  21.281 +  static void destroy(IdleGCTask* that);
  21.282 +
  21.283 +  virtual char* name() { return (char *)"idle task"; }
  21.284 +  // Methods from GCTask.
  21.285 +  virtual void do_it(GCTaskManager* manager, uint which);
  21.286 +protected:
  21.287 +  // Constructor.
  21.288 +  IdleGCTask(bool on_c_heap) :
  21.289 +    GCTask(GCTask::Kind::idle_task),
  21.290 +    _is_c_heap_obj(on_c_heap) {
  21.291 +    // Nothing to do.
  21.292 +  }
  21.293 +  // Destructor-like method.
  21.294 +  void destruct();
  21.295 +};
  21.296 +
  21.297  class MonitorSupply : public AllStatic {
  21.298  private:
  21.299    // State.
    22.1 --- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp	Thu Dec 01 13:42:41 2011 -0500
    22.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp	Fri Dec 02 08:52:53 2011 -0500
    22.3 @@ -93,6 +93,11 @@
    22.4    st->cr();
    22.5  }
    22.6  
    22.7 +// GC workers get tasks from the GCTaskManager and execute
    22.8 +// them in this method.  If there are no tasks to execute,
    22.9 +// the GC workers wait in the GCTaskManager's get_task()
   22.10 +// for tasks to be enqueued for execution.
   22.11 +
   22.12  void GCTaskThread::run() {
   22.13    // Set up the thread for stack overflow support
   22.14    this->record_stack_base_and_size();
   22.15 @@ -124,7 +129,6 @@
   22.16      for (; /* break */; ) {
   22.17        // This will block until there is a task to be gotten.
   22.18        GCTask* task = manager()->get_task(which());
   22.19 -
   22.20        // In case the update is costly
   22.21        if (PrintGCTaskTimeStamps) {
   22.22          timer.update();
   22.23 @@ -134,18 +138,28 @@
   22.24        char* name = task->name();
   22.25  
   22.26        task->do_it(manager(), which());
   22.27 -      manager()->note_completion(which());
   22.28  
   22.29 -      if (PrintGCTaskTimeStamps) {
   22.30 -        assert(_time_stamps != NULL, "Sanity (PrintGCTaskTimeStamps set late?)");
   22.31 +      if (!task->is_idle_task()) {
   22.32 +        manager()->note_completion(which());
   22.33  
   22.34 -        timer.update();
   22.35 +        if (PrintGCTaskTimeStamps) {
   22.36 +          assert(_time_stamps != NULL,
   22.37 +            "Sanity (PrintGCTaskTimeStamps set late?)");
   22.38  
   22.39 -        GCTaskTimeStamp* time_stamp = time_stamp_at(_time_stamp_index++);
   22.40 +          timer.update();
   22.41  
   22.42 -        time_stamp->set_name(name);
   22.43 -        time_stamp->set_entry_time(entry_time);
   22.44 -        time_stamp->set_exit_time(timer.ticks());
   22.45 +          GCTaskTimeStamp* time_stamp = time_stamp_at(_time_stamp_index++);
   22.46 +
   22.47 +          time_stamp->set_name(name);
   22.48 +          time_stamp->set_entry_time(entry_time);
   22.49 +          time_stamp->set_exit_time(timer.ticks());
   22.50 +        }
   22.51 +      } else {
   22.52 +        // idle tasks complete outside the normal accounting
   22.53 +        // so that a task can complete without waiting for idle tasks.
   22.54 +        // They have to be terminated separately.
   22.55 +        IdleGCTask::destroy((IdleGCTask*)task);
   22.56 +        set_is_working(true);
   22.57        }
   22.58  
   22.59        // Check if we should release our inner resources.
    23.1 --- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp	Thu Dec 01 13:42:41 2011 -0500
    23.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp	Fri Dec 02 08:52:53 2011 -0500
    23.3 @@ -35,6 +35,7 @@
    23.4  class GCTaskManager;
    23.5  
    23.6  class GCTaskThread : public WorkerThread {
    23.7 +  friend class GCTaskManager;
    23.8  private:
    23.9    // Instance state.
   23.10    GCTaskManager* _manager;              // Manager for worker.
   23.11 @@ -45,6 +46,8 @@
   23.12  
   23.13    GCTaskTimeStamp* time_stamp_at(uint index);
   23.14  
   23.15 +  bool _is_working;                     // True if participating in GC tasks
   23.16 +
   23.17   public:
   23.18    // Factory create and destroy methods.
   23.19    static GCTaskThread* create(GCTaskManager* manager,
   23.20 @@ -84,6 +87,7 @@
   23.21    uint processor_id() const {
   23.22      return _processor_id;
   23.23    }
   23.24 +  void set_is_working(bool v) { _is_working = v; }
   23.25  };
   23.26  
   23.27  class GCTaskTimeStamp : public CHeapObj
    24.1 --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Thu Dec 01 13:42:41 2011 -0500
    24.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Fri Dec 02 08:52:53 2011 -0500
    24.3 @@ -152,15 +152,16 @@
    24.4  {
    24.5    ParallelScavengeHeap* heap = PSParallelCompact::gc_heap();
    24.6    uint parallel_gc_threads = heap->gc_task_manager()->workers();
    24.7 +  uint active_gc_threads = heap->gc_task_manager()->active_workers();
    24.8    RegionTaskQueueSet* qset = ParCompactionManager::region_array();
    24.9 -  ParallelTaskTerminator terminator(parallel_gc_threads, qset);
   24.10 +  ParallelTaskTerminator terminator(active_gc_threads, qset);
   24.11    GCTaskQueue* q = GCTaskQueue::create();
   24.12    for(uint i=0; i<parallel_gc_threads; i++) {
   24.13      q->enqueue(new RefProcTaskProxy(task, i));
   24.14    }
   24.15    if (task.marks_oops_alive()) {
   24.16      if (parallel_gc_threads>1) {
   24.17 -      for (uint j=0; j<parallel_gc_threads; j++) {
   24.18 +      for (uint j=0; j<active_gc_threads; j++) {
   24.19          q->enqueue(new StealMarkingTask(&terminator));
   24.20        }
   24.21      }
   24.22 @@ -216,7 +217,6 @@
   24.23  // StealRegionCompactionTask
   24.24  //
   24.25  
   24.26 -
   24.27  StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t):
   24.28    _terminator(t) {}
   24.29  
   24.30 @@ -229,6 +229,32 @@
   24.31    ParCompactionManager* cm =
   24.32      ParCompactionManager::gc_thread_compaction_manager(which);
   24.33  
   24.34 +
   24.35 +  // If not all threads are active, get a draining stack
    24.36 +  // from the list.  Else, just use this thread's draining stack.
   24.37 +  uint which_stack_index;
   24.38 +  bool use_all_workers = manager->all_workers_active();
   24.39 +  if (use_all_workers) {
   24.40 +    which_stack_index = which;
   24.41 +    assert(manager->active_workers() == ParallelGCThreads,
   24.42 +           err_msg("all_workers_active has been incorrectly set: "
   24.43 +                   " active %d  ParallelGCThreads %d", manager->active_workers(),
   24.44 +                   ParallelGCThreads));
   24.45 +  } else {
   24.46 +    which_stack_index = ParCompactionManager::pop_recycled_stack_index();
   24.47 +  }
   24.48 +
   24.49 +  cm->set_region_stack_index(which_stack_index);
   24.50 +  cm->set_region_stack(ParCompactionManager::region_list(which_stack_index));
   24.51 +  if (TraceDynamicGCThreads) {
   24.52 +    gclog_or_tty->print_cr("StealRegionCompactionTask::do_it "
   24.53 +                           "region_stack_index %d region_stack = 0x%x "
   24.54 +                           " empty (%d) use all workers %d",
   24.55 +    which_stack_index, ParCompactionManager::region_list(which_stack_index),
   24.56 +    cm->region_stack()->is_empty(),
   24.57 +    use_all_workers);
   24.58 +  }
   24.59 +
   24.60    // Has to drain stacks first because there may be regions on
   24.61    // preloaded onto the stack and this thread may never have
   24.62    // done a draining task.  Are the draining tasks needed?
   24.63 @@ -285,6 +311,50 @@
   24.64    ParCompactionManager* cm =
   24.65      ParCompactionManager::gc_thread_compaction_manager(which);
   24.66  
   24.67 +  uint which_stack_index;
   24.68 +  bool use_all_workers = manager->all_workers_active();
   24.69 +  if (use_all_workers) {
   24.70 +    which_stack_index = which;
   24.71 +    assert(manager->active_workers() == ParallelGCThreads,
   24.72 +           err_msg("all_workers_active has been incorrectly set: "
   24.73 +                   " active %d  ParallelGCThreads %d", manager->active_workers(),
   24.74 +                   ParallelGCThreads));
   24.75 +  } else {
   24.76 +    which_stack_index = stack_index();
   24.77 +  }
   24.78 +
   24.79 +  cm->set_region_stack(ParCompactionManager::region_list(which_stack_index));
   24.80 +  if (TraceDynamicGCThreads) {
   24.81 +    gclog_or_tty->print_cr("DrainStacksCompactionTask::do_it which = %d "
   24.82 +                           "which_stack_index = %d/empty(%d) "
   24.83 +                           "use all workers %d",
   24.84 +                           which, which_stack_index,
   24.85 +                           cm->region_stack()->is_empty(),
   24.86 +                           use_all_workers);
   24.87 +  }
   24.88 +
   24.89 +  cm->set_region_stack_index(which_stack_index);
   24.90 +
   24.91    // Process any regions already in the compaction managers stacks.
   24.92    cm->drain_region_stacks();
   24.93 +
   24.94 +  assert(cm->region_stack()->is_empty(), "Not empty");
   24.95 +
   24.96 +  if (!use_all_workers) {
   24.97 +    // Always give up the region stack.
   24.98 +    assert(cm->region_stack() ==
   24.99 +           ParCompactionManager::region_list(cm->region_stack_index()),
  24.100 +           "region_stack and region_stack_index are inconsistent");
  24.101 +    ParCompactionManager::push_recycled_stack_index(cm->region_stack_index());
  24.102 +
  24.103 +    if (TraceDynamicGCThreads) {
  24.104 +      void* old_region_stack = (void*) cm->region_stack();
  24.105 +      int old_region_stack_index = cm->region_stack_index();
  24.106 +      gclog_or_tty->print_cr("Pushing region stack 0x%x/%d",
  24.107 +        old_region_stack, old_region_stack_index);
  24.108 +    }
  24.109 +
  24.110 +    cm->set_region_stack(NULL);
  24.111 +    cm->set_region_stack_index((uint)max_uintx);
  24.112 +  }
  24.113  }
    25.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Thu Dec 01 13:42:41 2011 -0500
    25.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp	Fri Dec 02 08:52:53 2011 -0500
    25.3 @@ -1,5 +1,5 @@
    25.4  /*
    25.5 - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
    25.6 + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
    25.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    25.8   *
    25.9   * This code is free software; you can redistribute it and/or modify it
   25.10 @@ -39,6 +39,9 @@
   25.11  
   25.12  PSOldGen*            ParCompactionManager::_old_gen = NULL;
   25.13  ParCompactionManager**  ParCompactionManager::_manager_array = NULL;
   25.14 +
   25.15 +RegionTaskQueue**              ParCompactionManager::_region_list = NULL;
   25.16 +
   25.17  OopTaskQueueSet*     ParCompactionManager::_stack_array = NULL;
   25.18  ParCompactionManager::ObjArrayTaskQueueSet*
   25.19    ParCompactionManager::_objarray_queues = NULL;
   25.20 @@ -46,8 +49,14 @@
   25.21  ParMarkBitMap*       ParCompactionManager::_mark_bitmap = NULL;
   25.22  RegionTaskQueueSet*  ParCompactionManager::_region_array = NULL;
   25.23  
   25.24 +uint*                 ParCompactionManager::_recycled_stack_index = NULL;
   25.25 +int                   ParCompactionManager::_recycled_top = -1;
   25.26 +int                   ParCompactionManager::_recycled_bottom = -1;
   25.27 +
   25.28  ParCompactionManager::ParCompactionManager() :
   25.29 -    _action(CopyAndUpdate) {
   25.30 +    _action(CopyAndUpdate),
   25.31 +    _region_stack(NULL),
   25.32 +    _region_stack_index((uint)max_uintx) {
   25.33  
   25.34    ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
   25.35    assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
   25.36 @@ -57,7 +66,10 @@
   25.37  
   25.38    marking_stack()->initialize();
   25.39    _objarray_stack.initialize();
   25.40 -  region_stack()->initialize();
   25.41 +}
   25.42 +
   25.43 +ParCompactionManager::~ParCompactionManager() {
   25.44 +  delete _recycled_stack_index;
   25.45  }
   25.46  
   25.47  void ParCompactionManager::initialize(ParMarkBitMap* mbm) {
   25.48 @@ -72,6 +84,19 @@
   25.49    _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1 );
   25.50    guarantee(_manager_array != NULL, "Could not allocate manager_array");
   25.51  
   25.52 +  _region_list = NEW_C_HEAP_ARRAY(RegionTaskQueue*,
   25.53 +                                         parallel_gc_threads+1);
   25.54 +  guarantee(_region_list != NULL, "Could not initialize promotion manager");
   25.55 +
   25.56 +  _recycled_stack_index = NEW_C_HEAP_ARRAY(uint, parallel_gc_threads);
   25.57 +
    25.58 +  // parallel_gc_threads + 1 to be consistent with the number of
   25.59 +  // compaction managers.
   25.60 +  for(uint i=0; i<parallel_gc_threads + 1; i++) {
   25.61 +    _region_list[i] = new RegionTaskQueue();
   25.62 +    region_list(i)->initialize();
   25.63 +  }
   25.64 +
   25.65    _stack_array = new OopTaskQueueSet(parallel_gc_threads);
   25.66    guarantee(_stack_array != NULL, "Could not allocate stack_array");
   25.67    _objarray_queues = new ObjArrayTaskQueueSet(parallel_gc_threads);
   25.68 @@ -85,7 +110,7 @@
   25.69      guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager");
   25.70      stack_array()->register_queue(i, _manager_array[i]->marking_stack());
   25.71      _objarray_queues->register_queue(i, &_manager_array[i]->_objarray_stack);
   25.72 -    region_array()->register_queue(i, _manager_array[i]->region_stack());
   25.73 +    region_array()->register_queue(i, region_list(i));
   25.74    }
   25.75  
   25.76    // The VMThread gets its own ParCompactionManager, which is not available
   25.77 @@ -97,6 +122,29 @@
   25.78      "Not initialized?");
   25.79  }
   25.80  
   25.81 +int ParCompactionManager::pop_recycled_stack_index() {
   25.82 +  assert(_recycled_bottom <= _recycled_top, "list is empty");
   25.83 +  // Get the next available index
   25.84 +  if (_recycled_bottom < _recycled_top) {
   25.85 +    uint cur, next, last;
   25.86 +    do {
   25.87 +      cur = _recycled_bottom;
   25.88 +      next = cur + 1;
   25.89 +      last = Atomic::cmpxchg(next, &_recycled_bottom, cur);
   25.90 +    } while (cur != last);
   25.91 +    return _recycled_stack_index[next];
   25.92 +  } else {
   25.93 +    return -1;
   25.94 +  }
   25.95 +}
   25.96 +
   25.97 +void ParCompactionManager::push_recycled_stack_index(uint v) {
   25.98 +  // Get the next available index
   25.99 +  int cur = Atomic::add(1, &_recycled_top);
  25.100 +  _recycled_stack_index[cur] = v;
  25.101 +  assert(_recycled_bottom <= _recycled_top, "list top and bottom are wrong");
  25.102 +}
  25.103 +
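A short worked trace of the two routines above (slot values illustrative), starting from reset_recycled_stack_index(), i.e. _recycled_top == _recycled_bottom == -1:

    push_recycled_stack_index(5)  ->  _recycled_top 0,  _recycled_stack_index[0] = 5
    push_recycled_stack_index(9)  ->  _recycled_top 1,  _recycled_stack_index[1] = 9
    pop_recycled_stack_index()    ->  _recycled_bottom -1 -> 0, returns 5
    pop_recycled_stack_index()    ->  _recycled_bottom  0 -> 1, returns 9
    pop_recycled_stack_index()    ->  bottom == top, returns -1 (no recycled stack)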
  25.104  bool ParCompactionManager::should_update() {
  25.105    assert(action() != NotValid, "Action is not set");
  25.106    return (action() == ParCompactionManager::Update) ||
  25.107 @@ -111,14 +159,13 @@
  25.108           (action() == ParCompactionManager::UpdateAndCopy);
  25.109  }
  25.110  
  25.111 -bool ParCompactionManager::should_verify_only() {
  25.112 -  assert(action() != NotValid, "Action is not set");
  25.113 -  return action() == ParCompactionManager::VerifyUpdate;
  25.114 +void ParCompactionManager::region_list_push(uint list_index,
  25.115 +                                            size_t region_index) {
  25.116 +  region_list(list_index)->push(region_index);
  25.117  }
  25.118  
  25.119 -bool ParCompactionManager::should_reset_only() {
  25.120 -  assert(action() != NotValid, "Action is not set");
  25.121 -  return action() == ParCompactionManager::ResetObjects;
  25.122 +void ParCompactionManager::verify_region_list_empty(uint list_index) {
  25.123 +  assert(region_list(list_index)->is_empty(), "Not empty");
  25.124  }
  25.125  
  25.126  ParCompactionManager*
    26.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Thu Dec 01 13:42:41 2011 -0500
    26.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp	Fri Dec 02 08:52:53 2011 -0500
    26.3 @@ -1,5 +1,5 @@
    26.4  /*
    26.5 - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
    26.6 + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
    26.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    26.8   *
    26.9   * This code is free software; you can redistribute it and/or modify it
   26.10 @@ -48,6 +48,7 @@
   26.11    friend class StealRegionCompactionTask;
   26.12    friend class UpdateAndFillClosure;
   26.13    friend class RefProcTaskExecutor;
   26.14 +  friend class IdleGCTask;
   26.15  
   26.16   public:
   26.17  
   26.18 @@ -58,8 +59,6 @@
   26.19      Copy,
   26.20      UpdateAndCopy,
   26.21      CopyAndUpdate,
   26.22 -    VerifyUpdate,
   26.23 -    ResetObjects,
   26.24      NotValid
   26.25    };
   26.26  // ------------------------  End don't putback if not needed
   26.27 @@ -85,7 +84,31 @@
   26.28    // Is there a way to reuse the _marking_stack for the
   26.29    // saving empty regions?  For now just create a different
   26.30    // type of TaskQueue.
   26.31 -  RegionTaskQueue               _region_stack;
   26.32 +  RegionTaskQueue*             _region_stack;
   26.33 +
   26.34 +  static RegionTaskQueue**     _region_list;
   26.35 +  // Index in _region_list for current _region_stack.
   26.36 +  uint _region_stack_index;
   26.37 +
   26.38 +  // Indexes of recycled region stacks/overflow stacks
   26.39 +  // Stacks of regions to be compacted are embedded in the tasks doing
   26.40 +  // the compaction.  A thread that executes the task extracts the
   26.41 +  // region stack and drains it.  These threads keep these region
   26.42 +  // stacks for use during compaction task stealing.  If a thread
    26.43 +  // gets a second draining task, it pushes its current region stack
   26.44 +  // index into the array _recycled_stack_index and gets a new
   26.45 +  // region stack from the task.  A thread that is executing a
    26.46 +  // compaction stealing task without ever having executed a
   26.47 +  // draining task, will get a region stack from _recycled_stack_index.
   26.48 +  //
   26.49 +  // Array of indexes into the array of region stacks.
   26.50 +  static uint*                    _recycled_stack_index;
   26.51 +  // The index into _recycled_stack_index of the last region stack index
   26.52 +  // pushed.  If -1, there are no entries into _recycled_stack_index.
   26.53 +  static int                      _recycled_top;
   26.54 +  // The index into _recycled_stack_index of the last region stack index
   26.55 +  // popped.  If -1, there has not been any entry popped.
   26.56 +  static int                      _recycled_bottom;
   26.57  
   26.58    Stack<Klass*>                 _revisit_klass_stack;
   26.59    Stack<DataLayout*>            _revisit_mdo_stack;
   26.60 @@ -104,7 +127,6 @@
   26.61    // Array of tasks.  Needed by the ParallelTaskTerminator.
   26.62    static RegionTaskQueueSet* region_array()      { return _region_array; }
   26.63    OverflowTaskQueue<oop>*  marking_stack()       { return &_marking_stack; }
   26.64 -  RegionTaskQueue* region_stack()                { return &_region_stack; }
   26.65  
   26.66    // Pushes onto the marking stack.  If the marking stack is full,
   26.67    // pushes onto the overflow stack.
   26.68 @@ -116,10 +138,33 @@
   26.69    Action action() { return _action; }
   26.70    void set_action(Action v) { _action = v; }
   26.71  
   26.72 +  RegionTaskQueue* region_stack()                { return _region_stack; }
   26.73 +  void set_region_stack(RegionTaskQueue* v)       { _region_stack = v; }
   26.74 +
   26.75    inline static ParCompactionManager* manager_array(int index);
   26.76  
   26.77 +  inline static RegionTaskQueue* region_list(int index) {
   26.78 +    return _region_list[index];
   26.79 +  }
   26.80 +
   26.81 +  uint region_stack_index() { return _region_stack_index; }
   26.82 +  void set_region_stack_index(uint v) { _region_stack_index = v; }
   26.83 +
   26.84 +  // Pop and push unique reusable stack index
   26.85 +  static int pop_recycled_stack_index();
   26.86 +  static void push_recycled_stack_index(uint v);
   26.87 +  static void reset_recycled_stack_index() {
   26.88 +    _recycled_bottom = _recycled_top = -1;
   26.89 +  }
   26.90 +
   26.91    ParCompactionManager();
   26.92 +  ~ParCompactionManager();
   26.93  
    26.94 +  // Pushes onto the region stack at the given index.  If the
    26.95 +  // region stack is full, pushes onto the region overflow
    26.96 +  // stack.
   26.97 +  static void region_list_push(uint stack_index, size_t region_index);
   26.98 +  static void verify_region_list_empty(uint stack_index);
   26.99    ParMarkBitMap* mark_bitmap() { return _mark_bitmap; }
  26.100  
  26.101    // Take actions in preparation for a compaction.
  26.102 @@ -129,8 +174,6 @@
  26.103  
  26.104    bool should_update();
  26.105    bool should_copy();
  26.106 -  bool should_verify_only();
  26.107 -  bool should_reset_only();
  26.108  
  26.109    Stack<Klass*>* revisit_klass_stack() { return &_revisit_klass_stack; }
  26.110    Stack<DataLayout*>* revisit_mdo_stack() { return &_revisit_mdo_stack; }
    27.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Thu Dec 01 13:42:41 2011 -0500
    27.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp	Fri Dec 02 08:52:53 2011 -0500
    27.3 @@ -1,5 +1,5 @@
    27.4  /*
    27.5 - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
    27.6 + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
    27.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    27.8   *
    27.9   * This code is free software; you can redistribute it and/or modify it
   27.10 @@ -96,7 +96,8 @@
   27.11     * by the MarkSweepAlwaysCompactCount parameter. This is a significant
   27.12     * performance improvement!
   27.13     */
   27.14 -  bool skip_dead = ((PSMarkSweep::total_invocations() % MarkSweepAlwaysCompactCount) != 0);
   27.15 +  bool skip_dead = (MarkSweepAlwaysCompactCount < 1)
   27.16 +    || ((PSMarkSweep::total_invocations() % MarkSweepAlwaysCompactCount) != 0);
   27.17  
   27.18    size_t allowed_deadspace = 0;
   27.19    if (skip_dead) {
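A brief worked example of the guarded expression above (flag values illustrative):

    MarkSweepAlwaysCompactCount == 4:  invocation 4, 8, 12, ...  -> skip_dead == false
                                       all other invocations     -> skip_dead == true
    MarkSweepAlwaysCompactCount <= 0:  first clause already true -> the modulo is never
                                       evaluated, skip_dead == true on every invocation

The new first clause short-circuits the modulo that the old expression would have evaluated against zero when the flag is set below 1.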
    28.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Dec 01 13:42:41 2011 -0500
    28.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Fri Dec 02 08:52:53 2011 -0500
    28.3 @@ -2045,6 +2045,11 @@
    28.4      ResourceMark rm;
    28.5      HandleMark hm;
    28.6  
    28.7 +    // Set the number of GC threads to be used in this collection
    28.8 +    gc_task_manager()->set_active_gang();
    28.9 +    gc_task_manager()->task_idle_workers();
   28.10 +    heap->set_par_threads(gc_task_manager()->active_workers());
   28.11 +
   28.12      const bool is_system_gc = gc_cause == GCCause::_java_lang_system_gc;
   28.13  
   28.14      // This is useful for debugging but don't change the output the
   28.15 @@ -2197,6 +2202,7 @@
   28.16      // Track memory usage and detect low memory
   28.17      MemoryService::track_memory_usage();
   28.18      heap->update_counters();
   28.19 +    gc_task_manager()->release_idle_workers();
   28.20    }
   28.21  
   28.22  #ifdef ASSERT
   28.23 @@ -2204,7 +2210,7 @@
   28.24      ParCompactionManager* const cm =
   28.25        ParCompactionManager::manager_array(int(i));
   28.26      assert(cm->marking_stack()->is_empty(),       "should be empty");
   28.27 -    assert(cm->region_stack()->is_empty(),        "should be empty");
   28.28 +    assert(ParCompactionManager::region_list(int(i))->is_empty(), "should be empty");
   28.29      assert(cm->revisit_klass_stack()->is_empty(), "should be empty");
   28.30    }
   28.31  #endif // ASSERT
   28.32 @@ -2351,8 +2357,9 @@
   28.33  
   28.34    ParallelScavengeHeap* heap = gc_heap();
   28.35    uint parallel_gc_threads = heap->gc_task_manager()->workers();
   28.36 +  uint active_gc_threads = heap->gc_task_manager()->active_workers();
   28.37    TaskQueueSetSuper* qset = ParCompactionManager::region_array();
   28.38 -  ParallelTaskTerminator terminator(parallel_gc_threads, qset);
   28.39 +  ParallelTaskTerminator terminator(active_gc_threads, qset);
   28.40  
   28.41    PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
   28.42    PSParallelCompact::FollowStackClosure follow_stack_closure(cm);
   28.43 @@ -2374,21 +2381,13 @@
   28.44      q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::jvmti));
   28.45      q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::code_cache));
   28.46  
   28.47 -    if (parallel_gc_threads > 1) {
   28.48 -      for (uint j = 0; j < parallel_gc_threads; j++) {
   28.49 +    if (active_gc_threads > 1) {
   28.50 +      for (uint j = 0; j < active_gc_threads; j++) {
   28.51          q->enqueue(new StealMarkingTask(&terminator));
   28.52        }
   28.53      }
   28.54  
   28.55 -    WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create();
   28.56 -    q->enqueue(fin);
   28.57 -
   28.58 -    gc_task_manager()->add_list(q);
   28.59 -
   28.60 -    fin->wait_for();
   28.61 -
   28.62 -    // We have to release the barrier tasks!
   28.63 -    WaitForBarrierGCTask::destroy(fin);
   28.64 +    gc_task_manager()->execute_and_wait(q);
   28.65    }
   28.66  
   28.67    // Process reference objects found during marking
   28.68 @@ -2483,10 +2482,22 @@
   28.69  {
   28.70    TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty);
   28.71  
   28.72 -  const unsigned int task_count = MAX2(parallel_gc_threads, 1U);
   28.73 -  for (unsigned int j = 0; j < task_count; j++) {
   28.74 +  // Find the threads that are active
   28.75 +  unsigned int which = 0;
   28.76 +
   28.77 +  const uint task_count = MAX2(parallel_gc_threads, 1U);
   28.78 +  for (uint j = 0; j < task_count; j++) {
   28.79      q->enqueue(new DrainStacksCompactionTask(j));
   28.80 +    ParCompactionManager::verify_region_list_empty(j);
    28.81 +    // Set the region stack variables to "no" region stack values
    28.82 +    // so that they will be recognized as needing a region stack
    28.83 +    // in the stealing tasks if they do not get one by executing
    28.84 +    // a draining task.
   28.85 +    ParCompactionManager* cm = ParCompactionManager::manager_array(j);
   28.86 +    cm->set_region_stack(NULL);
   28.87 +    cm->set_region_stack_index((uint)max_uintx);
   28.88    }
   28.89 +  ParCompactionManager::reset_recycled_stack_index();
   28.90  
   28.91    // Find all regions that are available (can be filled immediately) and
   28.92    // distribute them to the thread stacks.  The iteration is done in reverse
   28.93 @@ -2495,8 +2506,10 @@
   28.94    const ParallelCompactData& sd = PSParallelCompact::summary_data();
   28.95  
   28.96    size_t fillable_regions = 0;   // A count for diagnostic purposes.
   28.97 -  unsigned int which = 0;       // The worker thread number.
   28.98 -
    28.99 +  // An index which corresponds to the tasks created above.
  28.100 +  // "which" must be 0 <= which < task_count
  28.101 +
  28.102 +  which = 0;
  28.103    for (unsigned int id = to_space_id; id > perm_space_id; --id) {
  28.104      SpaceInfo* const space_info = _space_info + id;
  28.105      MutableSpace* const space = space_info->space();
  28.106 @@ -2509,8 +2522,7 @@
  28.107  
  28.108      for (size_t cur = end_region - 1; cur >= beg_region; --cur) {
  28.109        if (sd.region(cur)->claim_unsafe()) {
  28.110 -        ParCompactionManager* cm = ParCompactionManager::manager_array(which);
  28.111 -        cm->push_region(cur);
  28.112 +        ParCompactionManager::region_list_push(which, cur);
  28.113  
  28.114          if (TraceParallelOldGCCompactionPhase && Verbose) {
  28.115            const size_t count_mod_8 = fillable_regions & 7;
  28.116 @@ -2521,8 +2533,10 @@
  28.117  
  28.118          NOT_PRODUCT(++fillable_regions;)
  28.119  
  28.120 -        // Assign regions to threads in round-robin fashion.
  28.121 +        // Assign regions to tasks in round-robin fashion.
  28.122          if (++which == task_count) {
  28.123 +          assert(which <= parallel_gc_threads,
  28.124 +            "Inconsistent number of workers");
  28.125            which = 0;
  28.126          }
  28.127        }
  28.128 @@ -2642,26 +2656,19 @@
  28.129    PSOldGen* old_gen = heap->old_gen();
  28.130    old_gen->start_array()->reset();
  28.131    uint parallel_gc_threads = heap->gc_task_manager()->workers();
  28.132 +  uint active_gc_threads = heap->gc_task_manager()->active_workers();
  28.133    TaskQueueSetSuper* qset = ParCompactionManager::region_array();
  28.134 -  ParallelTaskTerminator terminator(parallel_gc_threads, qset);
  28.135 +  ParallelTaskTerminator terminator(active_gc_threads, qset);
  28.136  
  28.137    GCTaskQueue* q = GCTaskQueue::create();
  28.138 -  enqueue_region_draining_tasks(q, parallel_gc_threads);
  28.139 -  enqueue_dense_prefix_tasks(q, parallel_gc_threads);
  28.140 -  enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads);
  28.141 +  enqueue_region_draining_tasks(q, active_gc_threads);
  28.142 +  enqueue_dense_prefix_tasks(q, active_gc_threads);
  28.143 +  enqueue_region_stealing_tasks(q, &terminator, active_gc_threads);
  28.144  
  28.145    {
  28.146      TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty);
  28.147  
  28.148 -    WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create();
  28.149 -    q->enqueue(fin);
  28.150 -
  28.151 -    gc_task_manager()->add_list(q);
  28.152 -
  28.153 -    fin->wait_for();
  28.154 -
  28.155 -    // We have to release the barrier tasks!
  28.156 -    WaitForBarrierGCTask::destroy(fin);
  28.157 +    gc_task_manager()->execute_and_wait(q);
  28.158  
  28.159  #ifdef  ASSERT
  28.160      // Verify that all regions have been processed before the deferred updates.
  28.161 @@ -2729,6 +2736,9 @@
  28.162  PSParallelCompact::follow_weak_klass_links() {
  28.163    // All klasses on the revisit stack are marked at this point.
  28.164    // Update and follow all subklass, sibling and implementor links.
  28.165 +  // Check all the stacks here even if not all the workers are active.
  28.166 +  // There is no accounting which indicates which stacks might have
  28.167 +  // contents to be followed.
  28.168    if (PrintRevisitStats) {
  28.169      gclog_or_tty->print_cr("#classes in system dictionary = %d",
  28.170                             SystemDictionary::number_of_classes());
  28.171 @@ -3360,20 +3370,7 @@
  28.172    HeapWord* beg_addr = sp->bottom();
  28.173    HeapWord* end_addr = sp->top();
  28.174  
  28.175 -#ifdef ASSERT
  28.176    assert(beg_addr <= dp_addr && dp_addr <= end_addr, "bad dense prefix");
  28.177 -  if (cm->should_verify_only()) {
  28.178 -    VerifyUpdateClosure verify_update(cm, sp);
  28.179 -    bitmap->iterate(&verify_update, beg_addr, end_addr);
  28.180 -    return;
  28.181 -  }
  28.182 -
  28.183 -  if (cm->should_reset_only()) {
  28.184 -    ResetObjectsClosure reset_objects(cm);
  28.185 -    bitmap->iterate(&reset_objects, beg_addr, end_addr);
  28.186 -    return;
  28.187 -  }
  28.188 -#endif
  28.189  
  28.190    const size_t beg_region = sd.addr_to_region_idx(beg_addr);
  28.191    const size_t dp_region = sd.addr_to_region_idx(dp_addr);
  28.192 @@ -3492,35 +3489,6 @@
  28.193    return ParMarkBitMap::incomplete;
  28.194  }
  28.195  
  28.196 -// Verify the new location using the forwarding pointer
  28.197 -// from MarkSweep::mark_sweep_phase2().  Set the mark_word
  28.198 -// to the initial value.
  28.199 -ParMarkBitMapClosure::IterationStatus
  28.200 -PSParallelCompact::VerifyUpdateClosure::do_addr(HeapWord* addr, size_t words) {
  28.201 -  // The second arg (words) is not used.
  28.202 -  oop obj = (oop) addr;
  28.203 -  HeapWord* forwarding_ptr = (HeapWord*) obj->mark()->decode_pointer();
  28.204 -  HeapWord* new_pointer = summary_data().calc_new_pointer(obj);
  28.205 -  if (forwarding_ptr == NULL) {
  28.206 -    // The object is dead or not moving.
  28.207 -    assert(bitmap()->is_unmarked(obj) || (new_pointer == (HeapWord*) obj),
  28.208 -           "Object liveness is wrong.");
  28.209 -    return ParMarkBitMap::incomplete;
  28.210 -  }
  28.211 -  assert(HeapMaximumCompactionInterval > 1 || MarkSweepAlwaysCompactCount > 1 ||
  28.212 -         forwarding_ptr == new_pointer, "new location is incorrect");
  28.213 -  return ParMarkBitMap::incomplete;
  28.214 -}
  28.215 -
  28.216 -// Reset objects modified for debug checking.
  28.217 -ParMarkBitMapClosure::IterationStatus
  28.218 -PSParallelCompact::ResetObjectsClosure::do_addr(HeapWord* addr, size_t words) {
  28.219 -  // The second arg (words) is not used.
  28.220 -  oop obj = (oop) addr;
  28.221 -  obj->init_mark();
  28.222 -  return ParMarkBitMap::incomplete;
  28.223 -}
  28.224 -
  28.225  // Prepare for compaction.  This method is executed once
  28.226  // (i.e., by a single thread) before compaction.
  28.227  // Save the updated location of the intArrayKlassObj for
    29.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu Dec 01 13:42:41 2011 -0500
    29.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Fri Dec 02 08:52:53 2011 -0500
    29.3 @@ -832,31 +832,6 @@
    29.4      virtual void do_code_blob(CodeBlob* cb) const { }
    29.5    };
    29.6  
    29.7 -  // Closure for verifying update of pointers.  Does not
    29.8 -  // have any side effects.
    29.9 -  class VerifyUpdateClosure: public ParMarkBitMapClosure {
   29.10 -    const MutableSpace* _space; // Is this ever used?
   29.11 -
   29.12 -   public:
   29.13 -    VerifyUpdateClosure(ParCompactionManager* cm, const MutableSpace* sp) :
   29.14 -      ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm), _space(sp)
   29.15 -    { }
   29.16 -
   29.17 -    virtual IterationStatus do_addr(HeapWord* addr, size_t words);
   29.18 -
   29.19 -    const MutableSpace* space() { return _space; }
   29.20 -  };
   29.21 -
   29.22 -  // Closure for updating objects altered for debug checking
   29.23 -  class ResetObjectsClosure: public ParMarkBitMapClosure {
   29.24 -   public:
   29.25 -    ResetObjectsClosure(ParCompactionManager* cm):
   29.26 -      ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm)
   29.27 -    { }
   29.28 -
   29.29 -    virtual IterationStatus do_addr(HeapWord* addr, size_t words);
   29.30 -  };
   29.31 -
   29.32    friend class KeepAliveClosure;
   29.33    friend class FollowStackClosure;
   29.34    friend class AdjustPointerClosure;
   29.35 @@ -1183,10 +1158,6 @@
   29.36    // Update the deferred objects in the space.
   29.37    static void update_deferred_objects(ParCompactionManager* cm, SpaceId id);
   29.38  
   29.39 -  // Mark pointer and follow contents.
   29.40 -  template <class T>
   29.41 -  static inline void mark_and_follow(ParCompactionManager* cm, T* p);
   29.42 -
   29.43    static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; }
   29.44    static ParallelCompactData& summary_data() { return _summary_data; }
   29.45  
   29.46 @@ -1283,20 +1254,6 @@
   29.47  }
   29.48  
   29.49  template <class T>
   29.50 -inline void PSParallelCompact::mark_and_follow(ParCompactionManager* cm,
   29.51 -                                               T* p) {
   29.52 -  T heap_oop = oopDesc::load_heap_oop(p);
   29.53 -  if (!oopDesc::is_null(heap_oop)) {
   29.54 -    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   29.55 -    if (mark_bitmap()->is_unmarked(obj)) {
   29.56 -      if (mark_obj(obj)) {
   29.57 -        obj->follow_contents(cm);
   29.58 -      }
   29.59 -    }
   29.60 -  }
   29.61 -}
   29.62 -
   29.63 -template <class T>
   29.64  inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) {
   29.65    T heap_oop = oopDesc::load_heap_oop(p);
   29.66    if (!oopDesc::is_null(heap_oop)) {
    30.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Thu Dec 01 13:42:41 2011 -0500
    30.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp	Fri Dec 02 08:52:53 2011 -0500
    30.3 @@ -181,28 +181,29 @@
    30.4  void PSRefProcTaskExecutor::execute(ProcessTask& task)
    30.5  {
    30.6    GCTaskQueue* q = GCTaskQueue::create();
    30.7 -  for(uint i=0; i<ParallelGCThreads; i++) {
    30.8 +  GCTaskManager* manager = ParallelScavengeHeap::gc_task_manager();
    30.9 +  for(uint i=0; i < manager->active_workers(); i++) {
   30.10      q->enqueue(new PSRefProcTaskProxy(task, i));
   30.11    }
   30.12 -  ParallelTaskTerminator terminator(
   30.13 -                 ParallelScavengeHeap::gc_task_manager()->workers(),
   30.14 +  ParallelTaskTerminator terminator(manager->active_workers(),
   30.15                   (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth());
   30.16 -  if (task.marks_oops_alive() && ParallelGCThreads > 1) {
   30.17 -    for (uint j=0; j<ParallelGCThreads; j++) {
   30.18 +  if (task.marks_oops_alive() && manager->active_workers() > 1) {
   30.19 +    for (uint j = 0; j < manager->active_workers(); j++) {
   30.20        q->enqueue(new StealTask(&terminator));
   30.21      }
   30.22    }
   30.23 -  ParallelScavengeHeap::gc_task_manager()->execute_and_wait(q);
   30.24 +  manager->execute_and_wait(q);
   30.25  }
   30.26  
   30.27  
   30.28  void PSRefProcTaskExecutor::execute(EnqueueTask& task)
   30.29  {
   30.30    GCTaskQueue* q = GCTaskQueue::create();
   30.31 -  for(uint i=0; i<ParallelGCThreads; i++) {
   30.32 +  GCTaskManager* manager = ParallelScavengeHeap::gc_task_manager();
   30.33 +  for(uint i=0; i < manager->active_workers(); i++) {
   30.34      q->enqueue(new PSRefEnqueueTaskProxy(task, i));
   30.35    }
   30.36 -  ParallelScavengeHeap::gc_task_manager()->execute_and_wait(q);
   30.37 +  manager->execute_and_wait(q);
   30.38  }
   30.39  
   30.40  // This method contains all heap specific policy for invoking scavenge.
   30.41 @@ -375,6 +376,14 @@
   30.42      // Release all previously held resources
   30.43      gc_task_manager()->release_all_resources();
   30.44  
   30.45 +    // Set the number of GC threads to be used in this collection
   30.46 +    gc_task_manager()->set_active_gang();
   30.47 +    gc_task_manager()->task_idle_workers();
   30.48 +    // Get the active number of workers here and use that value
   30.49 +    // throughout the methods.
   30.50 +    uint active_workers = gc_task_manager()->active_workers();
   30.51 +    heap->set_par_threads(active_workers);
   30.52 +
   30.53      PSPromotionManager::pre_scavenge();
   30.54  
   30.55      // We'll use the promotion manager again later.
   30.56 @@ -385,8 +394,9 @@
   30.57  
   30.58        GCTaskQueue* q = GCTaskQueue::create();
   30.59  
   30.60 -      for(uint i=0; i<ParallelGCThreads; i++) {
   30.61 -        q->enqueue(new OldToYoungRootsTask(old_gen, old_top, i));
   30.62 +      uint stripe_total = active_workers;
   30.63 +      for(uint i=0; i < stripe_total; i++) {
   30.64 +        q->enqueue(new OldToYoungRootsTask(old_gen, old_top, i, stripe_total));
   30.65        }
   30.66  
   30.67        q->enqueue(new SerialOldToYoungRootsTask(perm_gen, perm_top));
   30.68 @@ -403,10 +413,10 @@
   30.69        q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::code_cache));
   30.70  
   30.71        ParallelTaskTerminator terminator(
   30.72 -                  gc_task_manager()->workers(),
   30.73 +        active_workers,
   30.74                    (TaskQueueSetSuper*) promotion_manager->stack_array_depth());
   30.75 -      if (ParallelGCThreads>1) {
   30.76 -        for (uint j=0; j<ParallelGCThreads; j++) {
   30.77 +      if (active_workers > 1) {
   30.78 +        for (uint j = 0; j < active_workers; j++) {
   30.79            q->enqueue(new StealTask(&terminator));
   30.80          }
   30.81        }
   30.82 @@ -419,6 +429,7 @@
   30.83      // Process reference objects discovered during scavenge
   30.84      {
   30.85        reference_processor()->setup_policy(false); // not always_clear
   30.86 +      reference_processor()->set_active_mt_degree(active_workers);
   30.87        PSKeepAliveClosure keep_alive(promotion_manager);
   30.88        PSEvacuateFollowersClosure evac_followers(promotion_manager);
   30.89        if (reference_processor()->processing_is_mt()) {
   30.90 @@ -622,6 +633,8 @@
   30.91      // Track memory usage and detect low memory
   30.92      MemoryService::track_memory_usage();
   30.93      heap->update_counters();
   30.94 +
   30.95 +    gc_task_manager()->release_idle_workers();
   30.96    }
   30.97  
   30.98    if (VerifyAfterGC && heap->total_collections() >= VerifyGCStartAt) {
   30.99 @@ -804,6 +817,7 @@
  30.100  
  30.101    // Initialize ref handling object for scavenging.
  30.102    MemRegion mr = young_gen->reserved();
  30.103 +
  30.104    _ref_processor =
  30.105      new ReferenceProcessor(mr,                         // span
  30.106                             ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
    31.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Thu Dec 01 13:42:41 2011 -0500
    31.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Fri Dec 02 08:52:53 2011 -0500
    31.3 @@ -202,7 +202,8 @@
    31.4                                             _gen->object_space(),
    31.5                                             _gen_top,
    31.6                                             pm,
    31.7 -                                           _stripe_number);
    31.8 +                                           _stripe_number,
    31.9 +                                           _stripe_total);
   31.10  
   31.11      // Do the real work
   31.12      pm->drain_stacks(false);
    32.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp	Thu Dec 01 13:42:41 2011 -0500
    32.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp	Fri Dec 02 08:52:53 2011 -0500
    32.3 @@ -135,16 +135,63 @@
    32.4  // OldToYoungRootsTask
    32.5  //
    32.6  // This task is used to scan old to young roots in parallel
    32.7 +//
    32.8 +// A GC thread executing this task divides the generation (old gen)
    32.9 +// into slices and takes a stripe in the slice as its part of the
   32.10 +// work.
   32.11 +//
   32.12 +//      +===============+        slice 0
   32.13 +//      |  stripe 0     |
   32.14 +//      +---------------+
   32.15 +//      |  stripe 1     |
   32.16 +//      +---------------+
   32.17 +//      |  stripe 2     |
   32.18 +//      +---------------+
   32.19 +//      |  stripe 3     |
   32.20 +//      +===============+        slice 1
   32.21 +//      |  stripe 0     |
   32.22 +//      +---------------+
   32.23 +//      |  stripe 1     |
   32.24 +//      +---------------+
   32.25 +//      |  stripe 2     |
   32.26 +//      +---------------+
   32.27 +//      |  stripe 3     |
   32.28 +//      +===============+        slice 2
   32.29 +//      ...
   32.30 +//
   32.31 +// A task is created for each stripe.  In this case there are 4 tasks
   32.32 +// created.  A GC thread first works on its stripe within slice 0
   32.33 +// and then moves to its stripe in the next slice until all stripes
   32.34 +// exceed the top of the generation.  Note that having fewer GC threads
   32.35 +// than stripes works because all the tasks are executed so all stripes
   32.36 +// will be covered.  In this example if 4 tasks have been created to cover
   32.37 +// all the stripes and there are only 3 threads, one of the threads will
   32.38 +// get the task for the 4th stripe.  However, there is a dependence in
   32.39 +// CardTableExtension::scavenge_contents_parallel() on the number
   32.40 +// of tasks created.  In scavenge_contents_parallel the distance
   32.41 +// to the next stripe is calculated based on the number of tasks.
   32.42 +// If the stripe width is ssize, a task's next stripe is at
   32.43 +// ssize * number_of_tasks (= slice_stride).  In this case after
   32.44 +// finishing stripe 0 in slice 0, the thread finds stripe 0 in slice 1
   32.45 +// by adding slice_stride to the start of stripe 0 in slice 0 to get
   32.46 +// to the start of stripe 0 in slice 1.
   32.47  
   32.48  class OldToYoungRootsTask : public GCTask {
   32.49   private:
   32.50    PSOldGen* _gen;
   32.51    HeapWord* _gen_top;
   32.52    uint _stripe_number;
   32.53 +  uint _stripe_total;
   32.54  
   32.55   public:
   32.56 -  OldToYoungRootsTask(PSOldGen *gen, HeapWord* gen_top, uint stripe_number) :
   32.57 -    _gen(gen), _gen_top(gen_top), _stripe_number(stripe_number) { }
   32.58 +  OldToYoungRootsTask(PSOldGen *gen,
   32.59 +                      HeapWord* gen_top,
   32.60 +                      uint stripe_number,
   32.61 +                      uint stripe_total) :
   32.62 +    _gen(gen),
   32.63 +    _gen_top(gen_top),
   32.64 +    _stripe_number(stripe_number),
   32.65 +    _stripe_total(stripe_total) { }
   32.66  
   32.67    char* name() { return (char *)"old-to-young-roots-task"; }
   32.68  
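The slice/stripe comment above reduces to simple stride arithmetic: a worker owns stripe
"stripe_number" of every slice, and the distance between two of its stripes is
ssize * stripe_total (the slice_stride used by CardTableExtension::scavenge_contents_parallel()).
A minimal standalone sketch of that walk, using plain word offsets instead of HeapWord*
(illustrative only, not HotSpot code):

#include <cstdio>
#include <cstddef>

// Visit every stripe owned by one worker.  'ssize' is the stripe width in
// words; the stride from one of the worker's stripes to the next is
// ssize * stripe_total, i.e. one whole slice.
static void walk_my_stripes(size_t gen_bottom, size_t gen_top,
                            unsigned stripe_number, unsigned stripe_total,
                            size_t ssize) {
  const size_t slice_stride = ssize * stripe_total;
  for (size_t stripe_start = gen_bottom + stripe_number * ssize;
       stripe_start < gen_top;
       stripe_start += slice_stride) {
    size_t stripe_end = stripe_start + ssize;
    if (stripe_end > gen_top) stripe_end = gen_top;   // last, partial stripe
    printf("stripe %u scans [%zu, %zu)\n", stripe_number, stripe_start, stripe_end);
  }
}

int main() {
  // 4 stripes per slice, stripe width of 16 words, generation of 100 words.
  for (unsigned i = 0; i < 4; i++) {
    walk_my_stripes(0, 100, i, 4, 16);
  }
  return 0;
}

With 3 GC threads and 4 tasks, the thread that picks up two tasks simply runs this walk
twice with different stripe numbers, so every stripe is still covered exactly once.
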
    33.1 --- a/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp	Thu Dec 01 13:42:41 2011 -0500
    33.2 +++ b/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp	Fri Dec 02 08:52:53 2011 -0500
    33.3 @@ -28,8 +28,10 @@
    33.4  #include "memory/collectorPolicy.hpp"
    33.5  #include "runtime/timer.hpp"
    33.6  #include "utilities/ostream.hpp"
    33.7 +#include "utilities/workgroup.hpp"
    33.8  elapsedTimer AdaptiveSizePolicy::_minor_timer;
    33.9  elapsedTimer AdaptiveSizePolicy::_major_timer;
   33.10 +bool AdaptiveSizePolicy::_debug_perturbation = false;
   33.11  
   33.12  // The throughput goal is implemented as
   33.13  //      _throughput_goal = 1 - ( 1 / (1 + gc_cost_ratio))
   33.14 @@ -88,6 +90,134 @@
   33.15    _young_gen_policy_is_ready = false;
   33.16  }
   33.17  
   33.18 +//  If the number of GC threads was set on the command line,
   33.19 +// use it.
   33.20 +//  Else
   33.21 +//    Calculate the number of GC threads based on the number of Java threads.
   33.22 +//    Calculate the number of GC threads based on the size of the heap.
   33.23 +//    Use the larger.
   33.24 +
   33.25 +int AdaptiveSizePolicy::calc_default_active_workers(uintx total_workers,
   33.26 +                                            const uintx min_workers,
   33.27 +                                            uintx active_workers,
   33.28 +                                            uintx application_workers) {
   33.29 +  // If the user has specifically set the number of
   33.30 +  // GC threads, use them.
   33.31 +
   33.32 +  // If the user has turned off using a dynamic number of GC threads
   33.33 +  // or the user has requested a specific number, set the active
   33.34 +  // number of workers to all the workers.
   33.35 +
   33.36 +  uintx new_active_workers = total_workers;
   33.37 +  uintx prev_active_workers = active_workers;
   33.38 +  uintx active_workers_by_JT = 0;
   33.39 +  uintx active_workers_by_heap_size = 0;
   33.40 +
   33.41 +  // Always use at least min_workers but use up to
   33.42 +  // GCWorkersPerJavaThread * application threads.
   33.43 +  active_workers_by_JT =
   33.44 +    MAX2((uintx) GCWorkersPerJavaThread * application_workers,
   33.45 +         min_workers);
   33.46 +
   33.47 +  // Choose a number of GC threads based on the current size
   33.48 +  // of the heap.  This may be complicated because the size of
   33.49 +  // the heap depends on factors such as the throughput goal.
   33.50 +  // Still, a large heap should be collected by more GC threads.
   33.51 +  active_workers_by_heap_size =
   33.52 +      MAX2((size_t) 2U, Universe::heap()->capacity() / HeapSizePerGCThread);
   33.53 +
   33.54 +  uintx max_active_workers =
   33.55 +    MAX2(active_workers_by_JT, active_workers_by_heap_size);
   33.56 +
   33.57 +  // Limit the number of workers to the number created,
   33.58 +  // (workers()).
   33.59 +  new_active_workers = MIN2(max_active_workers,
   33.60 +                                (uintx) total_workers);
   33.61 +
   33.62 +  // Increase GC workers instantly but decrease them more
   33.63 +  // slowly.
   33.64 +  if (new_active_workers < prev_active_workers) {
   33.65 +    new_active_workers =
   33.66 +      MAX2(min_workers, (prev_active_workers + new_active_workers) / 2);
   33.67 +  }
   33.68 +
   33.69 +  // Check once more that the number of workers is within the limits.
   33.70 +  assert(min_workers <= total_workers, "Minimum workers not consistent with total workers");
   33.71 +  assert(new_active_workers >= min_workers, "Minimum workers not observed");
   33.72 +  assert(new_active_workers <= total_workers, "Total workers not observed");
   33.73 +
   33.74 +  if (ForceDynamicNumberOfGCThreads) {
   33.75 +    // Assume this is debugging and jiggle the number of GC threads.
   33.76 +    if (new_active_workers == prev_active_workers) {
   33.77 +      if (new_active_workers < total_workers) {
   33.78 +        new_active_workers++;
   33.79 +      } else if (new_active_workers > min_workers) {
   33.80 +        new_active_workers--;
   33.81 +      }
   33.82 +    }
   33.83 +    if (new_active_workers == total_workers) {
   33.84 +      if (_debug_perturbation) {
   33.85 +        new_active_workers =  min_workers;
   33.86 +      }
   33.87 +      _debug_perturbation = !_debug_perturbation;
   33.88 +    }
   33.89 +    assert((new_active_workers <= (uintx) ParallelGCThreads) &&
   33.90 +           (new_active_workers >= min_workers),
   33.91 +      "Jiggled active workers too much");
   33.92 +  }
   33.93 +
   33.94 +  if (TraceDynamicGCThreads) {
   33.95 +     gclog_or_tty->print_cr("AdaptiveSizePolicy::calc_default_active_workers() : "
   33.96 +       "active_workers(): %d  new_active_workers: %d  "
   33.97 +       "prev_active_workers: %d\n"
   33.98 +       " active_workers_by_JT: %d  active_workers_by_heap_size: %d",
   33.99 +       active_workers, new_active_workers, prev_active_workers,
  33.100 +       active_workers_by_JT, active_workers_by_heap_size);
  33.101 +  }
  33.102 +  assert(new_active_workers > 0, "Always need at least 1");
  33.103 +  return new_active_workers;
  33.104 +}
  33.105 +
  33.106 +int AdaptiveSizePolicy::calc_active_workers(uintx total_workers,
  33.107 +                                            uintx active_workers,
  33.108 +                                            uintx application_workers) {
  33.109 +  // If the user has specifically set the number of
  33.110 +  // GC threads, use them.
  33.111 +
  33.112 +  // If the user has turned off using a dynamic number of GC threads
  33.113 +  // or the user has requested a specific number, set the active
  33.114 +  // number of workers to all the workers.
  33.115 +
  33.116 +  int new_active_workers;
  33.117 +  if (!UseDynamicNumberOfGCThreads ||
  33.118 +     (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) {
  33.119 +    new_active_workers = total_workers;
  33.120 +  } else {
  33.121 +    new_active_workers = calc_default_active_workers(total_workers,
  33.122 +                                                     2, /* Minimum number of workers */
  33.123 +                                                     active_workers,
  33.124 +                                                     application_workers);
  33.125 +  }
  33.126 +  assert(new_active_workers > 0, "Always need at least 1");
  33.127 +  return new_active_workers;
  33.128 +}
  33.129 +
  33.130 +int AdaptiveSizePolicy::calc_active_conc_workers(uintx total_workers,
  33.131 +                                                 uintx active_workers,
  33.132 +                                                 uintx application_workers) {
  33.133 +  if (!UseDynamicNumberOfGCThreads ||
  33.134 +     (!FLAG_IS_DEFAULT(ConcGCThreads) && !ForceDynamicNumberOfGCThreads)) {
  33.135 +    return ConcGCThreads;
  33.136 +  } else {
  33.137 +    int no_of_gc_threads = calc_default_active_workers(
  33.138 +                             total_workers,
  33.139 +                             1, /* Minimum number of workers */
  33.140 +                             active_workers,
  33.141 +                             application_workers);
  33.142 +    return no_of_gc_threads;
  33.143 +  }
  33.144 +}
  33.145 +
  33.146  bool AdaptiveSizePolicy::tenuring_threshold_change() const {
  33.147    return decrement_tenuring_threshold_for_gc_cost() ||
  33.148           increment_tenuring_threshold_for_gc_cost() ||
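The sizing rule spelled out in calc_default_active_workers() above can be restated
compactly: take the larger of GCWorkersPerJavaThread * Java threads and
capacity / HeapSizePerGCThread, clamp it between min_workers and the created workers,
and damp any decrease by averaging with the previous value.  A standalone sketch of
that calculation (ordinary C++; the constants mirror the defaults of
GCWorkersPerJavaThread and HeapSizePerGCThread, and the function name is illustrative,
not a HotSpot entry point):

#include <algorithm>
#include <cstdint>

static unsigned default_active_workers(unsigned total_workers,
                                       unsigned min_workers,
                                       unsigned prev_active_workers,
                                       unsigned application_workers,
                                       uint64_t heap_capacity_bytes) {
  const unsigned workers_per_java_thread = 2;            // GCWorkersPerJavaThread
  const uint64_t heap_bytes_per_worker   = 64ULL << 20;  // HeapSizePerGCThread default

  // Bound by Java threads and by heap size; use the larger of the two.
  unsigned by_java_threads =
      std::max(workers_per_java_thread * application_workers, min_workers);
  unsigned by_heap_size =
      (unsigned) std::max<uint64_t>(2, heap_capacity_bytes / heap_bytes_per_worker);

  // Never exceed the number of workers actually created.
  unsigned wanted = std::min(std::max(by_java_threads, by_heap_size), total_workers);

  // Increase instantly, decrease slowly (average with the previous value).
  if (wanted < prev_active_workers) {
    wanted = std::max(min_workers, (prev_active_workers + wanted) / 2);
  }
  return wanted;
}
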
    34.1 --- a/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp	Thu Dec 01 13:42:41 2011 -0500
    34.2 +++ b/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp	Fri Dec 02 08:52:53 2011 -0500
    34.3 @@ -187,6 +187,8 @@
    34.4    julong _young_gen_change_for_minor_throughput;
    34.5    julong _old_gen_change_for_major_throughput;
    34.6  
    34.7 +  static const uint GCWorkersPerJavaThread  = 2;
    34.8 +
    34.9    // Accessors
   34.10  
   34.11    double gc_pause_goal_sec() const { return _gc_pause_goal_sec; }
   34.12 @@ -331,6 +333,8 @@
   34.13    // Return true if the policy suggested a change.
   34.14    bool tenuring_threshold_change() const;
   34.15  
   34.16 +  static bool _debug_perturbation;
   34.17 +
   34.18   public:
   34.19    AdaptiveSizePolicy(size_t init_eden_size,
   34.20                       size_t init_promo_size,
   34.21 @@ -338,6 +342,31 @@
   34.22                       double gc_pause_goal_sec,
   34.23                       uint gc_cost_ratio);
   34.24  
   34.25 +  // Return the default number of GC threads to use in the next GC.
   34.26 +  static int calc_default_active_workers(uintx total_workers,
   34.27 +                                         const uintx min_workers,
   34.28 +                                         uintx active_workers,
   34.29 +                                         uintx application_workers);
   34.30 +
   34.31 +  // Return number of GC threads to use in the next GC.
   34.32 +  // This is called sparingly so as not to change the
   34.33 +  // number of GC workers gratuitously.
   34.34 +  //   For ParNew collections
   34.35 +  //   For PS scavenge and ParOld collections
   34.36 +  //   For G1 evacuation pauses (subject to update)
   34.37 +  // Other collection phases inherit the number of
   34.38 +  // GC workers from the calls above.  For example,
   34.39 +  // a CMS parallel remark uses the same number of GC
   34.40 +  // workers as the most recent ParNew collection.
   34.41 +  static int calc_active_workers(uintx total_workers,
   34.42 +                                 uintx active_workers,
   34.43 +                                 uintx application_workers);
   34.44 +
   34.45 +  // Return number of GC threads to use in the next concurrent GC phase.
   34.46 +  static int calc_active_conc_workers(uintx total_workers,
   34.47 +                                      uintx active_workers,
   34.48 +                                      uintx application_workers);
   34.49 +
   34.50    bool is_gc_cms_adaptive_size_policy() {
   34.51      return kind() == _gc_cms_adaptive_size_policy;
   34.52    }
    35.1 --- a/src/share/vm/gc_implementation/shared/markSweep.hpp	Thu Dec 01 13:42:41 2011 -0500
    35.2 +++ b/src/share/vm/gc_implementation/shared/markSweep.hpp	Fri Dec 02 08:52:53 2011 -0500
    35.3 @@ -1,5 +1,5 @@
    35.4  /*
    35.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
    35.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
    35.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    35.8   *
    35.9   * This code is free software; you can redistribute it and/or modify it
   35.10 @@ -196,8 +196,6 @@
   35.11    static void mark_object(oop obj);
   35.12    // Mark pointer and follow contents.  Empty marking stack afterwards.
   35.13    template <class T> static inline void follow_root(T* p);
   35.14 -  // Mark pointer and follow contents.
   35.15 -  template <class T> static inline void mark_and_follow(T* p);
   35.16    // Check mark and maybe push on marking stack
   35.17    template <class T> static inline void mark_and_push(T* p);
   35.18    static inline void push_objarray(oop obj, size_t index);
    36.1 --- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Thu Dec 01 13:42:41 2011 -0500
    36.2 +++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp	Fri Dec 02 08:52:53 2011 -0500
    36.3 @@ -1,5 +1,5 @@
    36.4  /*
    36.5 - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
    36.6 + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
    36.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    36.8   *
    36.9   * This code is free software; you can redistribute it and/or modify it
   36.10 @@ -63,18 +63,6 @@
   36.11    follow_stack();
   36.12  }
   36.13  
   36.14 -template <class T> inline void MarkSweep::mark_and_follow(T* p) {
   36.15 -//  assert(Universe::heap()->is_in_reserved(p), "should be in object space");
   36.16 -  T heap_oop = oopDesc::load_heap_oop(p);
   36.17 -  if (!oopDesc::is_null(heap_oop)) {
   36.18 -    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
   36.19 -    if (!obj->mark()->is_marked()) {
   36.20 -      mark_object(obj);
   36.21 -      obj->follow_contents();
   36.22 -    }
   36.23 -  }
   36.24 -}
   36.25 -
   36.26  template <class T> inline void MarkSweep::mark_and_push(T* p) {
   36.27  //  assert(Universe::heap()->is_in_reserved(p), "should be in object space");
   36.28    T heap_oop = oopDesc::load_heap_oop(p);
    37.1 --- a/src/share/vm/memory/cardTableModRefBS.cpp	Thu Dec 01 13:42:41 2011 -0500
    37.2 +++ b/src/share/vm/memory/cardTableModRefBS.cpp	Fri Dec 02 08:52:53 2011 -0500
    37.3 @@ -460,9 +460,43 @@
    37.4                                                                   OopsInGenClosure* cl,
    37.5                                                                   CardTableRS* ct) {
    37.6    if (!mr.is_empty()) {
    37.7 -    int n_threads = SharedHeap::heap()->n_par_threads();
    37.8 -    if (n_threads > 0) {
    37.9 +    // Caller (process_strong_roots()) claims that all GC threads
   37.10 +    // execute this call.  With UseDynamicNumberOfGCThreads now all
   37.11 +    // active GC threads execute this call.  The number of active GC
   37.12 +    // threads needs to be passed to par_non_clean_card_iterate_work()
   37.13 +    // to get proper partitioning and termination.
   37.14 +    //
   37.15 +    // This is an example of where n_par_threads() is used instead
   37.16 +    // of workers()->active_workers().  n_par_threads can be set to 0 to
   37.17 +    // turn off parallelism.  For example when this code is called as
   37.18 +    // part of verification and SharedHeap::process_strong_roots() is being
   37.19 +    // used, then n_par_threads() may have been set to 0.  active_workers
   37.20 +    // is not overloaded with the meaning that it is a switch to disable
   37.21 +    // parallelism and so keeps the meaning of the number of
   37.22 +    // active gc workers.  If parallelism has not been shut off by
   37.23 +    // setting n_par_threads to 0, then n_par_threads should be
   37.24 +    // equal to active_workers.  When a different mechanism for shutting
   37.25 +    // off parallelism is used, then active_workers can be used in
   37.26 +    // place of n_par_threads.
   37.27 +    //  This is an example of a path where n_par_threads is
   37.28 +    // set to 0 to turn off parallelism.
   37.29 +    //  [7] CardTableModRefBS::non_clean_card_iterate()
   37.30 +    //  [8] CardTableRS::younger_refs_in_space_iterate()
   37.31 +    //  [9] Generation::younger_refs_in_space_iterate()
   37.32 +    //  [10] OneContigSpaceCardGeneration::younger_refs_iterate()
   37.33 +    //  [11] CompactingPermGenGen::younger_refs_iterate()
   37.34 +    //  [12] CardTableRS::younger_refs_iterate()
   37.35 +    //  [13] SharedHeap::process_strong_roots()
   37.36 +    //  [14] G1CollectedHeap::verify()
   37.37 +    //  [15] Universe::verify()
   37.38 +    //  [16] G1CollectedHeap::do_collection_pause_at_safepoint()
   37.39 +    //
   37.40 +    int n_threads =  SharedHeap::heap()->n_par_threads();
   37.41 +    bool is_par = n_threads > 0;
   37.42 +    if (is_par) {
   37.43  #ifndef SERIALGC
   37.44 +      assert(SharedHeap::heap()->n_par_threads() ==
   37.45 +             SharedHeap::heap()->workers()->active_workers(), "Mismatch");
   37.46        non_clean_card_iterate_parallel_work(sp, mr, cl, ct, n_threads);
   37.47  #else  // SERIALGC
   37.48        fatal("Parallel gc not supported here.");
   37.49 @@ -489,6 +523,10 @@
   37.50  // change their values in any manner.
   37.51  void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr,
   37.52                                                        MemRegionClosure* cl) {
   37.53 +  bool is_par = (SharedHeap::heap()->n_par_threads() > 0);
   37.54 +  assert(!is_par ||
   37.55 +          (SharedHeap::heap()->n_par_threads() ==
   37.56 +          SharedHeap::heap()->workers()->active_workers()), "Mismatch");
   37.57    for (int i = 0; i < _cur_covered_regions; i++) {
   37.58      MemRegion mri = mr.intersection(_covered[i]);
   37.59      if (mri.word_size() > 0) {
   37.60 @@ -624,23 +662,6 @@
   37.61    return MemRegion(mr.end(), mr.end());
   37.62  }
   37.63  
   37.64 -// Set all the dirty cards in the given region to "precleaned" state.
   37.65 -void CardTableModRefBS::preclean_dirty_cards(MemRegion mr) {
   37.66 -  for (int i = 0; i < _cur_covered_regions; i++) {
   37.67 -    MemRegion mri = mr.intersection(_covered[i]);
   37.68 -    if (!mri.is_empty()) {
   37.69 -      jbyte *cur_entry, *limit;
   37.70 -      for (cur_entry = byte_for(mri.start()), limit = byte_for(mri.last());
   37.71 -           cur_entry <= limit;
   37.72 -           cur_entry++) {
   37.73 -        if (*cur_entry == dirty_card) {
   37.74 -          *cur_entry = precleaned_card;
   37.75 -        }
   37.76 -      }
   37.77 -    }
   37.78 -  }
   37.79 -}
   37.80 -
   37.81  uintx CardTableModRefBS::ct_max_alignment_constraint() {
   37.82    return card_size * os::vm_page_size();
   37.83  }
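The long comment above boils down to one invariant: n_par_threads() == 0 still means
"run serially", and whenever it is non-zero it must equal workers()->active_workers()
so that partitioning and termination see the same thread count.  A standalone sketch of
that dispatch-and-check pattern (plain C++ with stand-in globals; not the SharedHeap API):

#include <cassert>

// Stand-ins for SharedHeap::heap()->n_par_threads() and
// SharedHeap::heap()->workers()->active_workers().
static int g_n_par_threads  = 0;   // 0 switches parallelism off
static int g_active_workers = 1;   // workers the gang would run

static void iterate_parallel(int n_threads) { (void)n_threads; /* partitioned work */ }
static void iterate_serial()                { /* single-threaded work */ }

static void non_clean_card_iterate() {
  int  n_threads = g_n_par_threads;
  bool is_par    = n_threads > 0;
  if (is_par) {
    // Parallelism was not switched off, so the count handed to the
    // partitioning code must match the active gang workers.
    assert(n_threads == g_active_workers && "Mismatch");
    iterate_parallel(n_threads);
  } else {
    iterate_serial();
  }
}

int main() {
  non_clean_card_iterate();                  // serial path (verification-style callers)
  g_n_par_threads = g_active_workers = 4;
  non_clean_card_iterate();                  // parallel path, counts agree
  return 0;
}
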
    38.1 --- a/src/share/vm/memory/cardTableModRefBS.hpp	Thu Dec 01 13:42:41 2011 -0500
    38.2 +++ b/src/share/vm/memory/cardTableModRefBS.hpp	Fri Dec 02 08:52:53 2011 -0500
    38.3 @@ -435,9 +435,6 @@
    38.4    MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset,
    38.5                                           int reset_val);
    38.6  
    38.7 -  // Set all the dirty cards in the given region to precleaned state.
    38.8 -  void preclean_dirty_cards(MemRegion mr);
    38.9 -
   38.10    // Provide read-only access to the card table array.
   38.11    const jbyte* byte_for_const(const void* p) const {
   38.12      return byte_for(p);
    39.1 --- a/src/share/vm/memory/cardTableRS.cpp	Thu Dec 01 13:42:41 2011 -0500
    39.2 +++ b/src/share/vm/memory/cardTableRS.cpp	Fri Dec 02 08:52:53 2011 -0500
    39.3 @@ -164,7 +164,13 @@
    39.4  ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
    39.5    DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct) :
    39.6      _dirty_card_closure(dirty_card_closure), _ct(ct) {
    39.7 +    // Cannot yet substitute active_workers for n_par_threads
    39.8 +    // in the case where parallelism is being turned off by
    39.9 +    // setting n_par_threads to 0.
   39.10      _is_par = (SharedHeap::heap()->n_par_threads() > 0);
   39.11 +    assert(!_is_par ||
   39.12 +           (SharedHeap::heap()->n_par_threads() ==
   39.13 +            SharedHeap::heap()->workers()->active_workers()), "Mismatch");
   39.14  }
   39.15  
   39.16  void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
    40.1 --- a/src/share/vm/memory/sharedHeap.cpp	Thu Dec 01 13:42:41 2011 -0500
    40.2 +++ b/src/share/vm/memory/sharedHeap.cpp	Fri Dec 02 08:52:53 2011 -0500
    40.3 @@ -58,7 +58,6 @@
    40.4    _perm_gen(NULL), _rem_set(NULL),
    40.5    _strong_roots_parity(0),
    40.6    _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)),
    40.7 -  _n_par_threads(0),
    40.8    _workers(NULL)
    40.9  {
   40.10    if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
   40.11 @@ -80,6 +79,14 @@
   40.12    }
   40.13  }
   40.14  
   40.15 +int SharedHeap::n_termination() {
   40.16 +  return _process_strong_tasks->n_threads();
   40.17 +}
   40.18 +
   40.19 +void SharedHeap::set_n_termination(int t) {
   40.20 +  _process_strong_tasks->set_n_threads(t);
   40.21 +}
   40.22 +
   40.23  bool SharedHeap::heap_lock_held_for_gc() {
   40.24    Thread* t = Thread::current();
   40.25    return    Heap_lock->owned_by_self()
   40.26 @@ -144,6 +151,10 @@
   40.27    StrongRootsScope srs(this, activate_scope);
   40.28    // General strong roots.
   40.29    assert(_strong_roots_parity != 0, "must have called prologue code");
   40.30 +  // _n_termination for _process_strong_tasks should be set upstream
   40.31 +  // in a method not running in a GC worker.  Otherwise the GC worker
   40.32 +  // could be trying to change the termination condition while the task
   40.33 +  // is executing in another GC worker.
   40.34    if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) {
   40.35      Universe::oops_do(roots);
   40.36      // Consider perm-gen discovered lists to be strong.
    41.1 --- a/src/share/vm/memory/sharedHeap.hpp	Thu Dec 01 13:42:41 2011 -0500
    41.2 +++ b/src/share/vm/memory/sharedHeap.hpp	Fri Dec 02 08:52:53 2011 -0500
    41.3 @@ -49,6 +49,62 @@
    41.4  class CollectorPolicy;
    41.5  class KlassHandle;
    41.6  
    41.7 +// Note on use of FlexibleWorkGang's for GC.
    41.8 +// There are three places where task completion is determined.
    41.9 +// In
   41.10 +//    1) ParallelTaskTerminator::offer_termination() where _n_threads
   41.11 +//    must be set to the correct value so that the count of workers that
   41.12 +//    have offered termination will exactly match the number
   41.13 +//    working on the task.  Tasks such as those derived from GCTask
   41.14 +//    use ParallelTaskTerminator's.  Tasks that want load balancing
   41.15 +//    by work stealing use this method to gauge completion.
   41.16 +//    2) SubTasksDone has a variable _n_threads that is used in
   41.17 +//    all_tasks_completed() to determine completion.  all_tasks_completed()
   41.18 +//    counts the number of tasks that have been done and then resets
   41.19 +//    the SubTasksDone so that it can be used again.  When the number of
   41.20 +//    tasks is set to the number of GC workers, then _n_threads must
   41.21 +//    be set to the number of active GC workers. G1CollectedHeap,
   41.22 +//    HRInto_G1RemSet, GenCollectedHeap and SharedHeap have SubTasksDone.
   41.23 +//    This seems too many.
   41.24 +//    3) SequentialSubTasksDone has an _n_threads that is used in
   41.25 +//    a way similar to SubTasksDone and has the same dependency on the
   41.26 +//    number of active GC workers.  CompactibleFreeListSpace and Space
   41.27 +//    have SequentialSubTasksDone's.
   41.28 +// Example of using SubTasksDone and SequentialSubTasksDone
   41.29 +// G1CollectedHeap::g1_process_strong_roots() calls
   41.30 +//  process_strong_roots(false, // no scoping; this is parallel code
   41.31 +//                       collecting_perm_gen, so,
   41.32 +//                       &buf_scan_non_heap_roots,
   41.33 +//                       &eager_scan_code_roots,
   41.34 +//                       &buf_scan_perm);
   41.35 +//  which delegates to SharedHeap::process_strong_roots() and uses
   41.36 +//  SubTasksDone* _process_strong_tasks to claim tasks.
   41.37 +//  process_strong_roots() calls
   41.38 +//      rem_set()->younger_refs_iterate(perm_gen(), perm_blk);
   41.39 +//  to scan the card table and which eventually calls down into
   41.40 +//  CardTableModRefBS::par_non_clean_card_iterate_work().  This method
   41.41 +//  uses SequentialSubTasksDone* _pst to claim tasks.
   41.42 +//  Both SubTasksDone and SequentialSubTasksDone call their method
   41.43 +//  all_tasks_completed() to count the number of GC workers that have
   41.44 +//  finished their work.  That logic is "when all the workers are
   41.45 +//  finished the tasks are finished".
   41.46 +//
   41.47 +//  The pattern that appears in the code is to set _n_threads
   41.48 +//  to a value > 1 before a task that you would like executed in parallel
   41.49 +//  and then to set it to 0 after that task has completed.  A value of
   41.50 +//  0 is a "special" value in set_n_threads() which translates to
   41.51 +//  setting _n_threads to 1.
   41.52 +//
   41.53 +//  Some code uses _n_termination to decide if work should be done in
   41.54 +//  parallel.  The notorious possibly_parallel_oops_do() in threads.cpp
   41.55 +//  is an example of such code.  Look for variable "is_par" for other
   41.56 +//  examples.
   41.57 +//
   41.58 +//  The active_workers is not reset to 0 after a parallel phase.  Its
   41.59 +//  value may be used in later phases and in one instance at least
   41.60 +//  (the parallel remark) it has to be used (the parallel remark depends
   41.61 +//  on the partitioning done in the previous parallel scavenge).
   41.62 +
   41.63  class SharedHeap : public CollectedHeap {
   41.64    friend class VMStructs;
   41.65  
   41.66 @@ -84,11 +140,6 @@
   41.67    // If we're doing parallel GC, use this gang of threads.
   41.68    FlexibleWorkGang* _workers;
   41.69  
   41.70 -  // Number of parallel threads currently working on GC tasks.
   41.71 -  // O indicates use sequential code; 1 means use parallel code even with
   41.72 -  // only one thread, for performance testing purposes.
   41.73 -  int _n_par_threads;
   41.74 -
   41.75    // Full initialization is done in a concrete subtype's "initialize"
   41.76    // function.
   41.77    SharedHeap(CollectorPolicy* policy_);
   41.78 @@ -107,6 +158,7 @@
   41.79    CollectorPolicy *collector_policy() const { return _collector_policy; }
   41.80  
   41.81    void set_barrier_set(BarrierSet* bs);
   41.82 +  SubTasksDone* process_strong_tasks() { return _process_strong_tasks; }
   41.83  
   41.84    // Does operations required after initialization has been done.
   41.85    virtual void post_initialize();
   41.86 @@ -198,13 +250,6 @@
   41.87  
   41.88    FlexibleWorkGang* workers() const { return _workers; }
   41.89  
   41.90 -  // Sets the number of parallel threads that will be doing tasks
   41.91 -  // (such as process strong roots) subsequently.
   41.92 -  virtual void set_par_threads(int t);
   41.93 -
   41.94 -  // Number of threads currently working on GC tasks.
   41.95 -  int n_par_threads() { return _n_par_threads; }
   41.96 -
   41.97    // Invoke the "do_oop" method the closure "roots" on all root locations.
   41.98    // If "collecting_perm_gen" is false, then roots that may only contain
   41.99    // references to permGen objects are not scanned; instead, in that case,
  41.100 @@ -240,6 +285,13 @@
  41.101    virtual void gc_prologue(bool full) = 0;
  41.102    virtual void gc_epilogue(bool full) = 0;
  41.103  
  41.104 +  // Sets the number of parallel threads that will be doing tasks
  41.105 +  // (such as process strong roots) subsequently.
  41.106 +  virtual void set_par_threads(int t);
  41.107 +
  41.108 +  int n_termination();
  41.109 +  void set_n_termination(int t);
  41.110 +
  41.111    //
  41.112    // New methods from CollectedHeap
  41.113    //
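The FlexibleWorkGang note above hinges on how SubTasksDone decides completion: each of
the _n_threads active workers claims whatever subtasks it can and then checks in, and
only the last check-in resets the object for reuse.  A single-threaded toy version of
that bookkeeping (illustrative only; the real class lives in workgroup.hpp and uses
atomic claims):

#include <algorithm>
#include <vector>

class ToySubTasksDone {
  std::vector<bool> _claimed;
  int _n_threads;           // number of active workers expected to check in
  int _threads_completed;
 public:
  ToySubTasksDone(int n_tasks, int n_threads)
    : _claimed(n_tasks, false), _n_threads(n_threads), _threads_completed(0) {}

  // Returns the previous claim state and claims the task, so only the first
  // caller for a given task actually executes it.
  bool is_task_claimed(int t) {
    bool was_claimed = _claimed[t];
    _claimed[t] = true;
    return was_claimed;
  }

  // Called once per worker; the last worker resets the state for reuse.
  void all_tasks_completed() {
    if (++_threads_completed == _n_threads) {
      std::fill(_claimed.begin(), _claimed.end(), false);
      _threads_completed = 0;
    }
  }
};

int main() {
  ToySubTasksDone subtasks(3 /* tasks */, 2 /* active workers */);
  for (int worker = 0; worker < 2; worker++) {
    for (int t = 0; t < 3; t++) {
      if (!subtasks.is_task_claimed(t)) { /* this worker does task t */ }
    }
    subtasks.all_tasks_completed();
  }
  return 0;
}

This is why _n_threads must be set to the number of active workers before the task
runs: if it were left at the total gang size, the reset would wait for check-ins that
never come.
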
    42.1 --- a/src/share/vm/memory/space.hpp	Thu Dec 01 13:42:41 2011 -0500
    42.2 +++ b/src/share/vm/memory/space.hpp	Fri Dec 02 08:52:53 2011 -0500
    42.3 @@ -1,5 +1,5 @@
    42.4  /*
    42.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
    42.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
    42.7   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
    42.8   *
    42.9   * This code is free software; you can redistribute it and/or modify it
   42.10 @@ -533,7 +533,8 @@
   42.11     * by the MarkSweepAlwaysCompactCount parameter.                           \
   42.12     */                                                                        \
   42.13    int invocations = SharedHeap::heap()->perm_gen()->stat_record()->invocations;\
   42.14 -  bool skip_dead = ((invocations % MarkSweepAlwaysCompactCount) != 0);       \
   42.15 +  bool skip_dead = (MarkSweepAlwaysCompactCount < 1)                         \
   42.16 +    ||((invocations % MarkSweepAlwaysCompactCount) != 0);                    \
   42.17                                                                               \
   42.18    size_t allowed_deadspace = 0;                                              \
   42.19    if (skip_dead) {                                                           \
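The guard added above matters because skip_dead was previously computed as
(invocations % MarkSweepAlwaysCompactCount) != 0, which divides by zero when the flag
is 0.  A small worked example of the corrected condition (standalone C++, example
values only):

#include <cassert>

static bool skip_dead_space(int invocations, int always_compact_count) {
  // When the compaction interval is disabled (< 1), always skip dead space
  // and never evaluate the modulo.
  return (always_compact_count < 1) ||
         ((invocations % always_compact_count) != 0);
}

int main() {
  assert(skip_dead_space(7, 0));    // interval disabled: skip, no division by zero
  assert(skip_dead_space(7, 4));    // 7 % 4 != 0: skip dead space this cycle
  assert(!skip_dead_space(8, 4));   // every 4th invocation compacts fully
  return 0;
}
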
    43.1 --- a/src/share/vm/oops/objArrayOop.hpp	Thu Dec 01 13:42:41 2011 -0500
    43.2 +++ b/src/share/vm/oops/objArrayOop.hpp	Fri Dec 02 08:52:53 2011 -0500
    43.3 @@ -34,7 +34,7 @@
    43.4    friend class objArrayKlass;
    43.5    friend class Runtime1;
    43.6    friend class psPromotionManager;
    43.7 -  friend class CSMarkOopClosure;
    43.8 +  friend class CSetMarkOopClosure;
    43.9    friend class G1ParScanPartialArrayClosure;
   43.10  
   43.11    template <class T> T* obj_at_addr(int index) const {
    44.1 --- a/src/share/vm/runtime/arguments.cpp	Thu Dec 01 13:42:41 2011 -0500
    44.2 +++ b/src/share/vm/runtime/arguments.cpp	Fri Dec 02 08:52:53 2011 -0500
    44.3 @@ -1394,8 +1394,8 @@
    44.4    // If no heap maximum was requested explicitly, use some reasonable fraction
    44.5    // of the physical memory, up to a maximum of 1GB.
    44.6    if (UseParallelGC) {
    44.7 -    FLAG_SET_ERGO(uintx, ParallelGCThreads,
    44.8 -                  Abstract_VM_Version::parallel_worker_threads());
    44.9 +    FLAG_SET_DEFAULT(ParallelGCThreads,
   44.10 +                     Abstract_VM_Version::parallel_worker_threads());
   44.11  
   44.12      // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the
   44.13      // SurvivorRatio has been set, reset their default values to SurvivorRatio +
    45.1 --- a/src/share/vm/runtime/globals.hpp	Thu Dec 01 13:42:41 2011 -0500
    45.2 +++ b/src/share/vm/runtime/globals.hpp	Fri Dec 02 08:52:53 2011 -0500
    45.3 @@ -1416,6 +1416,21 @@
    45.4    product(uintx, ParallelGCThreads, 0,                                      \
    45.5            "Number of parallel threads parallel gc will use")                \
    45.6                                                                              \
    45.7 +  product(bool, UseDynamicNumberOfGCThreads, false,                         \
    45.8 +          "Dynamically choose the number of parallel threads "              \
    45.9 +          "parallel gc will use")                                           \
   45.10 +                                                                            \
   45.11 +  diagnostic(bool, ForceDynamicNumberOfGCThreads, false,                    \
   45.12 +          "Force dynamic selection of the number of "                       \
   45.13 +          "parallel threads parallel gc will use to aid debugging")         \
   45.14 +                                                                            \
   45.15 +  product(uintx, HeapSizePerGCThread, ScaleForWordSize(64*M),               \
   45.16 +          "Size of heap (bytes) per GC thread used in calculating the "     \
   45.17 +          "number of GC threads")                                           \
   45.18 +                                                                            \
   45.19 +  product(bool, TraceDynamicGCThreads, false,                               \
   45.20 +          "Trace the dynamic GC thread usage")                              \
   45.21 +                                                                            \
   45.22    develop(bool, ParallelOldGCSplitALot, false,                              \
   45.23            "Provoke splitting (copying data from a young gen space to"       \
   45.24            "multiple destination spaces)")                                   \
   45.25 @@ -2357,7 +2372,7 @@
   45.26    develop(bool, TraceGCTaskQueue, false,                                    \
   45.27            "Trace actions of the GC task queues")                            \
   45.28                                                                              \
   45.29 -  develop(bool, TraceGCTaskThread, false,                                   \
   45.30 +  diagnostic(bool, TraceGCTaskThread, false,                                \
   45.31            "Trace actions of the GC task threads")                           \
   45.32                                                                              \
   45.33    product(bool, PrintParallelOldGCPhaseTimes, false,                        \
    46.1 --- a/src/share/vm/runtime/thread.cpp	Thu Dec 01 13:42:41 2011 -0500
    46.2 +++ b/src/share/vm/runtime/thread.cpp	Fri Dec 02 08:52:53 2011 -0500
    46.3 @@ -778,12 +778,12 @@
    46.4        return true;
    46.5      } else {
    46.6        guarantee(res == strong_roots_parity, "Or else what?");
    46.7 -      assert(SharedHeap::heap()->n_par_threads() > 0,
    46.8 -             "Should only fail when parallel.");
    46.9 +      assert(SharedHeap::heap()->workers()->active_workers() > 0,
   46.10 +         "Should only fail when parallel.");
   46.11        return false;
   46.12      }
   46.13    }
   46.14 -  assert(SharedHeap::heap()->n_par_threads() > 0,
   46.15 +  assert(SharedHeap::heap()->workers()->active_workers() > 0,
   46.16           "Should only fail when parallel.");
   46.17    return false;
   46.18  }
   46.19 @@ -3939,7 +3939,15 @@
   46.20    // root groups.  Overhead should be small enough to use all the time,
   46.21    // even in sequential code.
   46.22    SharedHeap* sh = SharedHeap::heap();
   46.23 -  bool is_par = (sh->n_par_threads() > 0);
   46.24 +  // Cannot yet substitute active_workers for n_par_threads
   46.25 +  // because of G1CollectedHeap::verify() use of
   46.26 +  // SharedHeap::process_strong_roots().  n_par_threads == 0 will
   46.27 +  // turn off parallelism in process_strong_roots while active_workers
   46.28 +  // is being used for parallelism elsewhere.
   46.29 +  bool is_par = sh->n_par_threads() > 0;
   46.30 +  assert(!is_par ||
   46.31 +         (SharedHeap::heap()->n_par_threads() ==
   46.32 +          SharedHeap::heap()->workers()->active_workers()), "Mismatch");
   46.33    int cp = SharedHeap::heap()->strong_roots_parity();
   46.34    ALL_JAVA_THREADS(p) {
   46.35      if (p->claim_oops_do(is_par, cp)) {
    47.1 --- a/src/share/vm/services/memoryManager.cpp	Thu Dec 01 13:42:41 2011 -0500
    47.2 +++ b/src/share/vm/services/memoryManager.cpp	Fri Dec 02 08:52:53 2011 -0500
    47.3 @@ -168,10 +168,8 @@
    47.4    // initialize the arrays for memory usage
    47.5    _before_gc_usage_array = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools);
    47.6    _after_gc_usage_array  = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools);
    47.7 -  size_t len = num_pools * sizeof(MemoryUsage);
    47.8 -  memset(_before_gc_usage_array, 0, len);
    47.9 -  memset(_after_gc_usage_array, 0, len);
   47.10    _usage_array_size = num_pools;
   47.11 +  clear();
   47.12  }
   47.13  
   47.14  GCStatInfo::~GCStatInfo() {
   47.15 @@ -304,12 +302,8 @@
   47.16        pool->set_last_collection_usage(usage);
   47.17        LowMemoryDetector::detect_after_gc_memory(pool);
   47.18      }
   47.19 -    if(is_notification_enabled()) {
   47.20 -      bool isMajorGC = this == MemoryService::get_major_gc_manager();
   47.21 -      GCNotifier::pushNotification(this, isMajorGC ? "end of major GC" : "end of minor GC",
   47.22 -                                   GCCause::to_string(cause));
   47.23 -    }
   47.24    }
   47.25 +
   47.26    if (countCollection) {
   47.27      _num_collections++;
   47.28      // alternately update two objects making one public when complete
   47.29 @@ -321,6 +315,12 @@
   47.30        // reset the current stat for diagnosability purposes
   47.31        _current_gc_stat->clear();
   47.32      }
   47.33 +
   47.34 +    if (is_notification_enabled()) {
   47.35 +      bool isMajorGC = this == MemoryService::get_major_gc_manager();
   47.36 +      GCNotifier::pushNotification(this, isMajorGC ? "end of major GC" : "end of minor GC",
   47.37 +                                   GCCause::to_string(cause));
   47.38 +    }
   47.39    }
   47.40  }
   47.41  
    48.1 --- a/src/share/vm/utilities/workgroup.cpp	Thu Dec 01 13:42:41 2011 -0500
    48.2 +++ b/src/share/vm/utilities/workgroup.cpp	Fri Dec 02 08:52:53 2011 -0500
    48.3 @@ -57,7 +57,6 @@
    48.4                     bool        are_GC_task_threads,
    48.5                     bool        are_ConcurrentGC_threads) :
    48.6    AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) {
    48.7 -  // Save arguments.
    48.8    _total_workers = workers;
    48.9  }
   48.10  
   48.11 @@ -127,6 +126,12 @@
   48.12  }
   48.13  
   48.14  void WorkGang::run_task(AbstractGangTask* task) {
   48.15 +  run_task(task, total_workers());
   48.16 +}
   48.17 +
   48.18 +void WorkGang::run_task(AbstractGangTask* task, uint no_of_parallel_workers) {
   48.19 +  task->set_for_termination(no_of_parallel_workers);
   48.20 +
   48.21    // This thread is executed by the VM thread which does not block
   48.22    // on ordinary MutexLocker's.
   48.23    MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
   48.24 @@ -143,22 +148,32 @@
   48.25    // Tell the workers to get to work.
   48.26    monitor()->notify_all();
   48.27    // Wait for them to be finished
   48.28 -  while (finished_workers() < total_workers()) {
   48.29 +  while (finished_workers() < (int) no_of_parallel_workers) {
   48.30      if (TraceWorkGang) {
   48.31        tty->print_cr("Waiting in work gang %s: %d/%d finished sequence %d",
   48.32 -                    name(), finished_workers(), total_workers(),
   48.33 +                    name(), finished_workers(), no_of_parallel_workers,
   48.34                      _sequence_number);
   48.35      }
   48.36      monitor()->wait(/* no_safepoint_check */ true);
   48.37    }
   48.38    _task = NULL;
   48.39    if (TraceWorkGang) {
   48.40 -    tty->print_cr("/nFinished work gang %s: %d/%d sequence %d",
   48.41 -                  name(), finished_workers(), total_workers(),
   48.42 +    tty->print_cr("\nFinished work gang %s: %d/%d sequence %d",
   48.43 +                  name(), finished_workers(), no_of_parallel_workers,
   48.44                    _sequence_number);
   48.45 +    Thread* me = Thread::current();
   48.46 +    tty->print_cr("  T: 0x%x  VM_thread: %d", me, me->is_VM_thread());
   48.47    }
   48.48  }
   48.49  
   48.50 +void FlexibleWorkGang::run_task(AbstractGangTask* task) {
   48.51 +  // If active_workers() is passed, _finished_workers
   48.52 +  // must only be incremented for workers that find non_null
   48.53 +  // work (as opposed to all those that just check that the
   48.54 +  // task is not null).
   48.55 +  WorkGang::run_task(task, (uint) active_workers());
   48.56 +}
   48.57 +
   48.58  void AbstractWorkGang::stop() {
   48.59    // Tell all workers to terminate, then wait for them to become inactive.
   48.60    MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
   48.61 @@ -168,10 +183,10 @@
   48.62    _task = NULL;
   48.63    _terminate = true;
   48.64    monitor()->notify_all();
   48.65 -  while (finished_workers() < total_workers()) {
   48.66 +  while (finished_workers() < active_workers()) {
   48.67      if (TraceWorkGang) {
   48.68        tty->print_cr("Waiting in work gang %s: %d/%d finished",
   48.69 -                    name(), finished_workers(), total_workers());
   48.70 +                    name(), finished_workers(), active_workers());
   48.71      }
   48.72      monitor()->wait(/* no_safepoint_check */ true);
   48.73    }
   48.74 @@ -275,10 +290,12 @@
   48.75          // Check for new work.
   48.76          if ((data.task() != NULL) &&
   48.77              (data.sequence_number() != previous_sequence_number)) {
   48.78 -          gang()->internal_note_start();
   48.79 -          gang_monitor->notify_all();
   48.80 -          part = gang()->started_workers() - 1;
   48.81 -          break;
   48.82 +          if (gang()->needs_more_workers()) {
   48.83 +            gang()->internal_note_start();
   48.84 +            gang_monitor->notify_all();
   48.85 +            part = gang()->started_workers() - 1;
   48.86 +            break;
   48.87 +          }
   48.88          }
   48.89          // Nothing to do.
   48.90          gang_monitor->wait(/* no_safepoint_check */ true);
   48.91 @@ -350,6 +367,9 @@
   48.92  
   48.93  #endif /* PRODUCT */
   48.94  
   48.95 +// FlexibleWorkGang
   48.96 +
   48.97 +
   48.98  // *** WorkGangBarrierSync
   48.99  
  48.100  WorkGangBarrierSync::WorkGangBarrierSync()
  48.101 @@ -411,10 +431,8 @@
  48.102  }
  48.103  
  48.104  void SubTasksDone::set_n_threads(int t) {
  48.105 -#ifdef ASSERT
  48.106    assert(_claimed == 0 || _threads_completed == _n_threads,
  48.107           "should not be called while tasks are being processed!");
  48.108 -#endif
  48.109    _n_threads = (t == 0 ? 1 : t);
  48.110  }
  48.111  
    49.1 --- a/src/share/vm/utilities/workgroup.hpp	Thu Dec 01 13:42:41 2011 -0500
    49.2 +++ b/src/share/vm/utilities/workgroup.hpp	Fri Dec 02 08:52:53 2011 -0500
    49.3 @@ -96,11 +96,14 @@
    49.4  
    49.5  protected:
    49.6    // Constructor and destructor: only construct subclasses.
    49.7 -  AbstractGangTask(const char* name) {
    49.8 +  AbstractGangTask(const char* name)
    49.9 +  {
   49.10      NOT_PRODUCT(_name = name);
   49.11      _counter = 0;
   49.12    }
   49.13    virtual ~AbstractGangTask() { }
   49.14 +
   49.15 +public:
   49.16  };
   49.17  
   49.18  class AbstractGangTaskWOopQueues : public AbstractGangTask {
   49.19 @@ -116,6 +119,7 @@
   49.20    OopTaskQueueSet* queues() { return _queues; }
   49.21  };
   49.22  
   49.23 +
   49.24  // Class AbstractWorkGang:
   49.25  // An abstract class representing a gang of workers.
   49.26  // You subclass this to supply an implementation of run_task().
   49.27 @@ -130,6 +134,8 @@
   49.28    virtual void run_task(AbstractGangTask* task) = 0;
   49.29    // Stop and terminate all workers.
   49.30    virtual void stop();
   49.31 +  // Return true if more workers should be applied to the task.
   49.32 +  virtual bool needs_more_workers() const { return true; }
   49.33  public:
   49.34    // Debugging.
   49.35    const char* name() const;
   49.36 @@ -287,20 +293,62 @@
   49.37    AbstractWorkGang* gang() const { return _gang; }
   49.38  };
   49.39  
   49.40 +// Dynamic number of worker threads
   49.41 +//
   49.42 +// This type of work gang is used to run different numbers of
   49.43 +// worker threads at different times.  The
   49.44 +// number of workers run for a task is "_active_workers"
   49.45 +// instead of "_total_workers" in a WorkGang.  The method
   49.46 +// "needs_more_workers()" returns true until "_active_workers"
   49.47 +// have been started and returns false afterwards.  The
   49.48 +// implementation of "needs_more_workers()" in WorkGang always
   49.49 +// returns true so that all workers are started.  The method
   49.50 +// "loop()" in GangWorker was modified to ask "needs_more_workers()"
   49.51 +// in its loop to decide if it should start working on a task.
   49.52 +// A worker in "loop()" waits for notification on the WorkGang
   49.53 +// monitor and execution of each worker as it checks for work
   49.54 +// is serialized via the same monitor.  The "needs_more_workers()"
   49.55 +// call is serialized and additionally the calculation for the
   49.56 +// "part" (effectively the worker id for executing the task) is
   49.57 +// serialized to give each worker a unique "part".  Workers that
   49.58 +// are not needed for this task (i.e., "_active_workers" have
   49.59 +// been started before it) continue to wait for work.
   49.60 +
   49.61  class FlexibleWorkGang: public WorkGang {
   49.62 +  // The currently active workers in this gang.
   49.63 +  // This is a number that is dynamically adjusted
   49.64 +  // and checked in the run_task() method at each invocation.
   49.65 +  // As described above _active_workers determines the number
   49.66 +  // of threads started on a task.  It must also be used to
   49.67 +  // determine completion.
   49.68 +
   49.69   protected:
   49.70    int _active_workers;
   49.71   public:
   49.72    // Constructor and destructor.
   49.73 +  // Initialize active_workers to a minimum value.  Setting it to
   49.74 +  // the parameter "workers" will initialize it to a maximum
   49.75 +  // value which is not desirable.
   49.76    FlexibleWorkGang(const char* name, int workers,
   49.77                     bool are_GC_task_threads,
   49.78                     bool  are_ConcurrentGC_threads) :
   49.79 -    WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads) {
   49.80 -    _active_workers = ParallelGCThreads;
   49.81 -  };
   49.82 +    WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads),
   49.83 +    _active_workers(UseDynamicNumberOfGCThreads ? 1 : ParallelGCThreads) {};
   49.84    // Accessors for fields
   49.85    virtual int active_workers() const { return _active_workers; }
   49.86 -  void set_active_workers(int v) { _active_workers = v; }
   49.87 +  void set_active_workers(int v) {
   49.88 +    assert(v <= _total_workers,
   49.89 +           "Trying to set more workers active than there are");
   49.90 +    _active_workers = MIN2(v, _total_workers);
   49.91 +    assert(v != 0, "Trying to set active workers to 0");
   49.92 +    _active_workers = MAX2(1, _active_workers);
   49.93 +    assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers,
   49.94 +           "Unless dynamic should use total workers");
   49.95 +  }
   49.96 +  virtual void run_task(AbstractGangTask* task);
   49.97 +  virtual bool needs_more_workers() const {
   49.98 +    return _started_workers < _active_workers;
   49.99 +  }
  49.100  };
  49.101  
  49.102  // Work gangs in garbage collectors: 2009-06-10
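
The block comment and class above describe the whole dispatch policy: a task runs on _active_workers threads, needs_more_workers() stays true only until that many workers have started, and both the check and the "part" (worker id) calculation are serialized on the gang monitor. The following is a minimal standalone sketch of that policy, using std::thread and std::mutex in place of the VM's Monitor and GangWorker::loop(); every name in it (FlexibleGangSketch, run_task, worker_loop) is an illustrative assumption, not HotSpot code.

// Standalone illustration, not HotSpot code: names below are assumptions
// for the sketch; the real gang serializes on the VM Monitor inside
// GangWorker::loop().
#include <algorithm>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

class FlexibleGangSketch {
 public:
  explicit FlexibleGangSketch(int total) : total_workers_(total) {}

  // Mirrors FlexibleWorkGang::set_active_workers(): clamp into [1, total].
  void set_active_workers(int v) {
    std::lock_guard<std::mutex> g(mu_);
    active_workers_ = std::max(1, std::min(v, total_workers_));
  }

  // Mirrors needs_more_workers(): true until active_workers_ have started.
  bool needs_more_workers() const { return started_workers_ < active_workers_; }

  // Start every thread of the gang; only the first active_workers_ of them
  // claim a unique "part" and do work, the rest return immediately (in the
  // VM they would keep waiting on the gang monitor for the next task).
  void run_task(int active) {
    set_active_workers(active);
    std::vector<std::thread> pool;
    for (int i = 0; i < total_workers_; ++i)
      pool.emplace_back([this] { worker_loop(); });
    for (auto& t : pool) t.join();
    std::printf("%d of %d workers executed the task\n",
                started_workers_, total_workers_);
  }

 private:
  void worker_loop() {
    int part;
    {
      // The needs_more_workers() check and the part-id calculation are
      // serialized, so each participating worker gets a unique part.
      std::lock_guard<std::mutex> g(mu_);
      if (!needs_more_workers()) return;
      part = started_workers_++;
    }
    std::printf("worker running part %d\n", part);
  }

  const int total_workers_;
  int active_workers_ = 1;   // minimum, as with dynamic GC threads
  int started_workers_ = 0;
  std::mutex mu_;
};

int main() {
  FlexibleGangSketch gang(8);
  gang.run_task(3);          // only 3 of the 8 workers pick up a part
  return 0;
}

With active set below total, the remaining workers simply skip the task, matching the "continue to wait for work" behaviour described in the comment above.
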
  49.103 @@ -357,6 +405,11 @@
  49.104  class SubTasksDone: public CHeapObj {
  49.105    jint* _tasks;
  49.106    int _n_tasks;
  49.107 +  // _n_threads is used to determine when a sub task is done.
  49.108 +  // It does not control how many threads will execute the subtask
  49.109 +  // but must be initialized to the number that do execute the task
  49.110 +  // in order to correctly decide when the subtask is done (all the
  49.111 +  // threads working on the task have finished).
  49.112    int _n_threads;
  49.113    jint _threads_completed;
  49.114  #ifdef ASSERT
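
The _n_threads comment in the SubTasksDone hunk above is the other half of the contract: the completion count must be initialized to the number of threads that actually execute the subtasks (the active workers), not to the gang's total. A small sketch of that counting, again under assumed names rather than the real SubTasksDone implementation:

// Standalone illustration with assumed names (SubTasksDoneSketch); it only
// shows why the completion count must equal the number of threads that
// actually execute, i.e. the active workers.
#include <atomic>
#include <cassert>
#include <cstdio>
#include <thread>
#include <vector>

class SubTasksDoneSketch {
 public:
  explicit SubTasksDoneSketch(int n_threads) : n_threads_(n_threads) {}

  // Each participating thread calls this once after finishing its share.
  // Returns true for exactly one caller: the last thread to finish.
  bool all_tasks_completed() {
    int done = ++threads_completed_;
    assert(done <= n_threads_ && "more completions than registered threads");
    return done == n_threads_;
  }

 private:
  const int n_threads_;
  std::atomic<int> threads_completed_{0};
};

int main() {
  const int active_workers = 3;             // threads that actually execute
  SubTasksDoneSketch done(active_workers);  // a mismatch makes "done" fire
                                            // too early or never
  std::vector<std::thread> pool;
  for (int i = 0; i < active_workers; ++i) {
    pool.emplace_back([&done, i] {
      if (done.all_tasks_completed())
        std::printf("worker %d saw the subtask set complete\n", i);
    });
  }
  for (auto& t : pool) t.join();
  return 0;
}
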
    50.1 --- a/src/share/vm/utilities/yieldingWorkgroup.cpp	Thu Dec 01 13:42:41 2011 -0500
    50.2 +++ b/src/share/vm/utilities/yieldingWorkgroup.cpp	Fri Dec 02 08:52:53 2011 -0500
    50.3 @@ -125,7 +125,7 @@
    50.4    if (requested_size != 0) {
    50.5      _active_workers = MIN2(requested_size, total_workers());
    50.6    } else {
    50.7 -    _active_workers = total_workers();
    50.8 +    _active_workers = active_workers();
    50.9    }
   50.10    new_task->set_actual_size(_active_workers);
   50.11    new_task->set_for_termination(_active_workers);
   50.12 @@ -148,22 +148,22 @@
   50.13    for (Status status = yielding_task()->status();
   50.14         status != COMPLETED && status != YIELDED && status != ABORTED;
   50.15         status = yielding_task()->status()) {
   50.16 -    assert(started_workers() <= total_workers(), "invariant");
   50.17 -    assert(finished_workers() <= total_workers(), "invariant");
   50.18 -    assert(yielded_workers() <= total_workers(), "invariant");
   50.19 +    assert(started_workers() <= active_workers(), "invariant");
   50.20 +    assert(finished_workers() <= active_workers(), "invariant");
   50.21 +    assert(yielded_workers() <= active_workers(), "invariant");
   50.22      monitor()->wait(Mutex::_no_safepoint_check_flag);
   50.23    }
   50.24    switch (yielding_task()->status()) {
   50.25      case COMPLETED:
   50.26      case ABORTED: {
   50.27 -      assert(finished_workers() == total_workers(), "Inconsistent status");
   50.28 +      assert(finished_workers() == active_workers(), "Inconsistent status");
   50.29        assert(yielded_workers() == 0, "Invariant");
   50.30        reset();   // for next task; gang<->task binding released
   50.31        break;
   50.32      }
   50.33      case YIELDED: {
   50.34        assert(yielded_workers() > 0, "Invariant");
   50.35 -      assert(yielded_workers() + finished_workers() == total_workers(),
   50.36 +      assert(yielded_workers() + finished_workers() == active_workers(),
   50.37               "Inconsistent counts");
   50.38        break;
   50.39      }
   50.40 @@ -182,7 +182,6 @@
   50.41  
   50.42    MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
   50.43    assert(task() != NULL && task() == gang_task, "Incorrect usage");
   50.44 -  // assert(_active_workers == total_workers(), "For now");
   50.45    assert(_started_workers == _active_workers, "Precondition");
   50.46    assert(_yielded_workers > 0 && yielding_task()->status() == YIELDED,
   50.47           "Else why are we calling continue_task()");
   50.48 @@ -202,7 +201,7 @@
   50.49  void YieldingFlexibleWorkGang::yield() {
   50.50    assert(task() != NULL, "Inconsistency; should have task binding");
   50.51    MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
   50.52 -  assert(yielded_workers() < total_workers(), "Consistency check");
   50.53 +  assert(yielded_workers() < active_workers(), "Consistency check");
   50.54    if (yielding_task()->status() == ABORTING) {
   50.55      // Do not yield; we need to abort as soon as possible
   50.56      // XXX NOTE: This can cause a performance pathology in the
   50.57 @@ -213,7 +212,7 @@
   50.58      // us to return at each potential yield point.
   50.59      return;
   50.60    }
   50.61 -  if (++_yielded_workers + finished_workers() == total_workers()) {
   50.62 +  if (++_yielded_workers + finished_workers() == active_workers()) {
   50.63      yielding_task()->set_status(YIELDED);
   50.64      monitor()->notify_all();
   50.65    } else {
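
The yieldingWorkgroup.cpp changes above all replace total_workers() with active_workers() in the yield/finish bookkeeping. A compact sketch of why that is required, under assumed names and without the real monitor protocol:

// Standalone illustration (assumed names, single-threaded for brevity): once
// only the active workers run a task, the gang must declare YIELDED when
// every *active* worker has yielded or finished; a comparison against the
// total would never be satisfied while active_workers < total_workers.
#include <cassert>
#include <cstdio>

enum Status { ACTIVE, YIELDED, COMPLETED };

struct YieldingGangSketch {
  explicit YieldingGangSketch(int active)
      : active_workers(active), yielded_workers(0), finished_workers(0),
        status(ACTIVE) {}

  // Called by a worker at a yield point; in the VM the gang monitor is held.
  void yield_one() {
    assert(yielded_workers < active_workers && "consistency check");
    if (++yielded_workers + finished_workers == active_workers) {
      status = YIELDED;   // last participating worker; notify_all() follows
    }
  }

  int active_workers;
  int yielded_workers;
  int finished_workers;
  Status status;
};

int main() {
  YieldingGangSketch gang(2);   // 2 active workers out of a larger gang
  gang.yield_one();
  gang.yield_one();
  std::printf("task yielded: %s\n", gang.status == YIELDED ? "yes" : "no");
  return 0;
}
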
    51.1 --- a/src/share/vm/utilities/yieldingWorkgroup.hpp	Thu Dec 01 13:42:41 2011 -0500
    51.2 +++ b/src/share/vm/utilities/yieldingWorkgroup.hpp	Fri Dec 02 08:52:53 2011 -0500
    51.3 @@ -199,17 +199,11 @@
    51.4    void abort();
    51.5  
    51.6  private:
    51.7 -  int _active_workers;
    51.8    int _yielded_workers;
    51.9    void wait_for_gang();
   51.10  
   51.11  public:
   51.12    // Accessors for fields
   51.13 -  int active_workers() const {
   51.14 -    return _active_workers;
   51.15 -  }
   51.16 -
   51.17 -  // Accessors for fields
   51.18    int yielded_workers() const {
   51.19      return _yielded_workers;
   51.20    }
