Fri, 02 Dec 2011 08:52:53 -0500
Merge
src/share/vm/runtime/globals.hpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Dec 01 13:42:41 2011 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Fri Dec 02 08:52:53 2011 -0500
@@ -668,12 +668,16 @@
 
 // We de-virtualize the block-related calls below, since we know that our
 // space is a CompactibleFreeListSpace.
+
 #define FreeListSpace_DCTOC__walk_mem_region_with_cl_DEFN(ClosureType)         \
 void FreeListSpace_DCTOC::walk_mem_region_with_cl(MemRegion mr,                \
                                                   HeapWord* bottom,            \
                                                   HeapWord* top,               \
                                                   ClosureType* cl) {           \
-  if (SharedHeap::heap()->n_par_threads() > 0) {                              \
+  bool is_par = SharedHeap::heap()->n_par_threads() > 0;                      \
+  if (is_par) {                                                               \
+    assert(SharedHeap::heap()->n_par_threads() ==                             \
+           SharedHeap::heap()->workers()->active_workers(), "Mismatch");      \
     walk_mem_region_with_cl_par(mr, bottom, top, cl);                         \
   } else {                                                                    \
     walk_mem_region_with_cl_nopar(mr, bottom, top, cl);                       \
@@ -1925,6 +1929,9 @@
   if (rem_size < SmallForDictionary) {
     bool is_par = (SharedHeap::heap()->n_par_threads() > 0);
     if (is_par) _indexedFreeListParLocks[rem_size]->lock();
+    assert(!is_par ||
+           (SharedHeap::heap()->n_par_threads() ==
+            SharedHeap::heap()->workers()->active_workers()), "Mismatch");
     returnChunkToFreeList(ffc);
     split(size, rem_size);
     if (is_par) _indexedFreeListParLocks[rem_size]->unlock();
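
Both hunks above enforce the same invariant: whenever a phase runs in parallel, the heap-wide n_par_threads() count must agree with the work gang's active_workers(), since the parallel path takes per-size-class free-list locks. A standalone sketch of that locking idiom (not HotSpot code; kSmallForDictionary and return_chunk_to_free_list are illustrative stand-ins):

    #include <mutex>

    static const int kSmallForDictionary = 257;  // illustrative bound
    static std::mutex indexed_free_list_locks[kSmallForDictionary];

    // Only the brief return-and-split critical section is serialized, and
    // only workers freeing chunks of the same size contend on a lock.
    void return_chunk_to_free_list(int rem_size, bool is_par) {
      if (is_par) indexed_free_list_locks[rem_size].lock();
      // ... returnChunkToFreeList(ffc); split(size, rem_size); ...
      if (is_par) indexed_free_list_locks[rem_size].unlock();
    }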
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Dec 01 13:42:41 2011 -0500
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Dec 02 08:52:53 2011 -0500
@@ -3582,16 +3582,6 @@
          " or no bits are set in the gc_prologue before the start of the next "
          "subsequent marking phase.");
 
-  // Temporarily disabled, since pre/post-consumption closures don't
-  // care about precleaned cards
-  #if 0
-  {
-    MemRegion mr = MemRegion((HeapWord*)_virtual_space.low(),
-                             (HeapWord*)_virtual_space.high());
-    _ct->ct_bs()->preclean_dirty_cards(mr);
-  }
-  #endif
-
   // Save the end of the used_region of the constituent generations
   // to be used to limit the extent of sweep in each generation.
   save_sweep_limits();
@@ -4244,9 +4234,11 @@
 
 bool CMSCollector::do_marking_mt(bool asynch) {
   assert(ConcGCThreads > 0 && conc_workers() != NULL, "precondition");
-  // In the future this would be determined ergonomically, based
-  // on #cpu's, # active mutator threads (and load), and mutation rate.
-  int num_workers = ConcGCThreads;
+  int num_workers = AdaptiveSizePolicy::calc_active_conc_workers(
+                                conc_workers()->total_workers(),
+                                conc_workers()->active_workers(),
+                                Threads::number_of_non_daemon_threads());
+  conc_workers()->set_active_workers(num_workers);
 
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
@@ -5062,6 +5054,8 @@
   ParallelTaskTerminator _term;
 
  public:
+  // A value of 0 passed to n_workers will cause the number of
+  // workers to be taken from the active workers in the work gang.
   CMSParRemarkTask(CMSCollector* collector,
                    CompactibleFreeListSpace* cms_space,
                    CompactibleFreeListSpace* perm_space,
@@ -5544,7 +5538,15 @@
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
-  int n_workers = workers->total_workers();
+  // Choose to use the number of GC workers most recently set
+  // into "active_workers".  If active_workers is not set, set it
+  // to ParallelGCThreads.
+  int n_workers = workers->active_workers();
+  if (n_workers == 0) {
+    assert(n_workers > 0, "Should have been set during scavenge");
+    n_workers = ParallelGCThreads;
+    workers->set_active_workers(n_workers);
+  }
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
 
@@ -5884,8 +5886,17 @@
       // and a different number of discovered lists may have Ref objects.
       // That is OK as long as the Reference lists are balanced (see
       // balance_all_queues() and balance_queues()).
-
-      rp->set_active_mt_degree(ParallelGCThreads);
+      GenCollectedHeap* gch = GenCollectedHeap::heap();
+      int active_workers = ParallelGCThreads;
+      FlexibleWorkGang* workers = gch->workers();
+      if (workers != NULL) {
+        active_workers = workers->active_workers();
+        // The expectation is that active_workers will have already
+        // been set to a reasonable value.  If it has not been set,
+        // investigate.
+        assert(active_workers > 0, "Should have been set during scavenge");
+      }
+      rp->set_active_mt_degree(active_workers);
       CMSRefProcTaskExecutor task_executor(*this);
       rp->process_discovered_references(&_is_alive_closure,
                                         &cmsKeepAliveClosure,
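
The key behavioral change above is that CMS now asks AdaptiveSizePolicy::calc_active_conc_workers() for a worker count instead of always using ConcGCThreads. The real policy is defined elsewhere in this changeset; as a rough standalone model only (my assumption, not the actual formula), the count scales with the number of live non-daemon mutator threads and is clamped to the gang's size:

    #include <algorithm>
    #include <cstdio>

    // Rough model only: scale with mutator load, clamp to [1, total_workers].
    static int calc_active_conc_workers_model(int total_workers,
                                              int non_daemon_threads) {
      return std::max(1, std::min(total_workers, non_daemon_threads));
    }

    int main() {
      // e.g. an 8-thread gang while only 3 non-daemon Java threads run
      std::printf("%d\n", calc_active_conc_workers_model(8, 3));  // prints 3
      return 0;
    }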
--- a/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Thu Dec 01 13:42:41 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp	Fri Dec 02 08:52:53 2011 -0500
@@ -255,7 +255,18 @@
 CollectionSetChooser::
 prepareForAddMarkedHeapRegionsPar(size_t n_regions, size_t chunkSize) {
   _first_par_unreserved_idx = 0;
-  size_t max_waste = ParallelGCThreads * chunkSize;
+  int n_threads = ParallelGCThreads;
+  if (UseDynamicNumberOfGCThreads) {
+    assert(G1CollectedHeap::heap()->workers()->active_workers() > 0,
+      "Should have been set earlier");
+    // This is defensive code. As the assertion above says, the number
+    // of active threads should be > 0, but in case there is some path
+    // or some improperly initialized variable which leads to no
+    // active threads, protect against that in a product build.
+    n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
+                     1);
+  }
+  size_t max_waste = n_threads * chunkSize;
   // it should be aligned with respect to chunkSize
   size_t aligned_n_regions =
                      (n_regions + (chunkSize - 1)) / chunkSize * chunkSize;
@@ -265,6 +276,11 @@
 
 jint
 CollectionSetChooser::getParMarkedHeapRegionChunk(jint n_regions) {
+  // Don't do this assert because this can be called at a point
+  // where the loop upstream will not execute again but might
+  // try to claim more chunks (loop test has not been done yet).
+  // assert(_markedRegions.length() > _first_par_unreserved_idx,
+  // "Striding beyond the marked regions");
   jint res = Atomic::add(n_regions, &_first_par_unreserved_idx);
   assert(_markedRegions.length() > res + n_regions - 1,
          "Should already have been expanded");
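
The commented-out assert above is worth a note: workers claim chunks of the marked-region array by atomically advancing a shared index, and a worker whose loop has not yet re-tested its bound can legitimately bump the index one chunk past the end, so the bound cannot be asserted inside the claim function. A standalone sketch of the idiom (not HotSpot code; std::atomic's fetch_add returns the old value, whereas HotSpot's Atomic::add returns the updated value):

    #include <atomic>
    #include <cstdio>

    static std::atomic<int> first_par_unreserved_idx{0};

    // Each caller receives the start of a private, contiguous chunk.
    int claim_chunk(int chunk_size) {
      return first_par_unreserved_idx.fetch_add(chunk_size);
    }

    int main() {
      int a = claim_chunk(8);
      int b = claim_chunk(8);
      int c = claim_chunk(8);
      // Disjoint [start, start + 8) ranges: prints 0 8 16.
      std::printf("%d %d %d\n", a, b, c);
      return 0;
    }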
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Dec 01 13:42:41 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Dec 02 08:52:53 2011 -0500
@@ -44,7 +44,7 @@
 //
 // CMS Bit Map Wrapper
 
-CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
+CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
   _bm((uintptr_t*)NULL,0),
   _shifter(shifter) {
   _bmStartWord = (HeapWord*)(rs.base());
@@ -458,12 +458,17 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
+size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
+  return MAX2((n_par_threads + 2) / 4, (size_t)1);
+}
+
 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                                int max_regions) :
   _markBitMap1(rs, MinObjAlignment - 1),
   _markBitMap2(rs, MinObjAlignment - 1),
 
   _parallel_marking_threads(0),
+  _max_parallel_marking_threads(0),
   _sleep_factor(0.0),
   _marking_task_overhead(1.0),
   _cleanup_sleep_factor(0.0),
@@ -554,15 +559,17 @@
   if (ParallelGCThreads == 0) {
     // if we are not running with any parallel GC threads we will not
     // spawn any marking threads either
-    _parallel_marking_threads =   0;
-    _sleep_factor             = 0.0;
-    _marking_task_overhead    = 1.0;
+    _parallel_marking_threads =     0;
+    _max_parallel_marking_threads = 0;
+    _sleep_factor             =   0.0;
+    _marking_task_overhead    =   1.0;
   } else {
     if (ConcGCThreads > 0) {
       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
       // if both are set
 
       _parallel_marking_threads = ConcGCThreads;
+      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
     } else if (G1MarkingOverheadPercent > 0) {
@@ -583,10 +590,12 @@
                          (1.0 - marking_task_overhead) / marking_task_overhead;
 
       _parallel_marking_threads = (size_t) marking_thread_num;
+      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = sleep_factor;
       _marking_task_overhead    = marking_task_overhead;
     } else {
-      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
+      _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
+      _max_parallel_marking_threads = _parallel_marking_threads;
       _sleep_factor             = 0.0;
       _marking_task_overhead    = 1.0;
     }
@@ -609,7 +618,7 @@
 
     guarantee(parallel_marking_threads() > 0, "peace of mind");
     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
-         (int) _parallel_marking_threads, false, true);
+         (int) _max_parallel_marking_threads, false, true);
     if (_parallel_workers == NULL) {
       vm_exit_during_initialization("Failed necessary allocation.");
     } else {
@@ -1106,6 +1115,33 @@
   ~CMConcurrentMarkingTask() { }
 };
 
+// Calculates the number of active workers for a concurrent
+// phase.
+int ConcurrentMark::calc_parallel_marking_threads() {
+
+  size_t n_conc_workers;
+  if (!G1CollectedHeap::use_parallel_gc_threads()) {
+    n_conc_workers = 1;
+  } else {
+    if (!UseDynamicNumberOfGCThreads ||
+        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
+         !ForceDynamicNumberOfGCThreads)) {
+      n_conc_workers = max_parallel_marking_threads();
+    } else {
+      n_conc_workers =
+        AdaptiveSizePolicy::calc_default_active_workers(
+                                     max_parallel_marking_threads(),
+                                     1, /* Minimum workers */
+                                     parallel_marking_threads(),
+                                     Threads::number_of_non_daemon_threads());
+      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
+      // that scaling has already gone into "_max_parallel_marking_threads".
+    }
+  }
+  assert(n_conc_workers > 0, "Always need at least 1");
+  return (int) MAX2(n_conc_workers, (size_t) 1);
+}
+
 void ConcurrentMark::markFromRoots() {
   // we might be tempted to assert that:
   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
@@ -1116,9 +1152,20 @@
 
   _restart_for_overflow = false;
 
-  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
+  // Parallel task terminator is set in "set_phase()".
   force_overflow_conc()->init();
-  set_phase(active_workers, true /* concurrent */);
+
+  // _g1h has _n_par_threads
+
+  _parallel_marking_threads = calc_parallel_marking_threads();
+  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
+    "Maximum number of marking threads exceeded");
+  _parallel_workers->set_active_workers((int)_parallel_marking_threads);
+  // Don't set _n_par_threads because it affects MT in process_strong_roots()
+  // and the decisions on that MT processing are made elsewhere.
+
+  assert( _parallel_workers->active_workers() > 0, "Should have been set");
+  set_phase(_parallel_workers->active_workers(), true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (parallel_marking_threads() > 0) {
@@ -1181,6 +1228,7 @@
                                        true /* expected_active */);
 
     if (VerifyDuringGC) {
+
       HandleMark hm;  // handle scope
       gclog_or_tty->print(" VerifyDuringGC:(after)");
       Universe::heap()->prepare_for_verify();
@@ -1463,12 +1511,20 @@
   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                       BitMap* region_bm, BitMap* card_bm)
     : AbstractGangTask("G1 final counting"), _g1h(g1h),
-      _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
-    if (ParallelGCThreads > 0) {
-      _n_workers = _g1h->workers()->total_workers();
+      _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
+      _n_workers(0)
+  {
+    // Use the value already set as the number of active threads
+    // in the call to run_task().  Needed for the allocation of
+    // _live_bytes and _used_bytes.
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      assert( _g1h->workers()->active_workers() > 0,
+        "Should have been previously set");
+      _n_workers = _g1h->workers()->active_workers();
     } else {
       _n_workers = 1;
     }
+
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }
@@ -1485,6 +1541,7 @@
     calccl.no_yield();
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&calccl, i,
+                                            (int) _n_workers,
                                             HeapRegion::FinalCountClaimValue);
     } else {
       _g1h->heap_region_iterate(&calccl);
@@ -1530,10 +1587,42 @@
                              FreeRegionList* local_cleanup_list,
                              OldRegionSet* old_proxy_set,
                              HumongousRegionSet* humongous_proxy_set,
-                             HRRSCleanupTask* hrrs_cleanup_task);
+                             HRRSCleanupTask* hrrs_cleanup_task) :
+    _g1(g1), _worker_num(worker_num),
+    _max_live_bytes(0), _regions_claimed(0),
+    _freed_bytes(0),
+    _claimed_region_time(0.0), _max_region_time(0.0),
+    _local_cleanup_list(local_cleanup_list),
+    _old_proxy_set(old_proxy_set),
+    _humongous_proxy_set(humongous_proxy_set),
+    _hrrs_cleanup_task(hrrs_cleanup_task) { }
+
   size_t freed_bytes() { return _freed_bytes; }
 
-  bool doHeapRegion(HeapRegion *r);
+  bool doHeapRegion(HeapRegion *hr) {
+    // We use a claim value of zero here because all regions
+    // were claimed with value 1 in the FinalCount task.
+    hr->reset_gc_time_stamp();
+    if (!hr->continuesHumongous()) {
+      double start = os::elapsedTime();
+      _regions_claimed++;
+      hr->note_end_of_marking();
+      _max_live_bytes += hr->max_live_bytes();
+      _g1->free_region_if_empty(hr,
+                                &_freed_bytes,
+                                _local_cleanup_list,
+                                _old_proxy_set,
+                                _humongous_proxy_set,
+                                _hrrs_cleanup_task,
+                                true /* par */);
+      double region_time = (os::elapsedTime() - start);
+      _claimed_region_time += region_time;
+      if (region_time > _max_region_time) {
+        _max_region_time = region_time;
+      }
+    }
+    return false;
+  }
 
   size_t max_live_bytes() { return _max_live_bytes; }
   size_t regions_claimed() { return _regions_claimed; }
@@ -1568,6 +1657,7 @@
                                            &hrrs_cleanup_task);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
+                                            _g1h->workers()->active_workers(),
                                             HeapRegion::NoteEndClaimValue);
     } else {
       _g1h->heap_region_iterate(&g1_note_end);
@@ -1644,47 +1734,6 @@
 
 };
 
-G1NoteEndOfConcMarkClosure::
-G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
-                           int worker_num,
-                           FreeRegionList* local_cleanup_list,
-                           OldRegionSet* old_proxy_set,
-                           HumongousRegionSet* humongous_proxy_set,
-                           HRRSCleanupTask* hrrs_cleanup_task)
-  : _g1(g1), _worker_num(worker_num),
-    _max_live_bytes(0), _regions_claimed(0),
-    _freed_bytes(0),
-    _claimed_region_time(0.0), _max_region_time(0.0),
-    _local_cleanup_list(local_cleanup_list),
-    _old_proxy_set(old_proxy_set),
-    _humongous_proxy_set(humongous_proxy_set),
-    _hrrs_cleanup_task(hrrs_cleanup_task) { }
-
-bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
-  // We use a claim value of zero here because all regions
-  // were claimed with value 1 in the FinalCount task.
-  hr->reset_gc_time_stamp();
-  if (!hr->continuesHumongous()) {
-    double start = os::elapsedTime();
-    _regions_claimed++;
-    hr->note_end_of_marking();
-    _max_live_bytes += hr->max_live_bytes();
-    _g1->free_region_if_empty(hr,
-                              &_freed_bytes,
-                              _local_cleanup_list,
-                              _old_proxy_set,
-                              _humongous_proxy_set,
-                              _hrrs_cleanup_task,
-                              true /* par */);
-    double region_time = (os::elapsedTime() - start);
-    _claimed_region_time += region_time;
-    if (region_time > _max_region_time) {
-      _max_region_time = region_time;
-    }
-  }
-  return false;
-}
-
 void ConcurrentMark::cleanup() {
   // world is stopped at this checkpoint
   assert(SafepointSynchronize::is_at_safepoint(),
@@ -1716,6 +1765,9 @@
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
+  g1h->set_par_threads();
+  size_t n_workers = g1h->n_par_threads();
+
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                         &_region_bm, &_card_bm);
@@ -1724,9 +1776,10 @@
            HeapRegion::InitialClaimValue),
          "sanity check");
 
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    assert(g1h->n_par_threads() == (int) n_workers,
+      "Should not have been reset");
     g1h->workers()->run_task(&g1_par_count_task);
+    // Done with the parallel phase so reset to 0.
     g1h->set_par_threads(0);
 
     assert(g1h->check_heap_region_claim_values(
@@ -1776,8 +1829,7 @@
   double note_end_start = os::elapsedTime();
   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    g1h->set_par_threads((int)n_workers);
     g1h->workers()->run_task(&g1_par_note_end_task);
     g1h->set_par_threads(0);
 
@@ -1806,8 +1858,7 @@
     double rs_scrub_start = os::elapsedTime();
     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
     if (G1CollectedHeap::use_parallel_gc_threads()) {
-      int n_workers = g1h->workers()->total_workers();
-      g1h->set_par_threads(n_workers);
+      g1h->set_par_threads((int)n_workers);
       g1h->workers()->run_task(&g1_par_scrub_rs_task);
       g1h->set_par_threads(0);
 
@@ -1825,7 +1876,7 @@
 
   // this will also free any regions totally full of garbage objects,
   // and sort the regions.
-  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
+  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
 
   // Statistics.
   double end = os::elapsedTime();
@@ -1991,16 +2042,12 @@
 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
   ConcurrentMark* _cm;
   CMTask*         _task;
-  CMBitMap*       _bitMap;
   int             _ref_counter_limit;
   int             _ref_counter;
  public:
-  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
-                                  CMTask* task,
-                                  CMBitMap* bitMap) :
-    _cm(cm), _task(task), _bitMap(bitMap),
-    _ref_counter_limit(G1RefProcDrainInterval)
-  {
+  G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
+    _cm(cm), _task(task),
+    _ref_counter_limit(G1RefProcDrainInterval) {
     assert(_ref_counter_limit > 0, "sanity");
     _ref_counter = _ref_counter_limit;
   }
@@ -2091,19 +2138,16 @@
 private:
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
-  CMBitMap*        _bitmap;
   WorkGang*        _workers;
   int              _active_workers;
 
 public:
   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                           ConcurrentMark* cm,
-                          CMBitMap* bitmap,
                           WorkGang* workers,
                           int n_workers) :
-    _g1h(g1h), _cm(cm), _bitmap(bitmap),
-    _workers(workers), _active_workers(n_workers)
-  { }
+    _g1h(g1h), _cm(cm),
+    _workers(workers), _active_workers(n_workers) { }
 
   // Executes the given task using concurrent marking worker threads.
   virtual void execute(ProcessTask& task);
@@ -2115,21 +2159,18 @@
   ProcessTask&     _proc_task;
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
-  CMBitMap*        _bitmap;
 
 public:
   G1CMRefProcTaskProxy(ProcessTask& proc_task,
                        G1CollectedHeap* g1h,
-                       ConcurrentMark* cm,
-                       CMBitMap* bitmap) :
+                       ConcurrentMark* cm) :
     AbstractGangTask("Process reference objects in parallel"),
-    _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
-  {}
+    _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
 
   virtual void work(int i) {
     CMTask* marking_task = _cm->task(i);
     G1CMIsAliveClosure g1_is_alive(_g1h);
-    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
+    G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
 
     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
@@ -2139,7 +2180,7 @@
 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
   assert(_workers != NULL, "Need parallel worker threads.");
 
-  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
+  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
 
   // We need to reset the phase for each task execution so that
   // the termination protocol of CMTask::do_marking_step works.
@@ -2156,8 +2197,7 @@
 public:
   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
     AbstractGangTask("Enqueue reference objects in parallel"),
-    _enq_task(enq_task)
-  { }
+    _enq_task(enq_task) { }
 
   virtual void work(int i) {
     _enq_task.work(i);
@@ -2207,10 +2247,10 @@
 
     // We use the work gang from the G1CollectedHeap and we utilize all
     // the worker threads.
-    int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
+    int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
     active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
 
-    G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
+    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                               g1h->workers(), active_workers);
 
     if (rp->processing_is_mt()) {
@@ -2290,7 +2330,9 @@
   }
 
   CMRemarkTask(ConcurrentMark* cm) :
-    AbstractGangTask("Par Remark"), _cm(cm) { }
+    AbstractGangTask("Par Remark"), _cm(cm) {
+    _cm->terminator()->reset_for_reuse(cm->_g1h->workers()->active_workers());
+  }
 };
 
 void ConcurrentMark::checkpointRootsFinalWork() {
@@ -2302,16 +2344,21 @@
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     G1CollectedHeap::StrongRootsScope srs(g1h);
-    // this is remark, so we'll use up all available threads
-    int active_workers = ParallelGCThreads;
+    // this is remark, so we'll use up all active threads
+    int active_workers = g1h->workers()->active_workers();
+    if (active_workers == 0) {
+      assert(active_workers > 0, "Should have been set earlier");
+      active_workers = ParallelGCThreads;
+      g1h->workers()->set_active_workers(active_workers);
+    }
     set_phase(active_workers, false /* concurrent */);
+    // Leave _parallel_marking_threads at its
+    // value originally calculated in the ConcurrentMark
+    // constructor and pass values of the active workers
+    // through the gang in the task.
 
     CMRemarkTask remarkTask(this);
-    // We will start all available threads, even if we decide that the
-    // active_workers will be fewer. The extra ones will just bail out
-    // immediately.
-    int n_workers = g1h->workers()->total_workers();
-    g1h->set_par_threads(n_workers);
+    g1h->set_par_threads(active_workers);
     g1h->workers()->run_task(&remarkTask);
     g1h->set_par_threads(0);
   } else {
@@ -2859,8 +2906,10 @@
   }
 }
 
-class CSMarkOopClosure: public OopClosure {
-  friend class CSMarkBitMapClosure;
+// Closures used by ConcurrentMark::complete_marking_in_collection_set().
+
+class CSetMarkOopClosure: public OopClosure {
+  friend class CSetMarkBitMapClosure;
 
   G1CollectedHeap* _g1h;
   CMBitMap*        _bm;
@@ -2870,6 +2919,7 @@
   int              _ms_size;
   int              _ms_ind;
   int              _array_increment;
+  int              _worker_i;
 
   bool push(oop obj, int arr_ind = 0) {
     if (_ms_ind == _ms_size) {
@@ -2910,7 +2960,6 @@
       for (int j = arr_ind; j < lim; j++) {
         do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
       }
-
     } else {
       obj->oop_iterate(this);
     }
@@ -2920,17 +2969,17 @@
   }
 
 public:
-  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
+  CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
     _g1h(G1CollectedHeap::heap()),
     _cm(cm),
     _bm(cm->nextMarkBitMap()),
     _ms_size(ms_size), _ms_ind(0),
     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
-    _array_increment(MAX2(ms_size/8, 16))
-  {}
-
-  ~CSMarkOopClosure() {
+    _array_increment(MAX2(ms_size/8, 16)),
+    _worker_i(worker_i) { }
+
+  ~CSetMarkOopClosure() {
     FREE_C_HEAP_ARRAY(oop, _ms);
     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
   }
@@ -2953,10 +3002,11 @@
     if (hr != NULL) {
       if (hr->in_collection_set()) {
         if (_g1h->is_obj_ill(obj)) {
-          _bm->mark((HeapWord*)obj);
-          if (!push(obj)) {
-            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
-            set_abort();
+          if (_bm->parMark((HeapWord*)obj)) {
+            if (!push(obj)) {
+              gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
+              set_abort();
+            }
           }
         }
       } else {
@@ -2967,19 +3017,19 @@
   }
 };
 
-class CSMarkBitMapClosure: public BitMapClosure {
-  G1CollectedHeap* _g1h;
-  CMBitMap*        _bitMap;
-  ConcurrentMark*  _cm;
-  CSMarkOopClosure _oop_cl;
+class CSetMarkBitMapClosure: public BitMapClosure {
+  G1CollectedHeap*   _g1h;
+  CMBitMap*          _bitMap;
+  ConcurrentMark*    _cm;
+  CSetMarkOopClosure _oop_cl;
+  int                _worker_i;
+
 public:
-  CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
+  CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
     _g1h(G1CollectedHeap::heap()),
     _bitMap(cm->nextMarkBitMap()),
-    _oop_cl(cm, ms_size)
-  {}
-
-  ~CSMarkBitMapClosure() {}
+    _oop_cl(cm, ms_size, worker_i),
+    _worker_i(worker_i) { }
 
   bool do_bit(size_t offset) {
     // convert offset into a HeapWord*
@@ -3001,53 +3051,69 @@
   }
 };
 
-
-class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
-  CMBitMap* _bm;
-  CSMarkBitMapClosure _bit_cl;
+class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
+  CMBitMap*             _bm;
+  CSetMarkBitMapClosure _bit_cl;
+  int                   _worker_i;
+
   enum SomePrivateConstants {
     MSSize = 1000
   };
-  bool _completed;
+
 public:
-  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
+  CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
     _bm(cm->nextMarkBitMap()),
-    _bit_cl(cm, MSSize),
-    _completed(true)
-  {}
-
-  ~CompleteMarkingInCSHRClosure() {}
-
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->evacuation_failed()) {
-      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
-      if (!mr.is_empty()) {
-        if (!_bm->iterate(&_bit_cl, mr)) {
-          _completed = false;
-          return true;
+    _bit_cl(cm, MSSize, worker_i),
+    _worker_i(worker_i) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
+      // The current worker has successfully claimed the region.
+      if (!hr->evacuation_failed()) {
+        MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
+        if (!mr.is_empty()) {
+          bool done = false;
+          while (!done) {
+            done = _bm->iterate(&_bit_cl, mr);
+          }
         }
       }
     }
     return false;
   }
-
-  bool completed() { return _completed; }
 };
 
-class ClearMarksInHRClosure: public HeapRegionClosure {
-  CMBitMap* _bm;
+class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
+  jint _claim_value;
+
 public:
-  ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
-
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
-      MemRegion usedMR = r->used_region();
-      _bm->clearRange(r->used_region());
-    }
+  SetClaimValuesInCSetHRClosure(jint claim_value) :
+    _claim_value(claim_value) { }
+
+  bool doHeapRegion(HeapRegion* hr) {
+    hr->set_claim_value(_claim_value);
     return false;
   }
 };
 
+class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+  ConcurrentMark*  _cm;
+
+public:
+  G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
+                              ConcurrentMark* cm) :
+    AbstractGangTask("Complete Mark in CSet"),
+    _g1h(g1h), _cm(cm) { }
+
+  void work(int worker_i) {
+    CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
+    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
+    _g1h->collection_set_iterate_from(hr, &cmplt);
+  }
+};
+
 void ConcurrentMark::complete_marking_in_collection_set() {
   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
 
@@ -3056,20 +3122,32 @@
     return;
   }
 
-  int i = 1;
   double start = os::elapsedTime();
-  while (true) {
-    i++;
-    CompleteMarkingInCSHRClosure cmplt(this);
-    g1h->collection_set_iterate(&cmplt);
-    if (cmplt.completed()) break;
+  int n_workers = g1h->workers()->total_workers();
+
+  G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
+
+  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    g1h->set_par_threads(n_workers);
+    g1h->workers()->run_task(&complete_mark_task);
+    g1h->set_par_threads(0);
+  } else {
+    complete_mark_task.work(0);
   }
+
+  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
+
+  // Now reset the claim values in the regions in the collection set.
+  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
+  g1h->collection_set_iterate(&set_cv_cl);
+
+  assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
+
   double end_time = os::elapsedTime();
   double elapsed_time_ms = (end_time - start) * 1000.0;
   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
-
-  ClearMarksInHRClosure clr(nextMarkBitMap());
-  g1h->collection_set_iterate(&clr);
 }
 
 // The next two methods deal with the following optimisation. Some
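
scale_parallel_threads() above makes the long-standing default explicit: concurrent marking uses roughly a quarter of the STW parallel GC threads, never fewer than one. A standalone check of the integer arithmetic (not HotSpot code, but the same formula as the diff):

    #include <algorithm>
    #include <cstdio>

    static size_t scale_parallel_threads(size_t n_par_threads) {
      return std::max((n_par_threads + 2) / 4, (size_t)1);
    }

    int main() {
      for (size_t n = 1; n <= 13; n += 4) {
        std::printf("ParallelGCThreads=%zu -> marking threads=%zu\n",
                    n, scale_parallel_threads(n));
      }
      // 1 -> 1, 5 -> 1, 9 -> 2, 13 -> 3
      return 0;
    }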
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Dec 01 13:42:41 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Dec 02 08:52:53 2011 -0500
@@ -360,7 +360,7 @@
   friend class ConcurrentMarkThread;
   friend class CMTask;
   friend class CMBitMapClosure;
-  friend class CSMarkOopClosure;
+  friend class CSetMarkOopClosure;
   friend class CMGlobalObjectClosure;
   friend class CMRemarkTask;
   friend class CMConcurrentMarkingTask;
@@ -375,7 +375,9 @@
   ConcurrentMarkThread* _cmThread;   // the thread doing the work
   G1CollectedHeap*      _g1h;        // the heap.
   size_t                _parallel_marking_threads; // the number of marking
-                                                   // threads we'll use
+                                                   // threads we're using
+  size_t                _max_parallel_marking_threads; // max number of marking
+                                                   // threads we'll ever use
   double                _sleep_factor; // how much we have to sleep, with
                                        // respect to the work we just did, to
                                        // meet the marking overhead goal
@@ -473,7 +475,7 @@
 
   double*   _accum_task_vtime;   // accumulated task vtime
 
-  WorkGang* _parallel_workers;
+  FlexibleWorkGang* _parallel_workers;
 
   ForceOverflowSettings _force_overflow_conc;
   ForceOverflowSettings _force_overflow_stw;
@@ -504,6 +506,7 @@
 
   // accessor methods
   size_t parallel_marking_threads() { return _parallel_marking_threads; }
+  size_t max_parallel_marking_threads() { return _max_parallel_marking_threads;}
   double sleep_factor()             { return _sleep_factor; }
   double marking_task_overhead()    { return _marking_task_overhead;}
   double cleanup_sleep_factor()     { return _cleanup_sleep_factor; }
@@ -709,6 +712,14 @@
   CMBitMapRO* prevMarkBitMap() const { return _prevMarkBitMap; }
   CMBitMap*   nextMarkBitMap() const { return _nextMarkBitMap; }
 
+  // Returns the number of GC threads to be used in a concurrent
+  // phase based on the number of GC threads being used in a STW
+  // phase.
+  size_t scale_parallel_threads(size_t n_par_threads);
+
+  // Calculates the number of GC threads to be used in a concurrent phase.
+  int calc_parallel_marking_threads();
+
   // The following three are interaction between CM and
   // G1CollectedHeap
 
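
The switch of _parallel_workers from WorkGang to FlexibleWorkGang is what the rest of the changeset relies on: a flexible gang can dispatch a task on fewer threads than it owns. A standalone model of that distinction (not the HotSpot classes; names are illustrative):

    #include <algorithm>

    // Model: total_workers() is the gang's fixed capacity; active_workers()
    // is the subset the next task runs on, adjustable phase by phase.
    class FlexibleGangModel {
      int _total;
      int _active;
    public:
      explicit FlexibleGangModel(int total) : _total(total), _active(total) {}
      int total_workers() const  { return _total; }
      int active_workers() const { return _active; }
      void set_active_workers(int n) {
        _active = std::max(1, std::min(n, _total));  // clamp to [1, total]
      }
    };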
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Dec 01 13:42:41 2011 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Fri Dec 02 08:52:53 2011 -0500
@@ -191,7 +191,11 @@
         VM_CGC_Operation op(&cl_cl, verbose_str);
         VMThread::execute(&op);
       } else {
+        // We don't want to update the marking status if a GC pause
+        // is already underway.
+        _sts.join();
         g1h->set_marking_complete();
+        _sts.leave();
       }
 
       // Check if cleanup set the free_regions_coming flag. If it
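
The join()/leave() bracket above uses the suspendible thread set: while the concurrent mark thread is joined, a safepoint (and hence a GC pause) cannot begin, so set_marking_complete() cannot overlap a pause that is already starting. A loose standalone analogy only (a mutex standing in for the safepoint protocol; this is not how _sts is implemented):

    #include <mutex>

    static std::mutex pause_protocol;  // models "a pause may not start now"

    void set_marking_complete_safely() {
      std::lock_guard<std::mutex> joined(pause_protocol);  // like _sts.join()
      // ... update the marking status; no pause can begin while joined ...
    }                                                      // like _sts.leave()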
7.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Dec 01 13:42:41 2011 -0500 7.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Fri Dec 02 08:52:53 2011 -0500 7.3 @@ -66,6 +66,18 @@ 7.4 // apply to TLAB allocation, which is not part of this interface: it 7.5 // is done by clients of this interface.) 7.6 7.7 +// Notes on implementation of parallelism in different tasks. 7.8 +// 7.9 +// G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism. 7.10 +// The number of GC workers is passed to heap_region_par_iterate_chunked(). 7.11 +// It does use run_task() which sets _n_workers in the task. 7.12 +// G1ParTask executes g1_process_strong_roots() -> 7.13 +// SharedHeap::process_strong_roots() which calls eventuall to 7.14 +// CardTableModRefBS::par_non_clean_card_iterate_work() which uses 7.15 +// SequentialSubTasksDone. SharedHeap::process_strong_roots() also 7.16 +// directly uses SubTasksDone (_process_strong_tasks field in SharedHeap). 7.17 +// 7.18 + 7.19 // Local to this file. 7.20 7.21 class RefineCardTableEntryClosure: public CardTableEntryClosure { 7.22 @@ -176,8 +188,7 @@ 7.23 hr->set_next_young_region(_head); 7.24 _head = hr; 7.25 7.26 - hr->set_young(); 7.27 - double yg_surv_rate = _g1h->g1_policy()->predict_yg_surv_rate((int)_length); 7.28 + _g1h->g1_policy()->set_region_eden(hr, (int) _length); 7.29 ++_length; 7.30 } 7.31 7.32 @@ -190,7 +201,6 @@ 7.33 _survivor_tail = hr; 7.34 } 7.35 _survivor_head = hr; 7.36 - 7.37 ++_survivor_length; 7.38 } 7.39 7.40 @@ -315,16 +325,20 @@ 7.41 _g1h->g1_policy()->note_start_adding_survivor_regions(); 7.42 _g1h->g1_policy()->finished_recalculating_age_indexes(true /* is_survivors */); 7.43 7.44 + int young_index_in_cset = 0; 7.45 for (HeapRegion* curr = _survivor_head; 7.46 curr != NULL; 7.47 curr = curr->get_next_young_region()) { 7.48 - _g1h->g1_policy()->set_region_survivors(curr); 7.49 + _g1h->g1_policy()->set_region_survivor(curr, young_index_in_cset); 7.50 7.51 // The region is a non-empty survivor so let's add it to 7.52 // the incremental collection set for the next evacuation 7.53 // pause. 7.54 _g1h->g1_policy()->add_region_to_incremental_cset_rhs(curr); 7.55 - } 7.56 + young_index_in_cset += 1; 7.57 + } 7.58 + assert((size_t) young_index_in_cset == _survivor_length, 7.59 + "post-condition"); 7.60 _g1h->g1_policy()->note_stop_adding_survivor_regions(); 7.61 7.62 _head = _survivor_head; 7.63 @@ -1154,6 +1168,7 @@ 7.64 void work(int i) { 7.65 RebuildRSOutOfRegionClosure rebuild_rs(_g1, i); 7.66 _g1->heap_region_par_iterate_chunked(&rebuild_rs, i, 7.67 + _g1->workers()->active_workers(), 7.68 HeapRegion::RebuildRSClaimValue); 7.69 } 7.70 }; 7.71 @@ -1358,12 +1373,32 @@ 7.72 } 7.73 7.74 // Rebuild remembered sets of all regions. 7.75 - 7.76 if (G1CollectedHeap::use_parallel_gc_threads()) { 7.77 + int n_workers = 7.78 + AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), 7.79 + workers()->active_workers(), 7.80 + Threads::number_of_non_daemon_threads()); 7.81 + assert(UseDynamicNumberOfGCThreads || 7.82 + n_workers == workers()->total_workers(), 7.83 + "If not dynamic should be using all the workers"); 7.84 + workers()->set_active_workers(n_workers); 7.85 + // Set parallel threads in the heap (_n_par_threads) only 7.86 + // before a parallel phase and always reset it to 0 after 7.87 + // the phase so that the number of parallel threads does 7.88 + // no get carried forward to a serial phase where there 7.89 + // may be code that is "possibly_parallel". 
7.90 + set_par_threads(n_workers); 7.91 + 7.92 ParRebuildRSTask rebuild_rs_task(this); 7.93 assert(check_heap_region_claim_values( 7.94 HeapRegion::InitialClaimValue), "sanity check"); 7.95 - set_par_threads(workers()->total_workers()); 7.96 + assert(UseDynamicNumberOfGCThreads || 7.97 + workers()->active_workers() == workers()->total_workers(), 7.98 + "Unless dynamic should use total workers"); 7.99 + // Use the most recent number of active workers 7.100 + assert(workers()->active_workers() > 0, 7.101 + "Active workers not properly set"); 7.102 + set_par_threads(workers()->active_workers()); 7.103 workers()->run_task(&rebuild_rs_task); 7.104 set_par_threads(0); 7.105 assert(check_heap_region_claim_values( 7.106 @@ -2475,11 +2510,17 @@ 7.107 void 7.108 G1CollectedHeap::heap_region_par_iterate_chunked(HeapRegionClosure* cl, 7.109 int worker, 7.110 + int no_of_par_workers, 7.111 jint claim_value) { 7.112 const size_t regions = n_regions(); 7.113 - const size_t worker_num = (G1CollectedHeap::use_parallel_gc_threads() ? ParallelGCThreads : 1); 7.114 + const size_t max_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 7.115 + no_of_par_workers : 7.116 + 1); 7.117 + assert(UseDynamicNumberOfGCThreads || 7.118 + no_of_par_workers == workers()->total_workers(), 7.119 + "Non dynamic should use fixed number of workers"); 7.120 // try to spread out the starting points of the workers 7.121 - const size_t start_index = regions / worker_num * (size_t) worker; 7.122 + const size_t start_index = regions / max_workers * (size_t) worker; 7.123 7.124 // each worker will actually look at all regions 7.125 for (size_t count = 0; count < regions; ++count) { 7.126 @@ -2576,10 +2617,10 @@ 7.127 _claim_value(claim_value), _failures(0), _sh_region(NULL) { } 7.128 bool doHeapRegion(HeapRegion* r) { 7.129 if (r->claim_value() != _claim_value) { 7.130 - gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), " 7.131 + gclog_or_tty->print_cr("Region " HR_FORMAT ", " 7.132 "claim value = %d, should be %d", 7.133 - r->bottom(), r->end(), r->claim_value(), 7.134 - _claim_value); 7.135 + HR_FORMAT_PARAMS(r), 7.136 + r->claim_value(), _claim_value); 7.137 ++_failures; 7.138 } 7.139 if (!r->isHumongous()) { 7.140 @@ -2588,9 +2629,9 @@ 7.141 _sh_region = r; 7.142 } else if (r->continuesHumongous()) { 7.143 if (r->humongous_start_region() != _sh_region) { 7.144 - gclog_or_tty->print_cr("Region ["PTR_FORMAT","PTR_FORMAT"), " 7.145 + gclog_or_tty->print_cr("Region " HR_FORMAT ", " 7.146 "HS = "PTR_FORMAT", should be "PTR_FORMAT, 7.147 - r->bottom(), r->end(), 7.148 + HR_FORMAT_PARAMS(r), 7.149 r->humongous_start_region(), 7.150 _sh_region); 7.151 ++_failures; 7.152 @@ -2608,8 +2649,63 @@ 7.153 heap_region_iterate(&cl); 7.154 return cl.failures() == 0; 7.155 } 7.156 + 7.157 +class CheckClaimValuesInCSetHRClosure: public HeapRegionClosure { 7.158 + jint _claim_value; 7.159 + size_t _failures; 7.160 + 7.161 +public: 7.162 + CheckClaimValuesInCSetHRClosure(jint claim_value) : 7.163 + _claim_value(claim_value), 7.164 + _failures(0) { } 7.165 + 7.166 + size_t failures() { 7.167 + return _failures; 7.168 + } 7.169 + 7.170 + bool doHeapRegion(HeapRegion* hr) { 7.171 + assert(hr->in_collection_set(), "how?"); 7.172 + assert(!hr->isHumongous(), "H-region in CSet"); 7.173 + if (hr->claim_value() != _claim_value) { 7.174 + gclog_or_tty->print_cr("CSet Region " HR_FORMAT ", " 7.175 + "claim value = %d, should be %d", 7.176 + HR_FORMAT_PARAMS(hr), 7.177 + hr->claim_value(), _claim_value); 7.178 + _failures += 1; 7.179 + } 7.180 + 
return false; 7.181 + } 7.182 +}; 7.183 + 7.184 +bool G1CollectedHeap::check_cset_heap_region_claim_values(jint claim_value) { 7.185 + CheckClaimValuesInCSetHRClosure cl(claim_value); 7.186 + collection_set_iterate(&cl); 7.187 + return cl.failures() == 0; 7.188 +} 7.189 #endif // ASSERT 7.190 7.191 +// We want the parallel threads to start their collection 7.192 +// set iteration at different collection set regions to 7.193 +// avoid contention. 7.194 +// If we have: 7.195 +// n collection set regions 7.196 +// p threads 7.197 +// Then thread t will start at region t * floor (n/p) 7.198 + 7.199 +HeapRegion* G1CollectedHeap::start_cset_region_for_worker(int worker_i) { 7.200 + HeapRegion* result = g1_policy()->collection_set(); 7.201 + if (G1CollectedHeap::use_parallel_gc_threads()) { 7.202 + size_t cs_size = g1_policy()->cset_region_length(); 7.203 + int n_workers = workers()->total_workers(); 7.204 + size_t cs_spans = cs_size / n_workers; 7.205 + size_t ind = cs_spans * worker_i; 7.206 + for (size_t i = 0; i < ind; i++) { 7.207 + result = result->next_in_collection_set(); 7.208 + } 7.209 + } 7.210 + return result; 7.211 +} 7.212 + 7.213 void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) { 7.214 HeapRegion* r = g1_policy()->collection_set(); 7.215 while (r != NULL) { 7.216 @@ -2918,6 +3014,7 @@ 7.217 HandleMark hm; 7.218 VerifyRegionClosure blk(_allow_dirty, true, _vo); 7.219 _g1h->heap_region_par_iterate_chunked(&blk, worker_i, 7.220 + _g1h->workers()->active_workers(), 7.221 HeapRegion::ParVerifyClaimValue); 7.222 if (blk.failures()) { 7.223 _failures = true; 7.224 @@ -2935,6 +3032,10 @@ 7.225 if (SafepointSynchronize::is_at_safepoint() || ! UseTLAB) { 7.226 if (!silent) { gclog_or_tty->print("Roots (excluding permgen) "); } 7.227 VerifyRootsClosure rootsCl(vo); 7.228 + 7.229 + assert(Thread::current()->is_VM_thread(), 7.230 + "Expected to be executed serially by the VM thread at this point"); 7.231 + 7.232 CodeBlobToOopClosure blobsCl(&rootsCl, /*do_marking=*/ false); 7.233 7.234 // We apply the relevant closures to all the oops in the 7.235 @@ -2979,7 +3080,10 @@ 7.236 "sanity check"); 7.237 7.238 G1ParVerifyTask task(this, allow_dirty, vo); 7.239 - int n_workers = workers()->total_workers(); 7.240 + assert(UseDynamicNumberOfGCThreads || 7.241 + workers()->active_workers() == workers()->total_workers(), 7.242 + "If not dynamic should be using all the workers"); 7.243 + int n_workers = workers()->active_workers(); 7.244 set_par_threads(n_workers); 7.245 workers()->run_task(&task); 7.246 set_par_threads(0); 7.247 @@ -2987,6 +3091,8 @@ 7.248 failures = true; 7.249 } 7.250 7.251 + // Checks that the expected amount of parallel work was done. 7.252 + // The implication is that n_workers is > 0. 
7.253 assert(check_heap_region_claim_values(HeapRegion::ParVerifyClaimValue), 7.254 "sanity check"); 7.255 7.256 @@ -3210,8 +3316,6 @@ 7.257 } 7.258 } 7.259 7.260 -// <NEW PREDICTION> 7.261 - 7.262 double G1CollectedHeap::predict_region_elapsed_time_ms(HeapRegion *hr, 7.263 bool young) { 7.264 return _g1_policy->predict_region_elapsed_time_ms(hr, young); 7.265 @@ -3251,7 +3355,7 @@ 7.266 void 7.267 G1CollectedHeap::setup_surviving_young_words() { 7.268 guarantee( _surviving_young_words == NULL, "pre-condition" ); 7.269 - size_t array_length = g1_policy()->young_cset_length(); 7.270 + size_t array_length = g1_policy()->young_cset_region_length(); 7.271 _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, array_length); 7.272 if (_surviving_young_words == NULL) { 7.273 vm_exit_out_of_memory(sizeof(size_t) * array_length, 7.274 @@ -3268,7 +3372,7 @@ 7.275 void 7.276 G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) { 7.277 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 7.278 - size_t array_length = g1_policy()->young_cset_length(); 7.279 + size_t array_length = g1_policy()->young_cset_region_length(); 7.280 for (size_t i = 0; i < array_length; ++i) 7.281 _surviving_young_words[i] += surv_young_words[i]; 7.282 } 7.283 @@ -3280,8 +3384,6 @@ 7.284 _surviving_young_words = NULL; 7.285 } 7.286 7.287 -// </NEW PREDICTION> 7.288 - 7.289 #ifdef ASSERT 7.290 class VerifyCSetClosure: public HeapRegionClosure { 7.291 public: 7.292 @@ -3404,6 +3506,10 @@ 7.293 assert(check_young_list_well_formed(), 7.294 "young list should be well formed"); 7.295 7.296 + // Don't dynamically change the number of GC threads this early. A value of 7.297 + // 0 is used to indicate serial work. When parallel work is done, 7.298 + // it will be set. 
7.299 + 7.300 { // Call to jvmpi::post_class_unload_events must occur outside of active GC 7.301 IsGCActiveMark x; 7.302 7.303 @@ -3617,7 +3723,8 @@ 7.304 double end_time_sec = os::elapsedTime(); 7.305 double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; 7.306 g1_policy()->record_pause_time_ms(pause_time_ms); 7.307 - g1_policy()->record_collection_pause_end(); 7.308 + int active_gc_threads = workers()->active_workers(); 7.309 + g1_policy()->record_collection_pause_end(active_gc_threads); 7.310 7.311 MemoryService::track_memory_usage(); 7.312 7.313 @@ -4158,7 +4265,7 @@ 7.314 // non-young regions (where the age is -1) 7.315 // We also add a few elements at the beginning and at the end in 7.316 // an attempt to eliminate cache contention 7.317 - size_t real_length = 1 + _g1h->g1_policy()->young_cset_length(); 7.318 + size_t real_length = 1 + _g1h->g1_policy()->young_cset_region_length(); 7.319 size_t array_length = PADDING_ELEM_NUM + 7.320 real_length + 7.321 PADDING_ELEM_NUM; 7.322 @@ -4564,13 +4671,13 @@ 7.323 } 7.324 7.325 public: 7.326 - G1ParTask(G1CollectedHeap* g1h, int workers, RefToScanQueueSet *task_queues) 7.327 + G1ParTask(G1CollectedHeap* g1h, 7.328 + RefToScanQueueSet *task_queues) 7.329 : AbstractGangTask("G1 collection"), 7.330 _g1h(g1h), 7.331 _queues(task_queues), 7.332 - _terminator(workers, _queues), 7.333 - _stats_lock(Mutex::leaf, "parallel G1 stats lock", true), 7.334 - _n_workers(workers) 7.335 + _terminator(0, _queues), 7.336 + _stats_lock(Mutex::leaf, "parallel G1 stats lock", true) 7.337 {} 7.338 7.339 RefToScanQueueSet* queues() { return _queues; } 7.340 @@ -4579,6 +4686,20 @@ 7.341 return queues()->queue(i); 7.342 } 7.343 7.344 + ParallelTaskTerminator* terminator() { return &_terminator; } 7.345 + 7.346 + virtual void set_for_termination(int active_workers) { 7.347 + // This task calls set_n_termination() in par_non_clean_card_iterate_work() 7.348 + // in the young space (_par_seq_tasks) in the G1 heap 7.349 + // for SequentialSubTasksDone. 7.350 + // This task also uses SubTasksDone in SharedHeap and G1CollectedHeap 7.351 + // both of which need setting by set_n_termination(). 7.352 + _g1h->SharedHeap::set_n_termination(active_workers); 7.353 + _g1h->set_n_termination(active_workers); 7.354 + terminator()->reset_for_reuse(active_workers); 7.355 + _n_workers = active_workers; 7.356 + } 7.357 + 7.358 void work(int i) { 7.359 if (i >= _n_workers) return; // no work needed this round 7.360 7.361 @@ -4863,12 +4984,12 @@ 7.362 private: 7.363 G1CollectedHeap* _g1h; 7.364 RefToScanQueueSet* _queues; 7.365 - WorkGang* _workers; 7.366 + FlexibleWorkGang* _workers; 7.367 int _active_workers; 7.368 7.369 public: 7.370 G1STWRefProcTaskExecutor(G1CollectedHeap* g1h, 7.371 - WorkGang* workers, 7.372 + FlexibleWorkGang* workers, 7.373 RefToScanQueueSet *task_queues, 7.374 int n_workers) : 7.375 _g1h(g1h), 7.376 @@ -5124,11 +5245,13 @@ 7.377 // referents points to another object which is also referenced by an 7.378 // object discovered by the STW ref processor. 7.379 7.380 - int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 7.381 - workers()->total_workers() : 1); 7.382 - 7.383 - set_par_threads(n_workers); 7.384 - G1ParPreserveCMReferentsTask keep_cm_referents(this, n_workers, _task_queues); 7.385 + int active_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 
7.386 + workers()->active_workers() : 1); 7.387 + 7.388 + assert(active_workers == workers()->active_workers(), 7.389 + "Need to reset active_workers"); 7.390 + set_par_threads(active_workers); 7.391 + G1ParPreserveCMReferentsTask keep_cm_referents(this, active_workers, _task_queues); 7.392 7.393 if (G1CollectedHeap::use_parallel_gc_threads()) { 7.394 workers()->run_task(&keep_cm_referents); 7.395 @@ -5194,7 +5317,6 @@ 7.396 NULL); 7.397 } else { 7.398 // Parallel reference processing 7.399 - int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); 7.400 assert(rp->num_q() == active_workers, "sanity"); 7.401 assert(active_workers <= rp->max_num_q(), "sanity"); 7.402 7.403 @@ -5227,7 +5349,9 @@ 7.404 } else { 7.405 // Parallel reference enqueuing 7.406 7.407 - int active_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); 7.408 + int active_workers = (ParallelGCThreads > 0 ? workers()->active_workers() : 1); 7.409 + assert(active_workers == workers()->active_workers(), 7.410 + "Need to reset active_workers"); 7.411 assert(rp->num_q() == active_workers, "sanity"); 7.412 assert(active_workers <= rp->max_num_q(), "sanity"); 7.413 7.414 @@ -5254,9 +5378,24 @@ 7.415 concurrent_g1_refine()->set_use_cache(false); 7.416 concurrent_g1_refine()->clear_hot_cache_claimed_index(); 7.417 7.418 - int n_workers = (ParallelGCThreads > 0 ? workers()->total_workers() : 1); 7.419 - set_par_threads(n_workers); 7.420 - G1ParTask g1_par_task(this, n_workers, _task_queues); 7.421 + int n_workers; 7.422 + if (G1CollectedHeap::use_parallel_gc_threads()) { 7.423 + n_workers = 7.424 + AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(), 7.425 + workers()->active_workers(), 7.426 + Threads::number_of_non_daemon_threads()); 7.427 + assert(UseDynamicNumberOfGCThreads || 7.428 + n_workers == workers()->total_workers(), 7.429 + "If not dynamic should be using all the workers"); 7.430 + set_par_threads(n_workers); 7.431 + } else { 7.432 + assert(n_par_threads() == 0, 7.433 + "Should be the original non-parallel value"); 7.434 + n_workers = 1; 7.435 + } 7.436 + workers()->set_active_workers(n_workers); 7.437 + 7.438 + G1ParTask g1_par_task(this, _task_queues); 7.439 7.440 init_for_evac_failure(NULL); 7.441 7.442 @@ -5269,6 +5408,10 @@ 7.443 // The individual threads will set their evac-failure closures. 7.444 StrongRootsScope srs(this); 7.445 if (ParallelGCVerbose) G1ParScanThreadState::print_termination_stats_hdr(); 7.446 + // These tasks use ShareHeap::_process_strong_tasks 7.447 + assert(UseDynamicNumberOfGCThreads || 7.448 + workers()->active_workers() == workers()->total_workers(), 7.449 + "If not dynamic should be using all the workers"); 7.450 workers()->run_task(&g1_par_task); 7.451 } else { 7.452 StrongRootsScope srs(this); 7.453 @@ -5277,6 +5420,7 @@ 7.454 7.455 double par_time = (os::elapsedTime() - start_par) * 1000.0; 7.456 g1_policy()->record_par_time(par_time); 7.457 + 7.458 set_par_threads(0); 7.459 7.460 // Process any discovered reference objects - we have 7.461 @@ -5304,8 +5448,11 @@ 7.462 7.463 finalize_for_evac_failure(); 7.464 7.465 - // Must do this before removing self-forwarding pointers, which clears 7.466 - // the per-region evac-failure flags. 7.467 + // Must do this before clearing the per-region evac-failure flags 7.468 + // (which is currently done when we free the collection set). 
7.469 + // We also only do this if marking is actually in progress and so 7.470 + // have to do this before we set the mark_in_progress flag at the 7.471 + // end of an initial mark pause. 7.472 concurrent_mark()->complete_marking_in_collection_set(); 7.473 7.474 if (evacuation_failed()) { 7.475 @@ -5567,7 +5714,6 @@ 7.476 7.477 while (cur != NULL) { 7.478 assert(!is_on_master_free_list(cur), "sanity"); 7.479 - 7.480 if (non_young) { 7.481 if (cur->is_young()) { 7.482 double end_sec = os::elapsedTime(); 7.483 @@ -5578,12 +5724,14 @@ 7.484 non_young = false; 7.485 } 7.486 } else { 7.487 - double end_sec = os::elapsedTime(); 7.488 - double elapsed_ms = (end_sec - start_sec) * 1000.0; 7.489 - young_time_ms += elapsed_ms; 7.490 - 7.491 - start_sec = os::elapsedTime(); 7.492 - non_young = true; 7.493 + if (!cur->is_young()) { 7.494 + double end_sec = os::elapsedTime(); 7.495 + double elapsed_ms = (end_sec - start_sec) * 1000.0; 7.496 + young_time_ms += elapsed_ms; 7.497 + 7.498 + start_sec = os::elapsedTime(); 7.499 + non_young = true; 7.500 + } 7.501 } 7.502 7.503 rs_lengths += cur->rem_set()->occupied(); 7.504 @@ -5595,8 +5743,8 @@ 7.505 7.506 if (cur->is_young()) { 7.507 int index = cur->young_index_in_cset(); 7.508 - guarantee( index != -1, "invariant" ); 7.509 - guarantee( (size_t)index < policy->young_cset_length(), "invariant" ); 7.510 + assert(index != -1, "invariant"); 7.511 + assert((size_t) index < policy->young_cset_region_length(), "invariant"); 7.512 size_t words_survived = _surviving_young_words[index]; 7.513 cur->record_surv_words_in_group(words_survived); 7.514 7.515 @@ -5607,7 +5755,7 @@ 7.516 cur->set_next_young_region(NULL); 7.517 } else { 7.518 int index = cur->young_index_in_cset(); 7.519 - guarantee( index == -1, "invariant" ); 7.520 + assert(index == -1, "invariant"); 7.521 } 7.522 7.523 assert( (cur->is_young() && cur->young_index_in_cset() > -1) || 7.524 @@ -5615,13 +5763,26 @@ 7.525 "invariant" ); 7.526 7.527 if (!cur->evacuation_failed()) { 7.528 + MemRegion used_mr = cur->used_region(); 7.529 + 7.530 // And the region is empty. 7.531 - assert(!cur->is_empty(), "Should not have empty regions in a CS."); 7.532 + assert(!used_mr.is_empty(), "Should not have empty regions in a CS."); 7.533 + 7.534 + // If marking is in progress then clear any objects marked in 7.535 + // the current region. Note mark_in_progress() returns false, 7.536 + // even during an initial mark pause, until the set_marking_started() 7.537 + // call which takes place later in the pause. 7.538 + if (mark_in_progress()) { 7.539 + assert(!g1_policy()->during_initial_mark_pause(), "sanity"); 7.540 + _cm->nextMarkBitMap()->clearRange(used_mr); 7.541 + } 7.542 + 7.543 free_region(cur, &pre_used, &local_free_list, false /* par */); 7.544 } else { 7.545 cur->uninstall_surv_rate_group(); 7.546 - if (cur->is_young()) 7.547 + if (cur->is_young()) { 7.548 cur->set_young_index_in_cset(-1); 7.549 + } 7.550 cur->set_not_young(); 7.551 cur->set_evacuation_failed(false); 7.552 // The region is now considered to be old. 
7.553 @@ -5635,10 +5796,12 @@ 7.554 7.555 double end_sec = os::elapsedTime(); 7.556 double elapsed_ms = (end_sec - start_sec) * 1000.0; 7.557 - if (non_young) 7.558 + 7.559 + if (non_young) { 7.560 non_young_time_ms += elapsed_ms; 7.561 - else 7.562 + } else { 7.563 young_time_ms += elapsed_ms; 7.564 + } 7.565 7.566 update_sets_after_freeing_regions(pre_used, &local_free_list, 7.567 NULL /* old_proxy_set */, 7.568 @@ -5722,7 +5885,6 @@ 7.569 assert(heap_lock_held_for_gc(), 7.570 "the heap lock should already be held by or for this thread"); 7.571 _young_list->push_region(hr); 7.572 - g1_policy()->set_region_short_lived(hr); 7.573 } 7.574 7.575 class NoYoungRegionsClosure: public HeapRegionClosure { 7.576 @@ -5880,7 +6042,6 @@ 7.577 HeapRegion* new_alloc_region = new_region(word_size, 7.578 false /* do_expand */); 7.579 if (new_alloc_region != NULL) { 7.580 - g1_policy()->update_region_num(true /* next_is_young */); 7.581 set_region_short_lived_locked(new_alloc_region); 7.582 _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full); 7.583 return new_alloc_region; 7.584 @@ -5908,6 +6069,21 @@ 7.585 return _g1h->new_mutator_alloc_region(word_size, force); 7.586 } 7.587 7.588 +void G1CollectedHeap::set_par_threads() { 7.589 + // Don't change the number of workers. Use the value previously set 7.590 + // in the workgroup. 7.591 + int n_workers = workers()->active_workers(); 7.592 + assert(UseDynamicNumberOfGCThreads || 7.593 + n_workers == workers()->total_workers(), 7.594 + "Otherwise should be using the total number of workers"); 7.595 + if (n_workers == 0) { 7.596 + assert(false, "Should have been set in prior evacuation pause."); 7.597 + n_workers = ParallelGCThreads; 7.598 + workers()->set_active_workers(n_workers); 7.599 + } 7.600 + set_par_threads(n_workers); 7.601 +} 7.602 + 7.603 void MutatorAllocRegion::retire_region(HeapRegion* alloc_region, 7.604 size_t allocated_bytes) { 7.605 _g1h->retire_mutator_alloc_region(alloc_region, allocated_bytes);
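The evacuation-pause hunks above replace the unconditional use of total_workers() with a per-pause choice: AdaptiveSizePolicy::calc_active_workers() picks a count, the result is pushed into the work gang with set_active_workers(), and only then is the parallel task run. Below is a minimal standalone sketch of that sequence; the WorkGang struct and the calc_active_workers() heuristic here are simplified stand-ins for illustration, not the HotSpot implementations.

    #include <algorithm>
    #include <cstdio>

    struct WorkGang {
      int total_workers;
      int active_workers;
      void set_active_workers(int n) { active_workers = n; }
    };

    // Simplified stand-in for AdaptiveSizePolicy::calc_active_workers(): scale
    // the gang with the number of non-daemon application threads and clamp the
    // result to [1, total]. The real policy is more involved; this only models
    // its shape (inputs: gang size, previous choice, mutator thread count).
    static int calc_active_workers(int total_workers, int prev_active,
                                   int non_daemon_threads) {
      (void)prev_active;  // a real policy would also smooth against the previous value
      int wanted = 2 * non_daemon_threads;
      return std::max(1, std::min(total_workers, wanted));
    }

    int main() {
      WorkGang gang = { /*total_workers=*/8, /*active_workers=*/8 };
      bool use_dynamic = true;  // models -XX:+UseDynamicNumberOfGCThreads
      int n_workers = use_dynamic
          ? calc_active_workers(gang.total_workers, gang.active_workers,
                                /*non_daemon_threads=*/3)
          : gang.total_workers;  // non-dynamic: the whole gang, as the asserts above expect
      gang.set_active_workers(n_workers);  // push the choice into the gang before the task runs
      std::printf("running the parallel task with %d of %d workers\n",
                  gang.active_workers, gang.total_workers);
      return 0;
    }

Setting the gang's active count before running the task is what makes the later "active_workers == workers()->active_workers()" asserts in this file hold.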
8.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Thu Dec 01 13:42:41 2011 -0500 8.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp Fri Dec 02 08:52:53 2011 -0500 8.3 @@ -987,6 +987,16 @@ 8.4 8.5 void set_par_threads(int t) { 8.6 SharedHeap::set_par_threads(t); 8.7 + // This is done in SharedHeap, but oddly there are 8.8 + // two _process_strong_tasks instances in a G1CollectedHeap, 8.9 + // so do it here too. 8.10 + _process_strong_tasks->set_n_threads(t); 8.11 + } 8.12 + 8.13 + // Set _n_par_threads according to a policy TBD. 8.14 + void set_par_threads(); 8.15 + 8.16 + void set_n_termination(int t) { 8.17 _process_strong_tasks->set_n_threads(t); 8.18 } 8.19 8.20 @@ -1276,6 +1286,7 @@ 8.21 // i.e., that a closure never attempt to abort a traversal. 8.22 void heap_region_par_iterate_chunked(HeapRegionClosure* blk, 8.23 int worker, 8.24 + int no_of_par_workers, 8.25 jint claim_value); 8.26 8.27 // It resets all the region claim values to the default. 8.28 @@ -1283,8 +1294,17 @@ 8.29 8.30 #ifdef ASSERT 8.31 bool check_heap_region_claim_values(jint claim_value); 8.32 + 8.33 + // Same as the routine above but only checks regions in the 8.34 + // current collection set. 8.35 + bool check_cset_heap_region_claim_values(jint claim_value); 8.36 #endif // ASSERT 8.37 8.38 + // Given the id of a worker, calculate a suitable 8.39 + // starting region for iterating over the current 8.40 + // collection set. 8.41 + HeapRegion* start_cset_region_for_worker(int worker_i); 8.42 + 8.43 // Iterate over the regions (if any) in the current collection set. 8.44 void collection_set_iterate(HeapRegionClosure* blk); 8.45 8.46 @@ -1610,16 +1630,12 @@ 8.47 public: 8.48 void stop_conc_gc_threads(); 8.49 8.50 - // <NEW PREDICTION> 8.51 - 8.52 double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); 8.53 void check_if_region_is_too_expensive(double predicted_time_ms); 8.54 size_t pending_card_num(); 8.55 size_t max_pending_card_num(); 8.56 size_t cards_scanned(); 8.57 8.58 - // </NEW PREDICTION> 8.59 - 8.60 protected: 8.61 size_t _max_heap_capacity; 8.62 };
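start_cset_region_for_worker(int worker_i) is only declared in this header. As a rough illustration of what such a routine can do, the sketch below spreads the workers' starting points evenly over the collection set's linked list, so parallel iteration does not begin on the same leading regions. This is an assumed shape under illustrative types, not the actual G1 implementation.

    #include <cstdio>
    #include <vector>

    struct HeapRegion { int index; HeapRegion* next; };

    static HeapRegion* start_region_for_worker(HeapRegion* cset_head, int cset_length,
                                               int worker_i, int n_workers) {
      // Worker i skips i * length / n_workers regions from the head, so the
      // n_workers starting points are spread evenly over the list.
      int skip = (cset_length * worker_i) / n_workers;
      HeapRegion* hr = cset_head;
      for (int i = 0; i < skip && hr != NULL; ++i) {
        hr = hr->next;
      }
      return hr;
    }

    int main() {
      std::vector<HeapRegion> regions(8);
      for (int i = 0; i < 8; ++i) {
        regions[i].index = i;
        regions[i].next = (i + 1 < 8) ? &regions[i + 1] : NULL;
      }
      for (int w = 0; w < 4; ++w) {
        std::printf("worker %d starts at region %d\n", w,
                    start_region_for_worker(&regions[0], 8, w, 4)->index);
      }
      return 0;
    }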
9.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Dec 01 13:42:41 2011 -0500 9.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Fri Dec 02 08:52:53 2011 -0500 9.3 @@ -36,10 +36,6 @@ 9.4 #include "runtime/mutexLocker.hpp" 9.5 #include "utilities/debug.hpp" 9.6 9.7 -#define PREDICTIONS_VERBOSE 0 9.8 - 9.9 -// <NEW PREDICTION> 9.10 - 9.11 // Different defaults for different number of GC threads 9.12 // They were chosen by running GCOld and SPECjbb on debris with different 9.13 // numbers of GC threads and choosing them based on the results 9.14 @@ -80,8 +76,6 @@ 9.15 1.0, 0.7, 0.7, 0.5, 0.5, 0.42, 0.42, 0.30 9.16 }; 9.17 9.18 -// </NEW PREDICTION> 9.19 - 9.20 // Help class for avoiding interleaved logging 9.21 class LineBuffer: public StackObj { 9.22 9.23 @@ -137,10 +131,6 @@ 9.24 _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads() 9.25 ? ParallelGCThreads : 1), 9.26 9.27 - _n_pauses(0), 9.28 - _recent_rs_scan_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.29 - _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.30 - _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.31 _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.32 _all_pause_times_ms(new NumberSeq()), 9.33 _stop_world_start(0.0), 9.34 @@ -148,11 +138,10 @@ 9.35 _all_yield_times_ms(new NumberSeq()), 9.36 _using_new_ratio_calculations(false), 9.37 9.38 - _all_mod_union_times_ms(new NumberSeq()), 9.39 - 9.40 _summary(new Summary()), 9.41 9.42 _cur_clear_ct_time_ms(0.0), 9.43 + _mark_closure_time_ms(0.0), 9.44 9.45 _cur_ref_proc_time_ms(0.0), 9.46 _cur_ref_enq_time_ms(0.0), 9.47 @@ -165,11 +154,6 @@ 9.48 _num_cc_clears(0L), 9.49 #endif 9.50 9.51 - _region_num_young(0), 9.52 - _region_num_tenured(0), 9.53 - _prev_region_num_young(0), 9.54 - _prev_region_num_tenured(0), 9.55 - 9.56 _aux_num(10), 9.57 _all_aux_times_ms(new NumberSeq[_aux_num]), 9.58 _cur_aux_start_times_ms(new double[_aux_num]), 9.59 @@ -179,8 +163,6 @@ 9.60 _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.61 _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.62 9.63 - // <NEW PREDICTION> 9.64 - 9.65 _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)), 9.66 _prev_collection_pause_end_ms(0.0), 9.67 _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)), 9.68 @@ -199,13 +181,10 @@ 9.69 new TruncatedSeq(TruncatedSeqLength)), 9.70 9.71 _pending_cards_seq(new TruncatedSeq(TruncatedSeqLength)), 9.72 - _scanned_cards_seq(new TruncatedSeq(TruncatedSeqLength)), 9.73 _rs_lengths_seq(new TruncatedSeq(TruncatedSeqLength)), 9.74 9.75 _pause_time_target_ms((double) MaxGCPauseMillis), 9.76 9.77 - // </NEW PREDICTION> 9.78 - 9.79 _full_young_gcs(true), 9.80 _full_young_pause_num(0), 9.81 _partial_young_pause_num(0), 9.82 @@ -221,16 +200,10 @@ 9.83 9.84 _recent_prev_end_times_for_all_gcs_sec(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.85 9.86 - _recent_CS_bytes_used_before(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.87 - _recent_CS_bytes_surviving(new TruncatedSeq(NumPrevPausesForHeuristics)), 9.88 - 9.89 _recent_avg_pause_time_ratio(0.0), 9.90 9.91 _all_full_gc_times_ms(new NumberSeq()), 9.92 9.93 - // G1PausesBtwnConcMark defaults to -1 9.94 - // so the hack is to do the cast QQQ FIXME 9.95 - _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark), 9.96 _initiate_conc_mark_if_possible(false), 9.97 _during_initial_mark_pause(false), 9.98 _should_revert_to_full_young_gcs(false), 9.99 @@ -242,22 
+215,21 @@ 9.100 9.101 _prev_collection_pause_used_at_end_bytes(0), 9.102 9.103 + _eden_cset_region_length(0), 9.104 + _survivor_cset_region_length(0), 9.105 + _old_cset_region_length(0), 9.106 + 9.107 _collection_set(NULL), 9.108 - _collection_set_size(0), 9.109 _collection_set_bytes_used_before(0), 9.110 9.111 // Incremental CSet attributes 9.112 _inc_cset_build_state(Inactive), 9.113 _inc_cset_head(NULL), 9.114 _inc_cset_tail(NULL), 9.115 - _inc_cset_size(0), 9.116 - _inc_cset_young_index(0), 9.117 _inc_cset_bytes_used_before(0), 9.118 _inc_cset_max_finger(NULL), 9.119 - _inc_cset_recorded_young_bytes(0), 9.120 _inc_cset_recorded_rs_lengths(0), 9.121 _inc_cset_predicted_elapsed_time_ms(0.0), 9.122 - _inc_cset_predicted_bytes_to_copy(0), 9.123 9.124 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 9.125 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 9.126 @@ -325,8 +297,6 @@ 9.127 // start conservatively 9.128 _expensive_region_limit_ms = 0.5 * (double) MaxGCPauseMillis; 9.129 9.130 - // <NEW PREDICTION> 9.131 - 9.132 int index; 9.133 if (ParallelGCThreads == 0) 9.134 index = 0; 9.135 @@ -348,8 +318,6 @@ 9.136 _non_young_other_cost_per_region_ms_seq->add( 9.137 non_young_other_cost_per_region_ms_defaults[index]); 9.138 9.139 - // </NEW PREDICTION> 9.140 - 9.141 // Below, we might need to calculate the pause time target based on 9.142 // the pause interval. When we do so we are going to give G1 maximum 9.143 // flexibility and allow it to do pauses when it needs to. So, we'll 9.144 @@ -908,9 +876,6 @@ 9.145 9.146 record_survivor_regions(0, NULL, NULL); 9.147 9.148 - _prev_region_num_young = _region_num_young; 9.149 - _prev_region_num_tenured = _region_num_tenured; 9.150 - 9.151 _free_regions_at_end_of_collection = _g1->free_regions(); 9.152 // Reset survivors SurvRateGroup. 9.153 _survivor_surv_rate_group->reset(); 9.154 @@ -982,10 +947,9 @@ 9.155 _cur_aux_times_set[i] = false; 9.156 } 9.157 9.158 - // These are initialized to zero here and they are set during 9.159 + // This is initialized to zero here and is set during 9.160 // the evacuation pause if marking is in progress. 
9.160 // the evacuation pause if marking is in progress.
9.161 _cur_satb_drain_time_ms = 0.0; 9.162 - _last_satb_drain_processed_buffers = 0; 9.163 9.164 _last_young_gc_full = false; 9.165 9.166 @@ -996,10 +960,6 @@ 9.167 assert( verify_young_ages(), "region age verification" ); 9.168 } 9.169 9.170 -void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) { 9.171 - _mark_closure_time_ms = mark_closure_time_ms; 9.172 -} 9.173 - 9.174 void G1CollectorPolicy::record_concurrent_mark_init_end(double 9.175 mark_init_elapsed_time_ms) { 9.176 _during_marking = true; 9.177 @@ -1060,7 +1020,7 @@ 9.178 double total = 0.0; 9.179 LineBuffer buf(level); 9.180 buf.append("[%s (ms):", str); 9.181 - for (uint i = 0; i < ParallelGCThreads; ++i) { 9.182 + for (uint i = 0; i < no_of_gc_threads(); ++i) { 9.183 double val = data[i]; 9.184 if (val < min) 9.185 min = val; 9.186 @@ -1070,7 +1030,7 @@ 9.187 buf.append(" %3.1lf", val); 9.188 } 9.189 buf.append_and_print_cr(""); 9.190 - double avg = total / (double) ParallelGCThreads; 9.191 + double avg = total / (double) no_of_gc_threads(); 9.192 buf.append_and_print_cr(" Avg: %5.1lf, Min: %5.1lf, Max: %5.1lf, Diff: %5.1lf]", 9.193 avg, min, max, max - min); 9.194 } 9.195 @@ -1082,7 +1042,7 @@ 9.196 double total = 0.0; 9.197 LineBuffer buf(level); 9.198 buf.append("[%s :", str); 9.199 - for (uint i = 0; i < ParallelGCThreads; ++i) { 9.200 + for (uint i = 0; i < no_of_gc_threads(); ++i) { 9.201 double val = data[i]; 9.202 if (val < min) 9.203 min = val; 9.204 @@ -1092,7 +1052,7 @@ 9.205 buf.append(" %d", (int) val); 9.206 } 9.207 buf.append_and_print_cr(""); 9.208 - double avg = total / (double) ParallelGCThreads; 9.209 + double avg = total / (double) no_of_gc_threads(); 9.210 buf.append_and_print_cr(" Sum: %d, Avg: %d, Min: %d, Max: %d, Diff: %d]", 9.211 (int)total, (int)avg, (int)min, (int)max, (int)max - (int)min); 9.212 } 9.213 @@ -1112,10 +1072,10 @@ 9.214 double G1CollectorPolicy::avg_value(double* data) { 9.215 if (G1CollectedHeap::use_parallel_gc_threads()) { 9.216 double ret = 0.0; 9.217 - for (uint i = 0; i < ParallelGCThreads; ++i) { 9.218 + for (uint i = 0; i < no_of_gc_threads(); ++i) { 9.219 ret += data[i]; 9.220 } 9.221 - return ret / (double) ParallelGCThreads; 9.222 + return ret / (double) no_of_gc_threads(); 9.223 } else { 9.224 return data[0]; 9.225 } 9.226 @@ -1124,7 +1084,7 @@ 9.227 double G1CollectorPolicy::max_value(double* data) { 9.228 if (G1CollectedHeap::use_parallel_gc_threads()) { 9.229 double ret = data[0]; 9.230 - for (uint i = 1; i < ParallelGCThreads; ++i) { 9.231 + for (uint i = 1; i < no_of_gc_threads(); ++i) { 9.232 if (data[i] > ret) { 9.233 ret = data[i]; 9.234 } 9.235 @@ -1138,7 +1098,7 @@ 9.236 double G1CollectorPolicy::sum_of_values(double* data) { 9.237 if (G1CollectedHeap::use_parallel_gc_threads()) { 9.238 double sum = 0.0; 9.239 - for (uint i = 0; i < ParallelGCThreads; i++) { 9.240 + for (uint i = 0; i < no_of_gc_threads(); i++) { 9.241 sum += data[i]; 9.242 } 9.243 return sum; 9.244 @@ -1151,7 +1111,7 @@ 9.245 double ret = data1[0] + data2[0]; 9.246 9.247 if (G1CollectedHeap::use_parallel_gc_threads()) { 9.248 - for (uint i = 1; i < ParallelGCThreads; ++i) { 9.249 + for (uint i = 1; i < no_of_gc_threads(); ++i) { 9.250 double data = data1[i] + data2[i]; 9.251 if (data > ret) { 9.252 ret = data; 9.253 @@ -1164,16 +1124,19 @@ 9.254 // Anything below that is considered to be zero 9.255 #define MIN_TIMER_GRANULARITY 0.0000001 9.256 9.257 -void G1CollectorPolicy::record_collection_pause_end() { 9.258 +void G1CollectorPolicy::record_collection_pause_end(int 
no_of_gc_threads) { 9.259 double end_time_sec = os::elapsedTime(); 9.260 double elapsed_ms = _last_pause_time_ms; 9.261 bool parallel = G1CollectedHeap::use_parallel_gc_threads(); 9.262 + assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(), 9.263 + "otherwise, the subtraction below does not make sense"); 9.264 size_t rs_size = 9.265 - _cur_collection_pause_used_regions_at_start - collection_set_size(); 9.266 + _cur_collection_pause_used_regions_at_start - cset_region_length(); 9.267 size_t cur_used_bytes = _g1->used(); 9.268 assert(cur_used_bytes == _g1->recalculate_used(), "It should!"); 9.269 bool last_pause_included_initial_mark = false; 9.270 bool update_stats = !_g1->evacuation_failed(); 9.271 + set_no_of_gc_threads(no_of_gc_threads); 9.272 9.273 #ifndef PRODUCT 9.274 if (G1YoungSurvRateVerbose) { 9.275 @@ -1226,10 +1189,6 @@ 9.276 _mmu_tracker->add_pause(end_time_sec - elapsed_ms/1000.0, 9.277 end_time_sec, false); 9.278 9.279 - guarantee(_cur_collection_pause_used_regions_at_start >= 9.280 - collection_set_size(), 9.281 - "Negative RS size?"); 9.282 9.283 // This assert is exempted when we're doing parallel collection pauses, 9.284 // because the fragmentation caused by the parallel GC allocation buffers 9.285 // can lead to more memory being used during collection than was used 9.286 @@ -1253,8 +1212,6 @@ 9.287 (double)surviving_bytes/ 9.288 (double)_collection_set_bytes_used_before; 9.289 9.290 - _n_pauses++; 9.291 9.292 // These values are used to update the summary information that is 9.293 // displayed when TraceGen0Time is enabled, and are output as part 9.294 // of the PrintGCDetails output, in the non-parallel case. 9.295 @@ -1291,14 +1248,15 @@ 9.296 // current value of "other time" 9.297 other_time_ms -= _cur_clear_ct_time_ms; 9.298 9.299 + // Subtract the time spent completing marking in the collection 9.300 + // set. Note that if marking is not in progress during the pause, 9.301 + // the value of _mark_closure_time_ms will be zero. 9.302 + other_time_ms -= _mark_closure_time_ms; 9.303 + 9.304 // TraceGen0Time and TraceGen1Time summary info updating. 9.305 _all_pause_times_ms->add(elapsed_ms); 9.306 9.307 if (update_stats) { 9.308 - _recent_rs_scan_times_ms->add(scan_rs_time); 9.309 - _recent_pause_times_ms->add(elapsed_ms); 9.310 - _recent_rs_sizes->add(rs_size); 9.311 - 9.312 _summary->record_total_time_ms(elapsed_ms); 9.313 _summary->record_other_time_ms(other_time_ms); 9.314 9.315 @@ -1342,9 +1300,6 @@ 9.316 || surviving_bytes <= _collection_set_bytes_used_before, 9.317 "Or else negative collection!"); 9.318 9.319 - _recent_CS_bytes_used_before->add(_collection_set_bytes_used_before); 9.320 - _recent_CS_bytes_surviving->add(surviving_bytes); 9.321 9.322 // this is where we update the allocation rate of the application 9.323 double app_time_ms = 9.324 (_cur_collection_start_sec * 1000.0 - _prev_collection_pause_end_ms); 9.325 @@ -1354,13 +1309,17 @@ 9.326 // We'll just set it to something (arbitrarily) small. 9.327 app_time_ms = 1.0; 9.328 } 9.329 - size_t regions_allocated = 9.330 - (_region_num_young - _prev_region_num_young) + 9.331 - (_region_num_tenured - _prev_region_num_tenured); 9.332 + // We maintain the invariant that all objects allocated by mutator 9.333 + // threads will be allocated out of eden regions. So, we can use 9.334 + // the number of eden regions allocated since the previous GC to 9.335 + // calculate the application's allocation rate. The only exception 9.336 + // to that is humongous objects that are allocated separately.
But 9.337 + // given that humongous object allocations do not really affect 9.338 + // either the pause's duration or when the next pause will take 9.339 + // place, we can safely ignore them here. 9.340 + size_t regions_allocated = eden_cset_region_length(); 9.341 double alloc_rate_ms = (double) regions_allocated / app_time_ms; 9.342 _alloc_rate_ms_seq->add(alloc_rate_ms); 9.343 - _prev_region_num_young = _region_num_young; 9.344 - _prev_region_num_tenured = _region_num_tenured; 9.345 9.346 double interval_ms = 9.347 (end_time_sec - _recent_prev_end_times_for_all_gcs_sec->oldest()) * 1000.0; 9.348 @@ -1398,33 +1357,6 @@ 9.349 } 9.350 9.351 9.352 - 9.353 - if (G1PolicyVerbose > 1) { 9.354 - gclog_or_tty->print_cr(" Recording collection pause(%d)", _n_pauses); 9.355 - } 9.356 - 9.357 - if (G1PolicyVerbose > 1) { 9.358 - gclog_or_tty->print_cr(" ET: %10.6f ms (avg: %10.6f ms)\n" 9.359 - " ET-RS: %10.6f ms (avg: %10.6f ms)\n" 9.360 - " |RS|: " SIZE_FORMAT, 9.361 - elapsed_ms, recent_avg_time_for_pauses_ms(), 9.362 - scan_rs_time, recent_avg_time_for_rs_scan_ms(), 9.363 - rs_size); 9.364 - 9.365 - gclog_or_tty->print_cr(" Used at start: " SIZE_FORMAT"K" 9.366 - " At end " SIZE_FORMAT "K\n" 9.367 - " garbage : " SIZE_FORMAT "K" 9.368 - " of " SIZE_FORMAT "K\n" 9.369 - " survival : %6.2f%% (%6.2f%% avg)", 9.370 - _cur_collection_pause_used_at_start_bytes/K, 9.371 - _g1->used()/K, freed_bytes/K, 9.372 - _collection_set_bytes_used_before/K, 9.373 - survival_fraction*100.0, 9.374 - recent_avg_survival_fraction()*100.0); 9.375 - gclog_or_tty->print_cr(" Recent %% gc pause time: %6.2f", 9.376 - recent_avg_pause_time_ratio() * 100.0); 9.377 - } 9.378 - 9.379 // PrintGCDetails output 9.380 if (PrintGCDetails) { 9.381 bool print_marking_info = 9.382 @@ -1436,7 +1368,6 @@ 9.383 9.384 if (print_marking_info) { 9.385 print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms); 9.386 - print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers); 9.387 } 9.388 9.389 if (parallel) { 9.390 @@ -1478,6 +1409,9 @@ 9.391 print_stats(1, "Scan RS", scan_rs_time); 9.392 print_stats(1, "Object Copying", obj_copy_time); 9.393 } 9.394 + if (print_marking_info) { 9.395 + print_stats(1, "Complete CSet Marking", _mark_closure_time_ms); 9.396 + } 9.397 print_stats(1, "Clear CT", _cur_clear_ct_time_ms); 9.398 #ifndef PRODUCT 9.399 print_stats(1, "Cur Clear CC", _cur_clear_cc_time_ms); 9.400 @@ -1489,9 +1423,14 @@ 9.401 } 9.402 #endif 9.403 print_stats(1, "Other", other_time_ms); 9.404 - print_stats(2, "Choose CSet", _recorded_young_cset_choice_time_ms); 9.405 + print_stats(2, "Choose CSet", 9.406 + (_recorded_young_cset_choice_time_ms + 9.407 + _recorded_non_young_cset_choice_time_ms)); 9.408 print_stats(2, "Ref Proc", _cur_ref_proc_time_ms); 9.409 print_stats(2, "Ref Enq", _cur_ref_enq_time_ms); 9.410 + print_stats(2, "Free CSet", 9.411 + (_recorded_young_free_cset_time_ms + 9.412 + _recorded_non_young_free_cset_time_ms)); 9.413 9.414 for (int i = 0; i < _aux_num; ++i) { 9.415 if (_cur_aux_times_set[i]) { 9.416 @@ -1576,8 +1515,6 @@ 9.417 _short_lived_surv_rate_group->start_adding_regions(); 9.418 // do that for any other surv rate groupsx 9.419 9.420 - // <NEW PREDICTION> 9.421 - 9.422 if (update_stats) { 9.423 double pause_time_ms = elapsed_ms; 9.424 9.425 @@ -1631,21 +1568,21 @@ 9.426 _mark_closure_time_ms + termination_time); 9.427 9.428 double young_other_time_ms = 0.0; 9.429 - if (_recorded_young_regions > 0) { 9.430 + if (young_cset_region_length() > 0) { 9.431 young_other_time_ms = 9.432
_recorded_young_cset_choice_time_ms + 9.433 _recorded_young_free_cset_time_ms; 9.434 _young_other_cost_per_region_ms_seq->add(young_other_time_ms / 9.435 - (double) _recorded_young_regions); 9.436 + (double) young_cset_region_length()); 9.437 } 9.438 double non_young_other_time_ms = 0.0; 9.439 - if (_recorded_non_young_regions > 0) { 9.440 + if (old_cset_region_length() > 0) { 9.441 non_young_other_time_ms = 9.442 _recorded_non_young_cset_choice_time_ms + 9.443 _recorded_non_young_free_cset_time_ms; 9.444 9.445 _non_young_other_cost_per_region_ms_seq->add(non_young_other_time_ms / 9.446 - (double) _recorded_non_young_regions); 9.447 + (double) old_cset_region_length()); 9.448 } 9.449 9.450 double constant_other_time_ms = all_other_time_ms - 9.451 @@ -1659,7 +1596,6 @@ 9.452 } 9.453 9.454 _pending_cards_seq->add((double) _pending_cards); 9.455 - _scanned_cards_seq->add((double) cards_scanned); 9.456 _rs_lengths_seq->add((double) _max_rs_lengths); 9.457 9.458 double expensive_region_limit_ms = 9.459 @@ -1670,49 +1606,6 @@ 9.460 expensive_region_limit_ms = (double) MaxGCPauseMillis; 9.461 } 9.462 _expensive_region_limit_ms = expensive_region_limit_ms; 9.463 - 9.464 - if (PREDICTIONS_VERBOSE) { 9.465 - gclog_or_tty->print_cr(""); 9.466 - gclog_or_tty->print_cr("PREDICTIONS %1.4lf %d " 9.467 - "REGIONS %d %d %d " 9.468 - "PENDING_CARDS %d %d " 9.469 - "CARDS_SCANNED %d %d " 9.470 - "RS_LENGTHS %d %d " 9.471 - "RS_UPDATE %1.6lf %1.6lf RS_SCAN %1.6lf %1.6lf " 9.472 - "SURVIVAL_RATIO %1.6lf %1.6lf " 9.473 - "OBJECT_COPY %1.6lf %1.6lf OTHER_CONSTANT %1.6lf %1.6lf " 9.474 - "OTHER_YOUNG %1.6lf %1.6lf " 9.475 - "OTHER_NON_YOUNG %1.6lf %1.6lf " 9.476 - "VTIME_DIFF %1.6lf TERMINATION %1.6lf " 9.477 - "ELAPSED %1.6lf %1.6lf ", 9.478 - _cur_collection_start_sec, 9.479 - (!_last_young_gc_full) ? 2 : 9.480 - (last_pause_included_initial_mark) ? 1 : 0, 9.481 - _recorded_region_num, 9.482 - _recorded_young_regions, 9.483 - _recorded_non_young_regions, 9.484 - _predicted_pending_cards, _pending_cards, 9.485 - _predicted_cards_scanned, cards_scanned, 9.486 - _predicted_rs_lengths, _max_rs_lengths, 9.487 - _predicted_rs_update_time_ms, update_rs_time, 9.488 - _predicted_rs_scan_time_ms, scan_rs_time, 9.489 - _predicted_survival_ratio, survival_ratio, 9.490 - _predicted_object_copy_time_ms, obj_copy_time, 9.491 - _predicted_constant_other_time_ms, constant_other_time_ms, 9.492 - _predicted_young_other_time_ms, young_other_time_ms, 9.493 - _predicted_non_young_other_time_ms, 9.494 - non_young_other_time_ms, 9.495 - _vtime_diff_ms, termination_time, 9.496 - _predicted_pause_time_ms, elapsed_ms); 9.497 - } 9.498 - 9.499 - if (G1PolicyVerbose > 0) { 9.500 - gclog_or_tty->print_cr("Pause Time, predicted: %1.4lfms (predicted %s), actual: %1.4lfms", 9.501 - _predicted_pause_time_ms, 9.502 - (_within_target) ? "within" : "outside", 9.503 - elapsed_ms); 9.504 - } 9.505 - 9.506 } 9.507 9.508 _in_marking_window = new_in_marking_window; 9.509 @@ -1723,7 +1616,6 @@ 9.510 // Note that _mmu_tracker->max_gc_time() returns the time in seconds. 
9.511 double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSetUpdatingPauseTimePercent / 100.0; 9.512 adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms); 9.513 - // </NEW PREDICTION> 9.514 9.515 assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); 9.516 } 9.517 @@ -1768,8 +1660,6 @@ 9.518 } 9.519 } 9.520 9.521 -// <NEW PREDICTION> 9.522 - 9.523 void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time, 9.524 double update_rs_processed_buffers, 9.525 double goal_ms) { 9.526 @@ -1905,98 +1795,17 @@ 9.527 } 9.528 9.529 void 9.530 -G1CollectorPolicy::start_recording_regions() { 9.531 - _recorded_rs_lengths = 0; 9.532 - _recorded_young_regions = 0; 9.533 - _recorded_non_young_regions = 0; 9.534 - 9.535 -#if PREDICTIONS_VERBOSE 9.536 - _recorded_marked_bytes = 0; 9.537 - _recorded_young_bytes = 0; 9.538 - _predicted_bytes_to_copy = 0; 9.539 - _predicted_rs_lengths = 0; 9.540 - _predicted_cards_scanned = 0; 9.541 -#endif // PREDICTIONS_VERBOSE 9.542 -} 9.543 - 9.544 -void 9.545 -G1CollectorPolicy::record_cset_region_info(HeapRegion* hr, bool young) { 9.546 -#if PREDICTIONS_VERBOSE 9.547 - if (!young) { 9.548 - _recorded_marked_bytes += hr->max_live_bytes(); 9.549 - } 9.550 - _predicted_bytes_to_copy += predict_bytes_to_copy(hr); 9.551 -#endif // PREDICTIONS_VERBOSE 9.552 - 9.553 - size_t rs_length = hr->rem_set()->occupied(); 9.554 - _recorded_rs_lengths += rs_length; 9.555 -} 9.556 - 9.557 -void 9.558 -G1CollectorPolicy::record_non_young_cset_region(HeapRegion* hr) { 9.559 - assert(!hr->is_young(), "should not call this"); 9.560 - ++_recorded_non_young_regions; 9.561 - record_cset_region_info(hr, false); 9.562 -} 9.563 - 9.564 -void 9.565 -G1CollectorPolicy::set_recorded_young_regions(size_t n_regions) { 9.566 - _recorded_young_regions = n_regions; 9.567 -} 9.568 - 9.569 -void G1CollectorPolicy::set_recorded_young_bytes(size_t bytes) { 9.570 -#if PREDICTIONS_VERBOSE 9.571 - _recorded_young_bytes = bytes; 9.572 -#endif // PREDICTIONS_VERBOSE 9.573 +G1CollectorPolicy::init_cset_region_lengths(size_t eden_cset_region_length, 9.574 + size_t survivor_cset_region_length) { 9.575 + _eden_cset_region_length = eden_cset_region_length; 9.576 + _survivor_cset_region_length = survivor_cset_region_length; 9.577 + _old_cset_region_length = 0; 9.578 } 9.579 9.580 void G1CollectorPolicy::set_recorded_rs_lengths(size_t rs_lengths) { 9.581 _recorded_rs_lengths = rs_lengths; 9.582 } 9.583 9.584 -void G1CollectorPolicy::set_predicted_bytes_to_copy(size_t bytes) { 9.585 - _predicted_bytes_to_copy = bytes; 9.586 -} 9.587 - 9.588 -void 9.589 -G1CollectorPolicy::end_recording_regions() { 9.590 - // The _predicted_pause_time_ms field is referenced in code 9.591 - // not under PREDICTIONS_VERBOSE. Let's initialize it. 
9.592 - _predicted_pause_time_ms = -1.0; 9.593 - 9.594 -#if PREDICTIONS_VERBOSE 9.595 - _predicted_pending_cards = predict_pending_cards(); 9.596 - _predicted_rs_lengths = _recorded_rs_lengths + predict_rs_length_diff(); 9.597 - if (full_young_gcs()) 9.598 - _predicted_cards_scanned += predict_young_card_num(_predicted_rs_lengths); 9.599 - else 9.600 - _predicted_cards_scanned += 9.601 - predict_non_young_card_num(_predicted_rs_lengths); 9.602 - _recorded_region_num = _recorded_young_regions + _recorded_non_young_regions; 9.603 - 9.604 - _predicted_rs_update_time_ms = 9.605 - predict_rs_update_time_ms(_g1->pending_card_num()); 9.606 - _predicted_rs_scan_time_ms = 9.607 - predict_rs_scan_time_ms(_predicted_cards_scanned); 9.608 - _predicted_object_copy_time_ms = 9.609 - predict_object_copy_time_ms(_predicted_bytes_to_copy); 9.610 - _predicted_constant_other_time_ms = 9.611 - predict_constant_other_time_ms(); 9.612 - _predicted_young_other_time_ms = 9.613 - predict_young_other_time_ms(_recorded_young_regions); 9.614 - _predicted_non_young_other_time_ms = 9.615 - predict_non_young_other_time_ms(_recorded_non_young_regions); 9.616 - 9.617 - _predicted_pause_time_ms = 9.618 - _predicted_rs_update_time_ms + 9.619 - _predicted_rs_scan_time_ms + 9.620 - _predicted_object_copy_time_ms + 9.621 - _predicted_constant_other_time_ms + 9.622 - _predicted_young_other_time_ms + 9.623 - _predicted_non_young_other_time_ms; 9.624 -#endif // PREDICTIONS_VERBOSE 9.625 -} 9.626 - 9.627 void G1CollectorPolicy::check_if_region_is_too_expensive(double 9.628 predicted_time_ms) { 9.629 // I don't think we need to do this when in young GC mode since 9.630 @@ -2013,9 +1822,6 @@ 9.631 } 9.632 } 9.633 9.634 -// </NEW PREDICTION> 9.635 - 9.636 - 9.637 void G1CollectorPolicy::update_recent_gc_times(double end_time_sec, 9.638 double elapsed_ms) { 9.639 _recent_gc_times_ms->add(elapsed_ms); 9.640 @@ -2023,99 +1829,6 @@ 9.641 _prev_collection_pause_end_ms = end_time_sec * 1000.0; 9.642 } 9.643 9.644 -double G1CollectorPolicy::recent_avg_time_for_pauses_ms() { 9.645 - if (_recent_pause_times_ms->num() == 0) { 9.646 - return (double) MaxGCPauseMillis; 9.647 - } 9.648 - return _recent_pause_times_ms->avg(); 9.649 -} 9.650 - 9.651 -double G1CollectorPolicy::recent_avg_time_for_rs_scan_ms() { 9.652 - if (_recent_rs_scan_times_ms->num() == 0) { 9.653 - return (double)MaxGCPauseMillis/3.0; 9.654 - } 9.655 - return _recent_rs_scan_times_ms->avg(); 9.656 -} 9.657 - 9.658 -int G1CollectorPolicy::number_of_recent_gcs() { 9.659 - assert(_recent_rs_scan_times_ms->num() == 9.660 - _recent_pause_times_ms->num(), "Sequence out of sync"); 9.661 - assert(_recent_pause_times_ms->num() == 9.662 - _recent_CS_bytes_used_before->num(), "Sequence out of sync"); 9.663 - assert(_recent_CS_bytes_used_before->num() == 9.664 - _recent_CS_bytes_surviving->num(), "Sequence out of sync"); 9.665 - 9.666 - return _recent_pause_times_ms->num(); 9.667 -} 9.668 - 9.669 -double G1CollectorPolicy::recent_avg_survival_fraction() { 9.670 - return recent_avg_survival_fraction_work(_recent_CS_bytes_surviving, 9.671 - _recent_CS_bytes_used_before); 9.672 -} 9.673 - 9.674 -double G1CollectorPolicy::last_survival_fraction() { 9.675 - return last_survival_fraction_work(_recent_CS_bytes_surviving, 9.676 - _recent_CS_bytes_used_before); 9.677 -} 9.678 - 9.679 -double 9.680 -G1CollectorPolicy::recent_avg_survival_fraction_work(TruncatedSeq* surviving, 9.681 - TruncatedSeq* before) { 9.682 - assert(surviving->num() == before->num(), "Sequence out of sync"); 9.683 - if 
(before->sum() > 0.0) { 9.684 - double recent_survival_rate = surviving->sum() / before->sum(); 9.685 - // We exempt parallel collection from this check because Alloc Buffer 9.686 - // fragmentation can produce negative collections. 9.687 - // Further, we're now always doing parallel collection. But I'm still 9.688 - // leaving this here as a placeholder for a more precise assertion later. 9.689 - // (DLD, 10/05.) 9.690 - assert((true || G1CollectedHeap::use_parallel_gc_threads()) || 9.691 - _g1->evacuation_failed() || 9.692 - recent_survival_rate <= 1.0, "Or bad frac"); 9.693 - return recent_survival_rate; 9.694 - } else { 9.695 - return 1.0; // Be conservative. 9.696 - } 9.697 -} 9.698 - 9.699 -double 9.700 -G1CollectorPolicy::last_survival_fraction_work(TruncatedSeq* surviving, 9.701 - TruncatedSeq* before) { 9.702 - assert(surviving->num() == before->num(), "Sequence out of sync"); 9.703 - if (surviving->num() > 0 && before->last() > 0.0) { 9.704 - double last_survival_rate = surviving->last() / before->last(); 9.705 - // We exempt parallel collection from this check because Alloc Buffer 9.706 - // fragmentation can produce negative collections. 9.707 - // Further, we're now always doing parallel collection. But I'm still 9.708 - // leaving this here as a placeholder for a more precise assertion later. 9.709 - // (DLD, 10/05.) 9.710 - assert((true || G1CollectedHeap::use_parallel_gc_threads()) || 9.711 - last_survival_rate <= 1.0, "Or bad frac"); 9.712 - return last_survival_rate; 9.713 - } else { 9.714 - return 1.0; 9.715 - } 9.716 -} 9.717 - 9.718 -static const int survival_min_obs = 5; 9.719 -static double survival_min_obs_limits[] = { 0.9, 0.7, 0.5, 0.3, 0.1 }; 9.720 -static const double min_survival_rate = 0.1; 9.721 - 9.722 -double 9.723 -G1CollectorPolicy::conservative_avg_survival_fraction_work(double avg, 9.724 - double latest) { 9.725 - double res = avg; 9.726 - if (number_of_recent_gcs() < survival_min_obs) { 9.727 - res = MAX2(res, survival_min_obs_limits[number_of_recent_gcs()]); 9.728 - } 9.729 - res = MAX2(res, latest); 9.730 - res = MAX2(res, min_survival_rate); 9.731 - // In the parallel case, LAB fragmentation can produce "negative 9.732 - // collections"; so can evac failure. Cap at 1.0 9.733 - res = MIN2(res, 1.0); 9.734 - return res; 9.735 -} 9.736 - 9.737 size_t G1CollectorPolicy::expansion_amount() { 9.738 double recent_gc_overhead = recent_avg_pause_time_ratio() * 100.0; 9.739 double threshold = _gc_overhead_perc; 9.740 @@ -2331,15 +2044,6 @@ 9.741 print_summary_sd(0, buffer, &_all_aux_times_ms[i]); 9.742 } 9.743 } 9.744 - 9.745 - size_t all_region_num = _region_num_young + _region_num_tenured; 9.746 - gclog_or_tty->print_cr(" New Regions %8d, Young %8d (%6.2lf%%), " 9.747 - "Tenured %8d (%6.2lf%%)", 9.748 - all_region_num, 9.749 - _region_num_young, 9.750 - (double) _region_num_young / (double) all_region_num * 100.0, 9.751 - _region_num_tenured, 9.752 - (double) _region_num_tenured / (double) all_region_num * 100.0); 9.753 } 9.754 if (TraceGen1Time) { 9.755 if (_all_full_gc_times_ms->num() > 0) { 9.756 @@ -2361,14 +2065,6 @@ 9.757 #endif // PRODUCT 9.758 } 9.759 9.760 -void G1CollectorPolicy::update_region_num(bool young) { 9.761 - if (young) { 9.762 - ++_region_num_young; 9.763 - } else { 9.764 - ++_region_num_tenured; 9.765 - } 9.766 -} 9.767 - 9.768 #ifndef PRODUCT 9.769 // for debugging, bit of a hack... 
9.770 static char* 9.771 @@ -2617,6 +2313,7 @@ 9.772 ParKnownGarbageHRClosure parKnownGarbageCl(_hrSorted, _chunk_size, i); 9.773 // Back to zero for the claim value. 9.774 _g1->heap_region_par_iterate_chunked(&parKnownGarbageCl, i, 9.775 + _g1->workers()->active_workers(), 9.776 HeapRegion::InitialClaimValue); 9.777 jint regions_added = parKnownGarbageCl.marked_regions_added(); 9.778 _hrSorted->incNumMarkedHeapRegions(regions_added); 9.779 @@ -2628,7 +2325,7 @@ 9.780 }; 9.781 9.782 void 9.783 -G1CollectorPolicy::record_concurrent_mark_cleanup_end() { 9.784 +G1CollectorPolicy::record_concurrent_mark_cleanup_end(int no_of_gc_threads) { 9.785 double start_sec; 9.786 if (G1PrintParCleanupStats) { 9.787 start_sec = os::elapsedTime(); 9.788 @@ -2644,10 +2341,27 @@ 9.789 9.790 if (G1CollectedHeap::use_parallel_gc_threads()) { 9.791 const size_t OverpartitionFactor = 4; 9.792 - const size_t MinWorkUnit = 8; 9.793 - const size_t WorkUnit = 9.794 - MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor), 9.795 - MinWorkUnit); 9.796 + size_t WorkUnit; 9.797 + // The use of MinWorkUnit = 8 in the original code 9.798 + // causes some assertion failures when the total number of 9.799 + // regions is less than 8. The code here tries to fix that. 9.800 + // Should the original code also be fixed? 9.801 + if (no_of_gc_threads > 0) { 9.802 + const size_t MinWorkUnit = 9.803 + MAX2(_g1->n_regions() / no_of_gc_threads, (size_t) 1U); 9.804 + WorkUnit = 9.805 + MAX2(_g1->n_regions() / (no_of_gc_threads * OverpartitionFactor), 9.806 + MinWorkUnit); 9.807 + } else { 9.808 + assert(no_of_gc_threads > 0, 9.809 + "The active gc workers should be greater than 0"); 9.810 + // In a product build do something reasonable to avoid a crash. 9.811 + const size_t MinWorkUnit = 9.812 + MAX2(_g1->n_regions() / ParallelGCThreads, (size_t) 1U); 9.813 + WorkUnit = 9.814 + MAX2(_g1->n_regions() / (ParallelGCThreads * OverpartitionFactor), 9.815 + MinWorkUnit); 9.816 + } 9.817 _collectionSetChooser->prepareForAddMarkedHeapRegionsPar(_g1->n_regions(), 9.818 WorkUnit); 9.819 ParKnownGarbageTask parKnownGarbageTask(_collectionSetChooser, 9.820 @@ -2682,8 +2396,7 @@ 9.821 } 9.822 9.823 // Add the heap region at the head of the non-incremental collection set 9.824 -void G1CollectorPolicy:: 9.825 -add_to_collection_set(HeapRegion* hr) { 9.826 +void G1CollectorPolicy::add_old_region_to_cset(HeapRegion* hr) { 9.827 assert(_inc_cset_build_state == Active, "Precondition"); 9.828 assert(!hr->is_young(), "non-incremental add of young region"); 9.829 9.830 @@ -2694,9 +2407,11 @@ 9.831 hr->set_in_collection_set(true); 9.832 hr->set_next_in_collection_set(_collection_set); 9.833 _collection_set = hr; 9.834 - _collection_set_size++; 9.835 _collection_set_bytes_used_before += hr->used(); 9.836 _g1->register_region_with_in_cset_fast_test(hr); 9.837 + size_t rs_length = hr->rem_set()->occupied(); 9.838 + _recorded_rs_lengths += rs_length; 9.839 + _old_cset_region_length += 1; 9.840 } 9.841 9.842 // Initialize the per-collection-set information 9.843 @@ -2705,16 +2420,11 @@ 9.844 9.845 _inc_cset_head = NULL; 9.846 _inc_cset_tail = NULL; 9.847 - _inc_cset_size = 0; 9.848 _inc_cset_bytes_used_before = 0; 9.849 9.850 - _inc_cset_young_index = 0; 9.851 - 9.852 _inc_cset_max_finger = 0; 9.853 - _inc_cset_recorded_young_bytes = 0; 9.854 _inc_cset_recorded_rs_lengths = 0; 9.855 _inc_cset_predicted_elapsed_time_ms = 0; 9.856 - _inc_cset_predicted_bytes_to_copy = 0; 9.857 _inc_cset_build_state = Active; 9.858 } 9.859 9.860 @@ -2745,20 +2455,6 @@
9.861 // rset sampling code 9.862 hr->set_recorded_rs_length(rs_length); 9.863 hr->set_predicted_elapsed_time_ms(region_elapsed_time_ms); 9.864 - 9.865 -#if PREDICTIONS_VERBOSE 9.866 - size_t bytes_to_copy = predict_bytes_to_copy(hr); 9.867 - _inc_cset_predicted_bytes_to_copy += bytes_to_copy; 9.868 - 9.869 - // Record the number of bytes used in this region 9.870 - _inc_cset_recorded_young_bytes += used_bytes; 9.871 - 9.872 - // Cache the values we have added to the aggregated informtion 9.873 - // in the heap region in case we have to remove this region from 9.874 - // the incremental collection set, or it is updated by the 9.875 - // rset sampling code 9.876 - hr->set_predicted_bytes_to_copy(bytes_to_copy); 9.877 -#endif // PREDICTIONS_VERBOSE 9.878 } 9.879 9.880 void G1CollectorPolicy::remove_from_incremental_cset_info(HeapRegion* hr) { 9.881 @@ -2784,17 +2480,6 @@ 9.882 // Clear the values cached in the heap region 9.883 hr->set_recorded_rs_length(0); 9.884 hr->set_predicted_elapsed_time_ms(0); 9.885 - 9.886 -#if PREDICTIONS_VERBOSE 9.887 - size_t old_predicted_bytes_to_copy = hr->predicted_bytes_to_copy(); 9.888 - _inc_cset_predicted_bytes_to_copy -= old_predicted_bytes_to_copy; 9.889 - 9.890 - // Subtract the number of bytes used in this region 9.891 - _inc_cset_recorded_young_bytes -= used_bytes; 9.892 - 9.893 - // Clear the values cached in the heap region 9.894 - hr->set_predicted_bytes_to_copy(0); 9.895 -#endif // PREDICTIONS_VERBOSE 9.896 } 9.897 9.898 void G1CollectorPolicy::update_incremental_cset_info(HeapRegion* hr, size_t new_rs_length) { 9.899 @@ -2806,8 +2491,8 @@ 9.900 } 9.901 9.902 void G1CollectorPolicy::add_region_to_incremental_cset_common(HeapRegion* hr) { 9.903 - assert( hr->is_young(), "invariant"); 9.904 - assert( hr->young_index_in_cset() == -1, "invariant" ); 9.905 + assert(hr->is_young(), "invariant"); 9.906 + assert(hr->young_index_in_cset() > -1, "should have already been set"); 9.907 assert(_inc_cset_build_state == Active, "Precondition"); 9.908 9.909 // We need to clear and set the cached recorded/cached collection set 9.910 @@ -2827,11 +2512,7 @@ 9.911 hr->set_in_collection_set(true); 9.912 assert( hr->next_in_collection_set() == NULL, "invariant"); 9.913 9.914 - _inc_cset_size++; 9.915 _g1->register_region_with_in_cset_fast_test(hr); 9.916 - 9.917 - hr->set_young_index_in_cset((int) _inc_cset_young_index); 9.918 - ++_inc_cset_young_index; 9.919 } 9.920 9.921 // Add the region at the RHS of the incremental cset 9.922 @@ -2899,8 +2580,6 @@ 9.923 9.924 YoungList* young_list = _g1->young_list(); 9.925 9.926 - start_recording_regions(); 9.927 - 9.928 guarantee(target_pause_time_ms > 0.0, 9.929 err_msg("target_pause_time_ms = %1.6lf should be positive", 9.930 target_pause_time_ms)); 9.931 @@ -2923,7 +2602,6 @@ 9.932 if (time_remaining_ms < threshold) { 9.933 double prev_time_remaining_ms = time_remaining_ms; 9.934 time_remaining_ms = 0.50 * target_pause_time_ms; 9.935 - _within_target = false; 9.936 ergo_verbose3(ErgoCSetConstruction, 9.937 "adjust remaining time", 9.938 ergo_format_reason("remaining time lower than threshold") 9.939 @@ -2931,8 +2609,6 @@ 9.940 ergo_format_ms("threshold") 9.941 ergo_format_ms("adjusted remaining time"), 9.942 prev_time_remaining_ms, threshold, time_remaining_ms); 9.943 - } else { 9.944 - _within_target = true; 9.945 } 9.946 9.947 size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes; 9.948 @@ -2941,8 +2617,6 @@ 9.949 double young_start_time_sec = os::elapsedTime(); 9.950 9.951 
_collection_set_bytes_used_before = 0; 9.952 - _collection_set_size = 0; 9.953 - _young_cset_length = 0; 9.954 _last_young_gc_full = full_young_gcs() ? true : false; 9.955 9.956 if (_last_young_gc_full) { 9.957 @@ -2955,9 +2629,9 @@ 9.958 // pause are appended to the RHS of the young list, i.e. 9.959 // [Newly Young Regions ++ Survivors from last pause]. 9.960 9.961 - size_t survivor_region_num = young_list->survivor_length(); 9.962 - size_t eden_region_num = young_list->length() - survivor_region_num; 9.963 - size_t old_region_num = 0; 9.964 + size_t survivor_region_length = young_list->survivor_length(); 9.965 + size_t eden_region_length = young_list->length() - survivor_region_length; 9.966 + init_cset_region_lengths(eden_region_length, survivor_region_length); 9.967 hr = young_list->first_survivor_region(); 9.968 while (hr != NULL) { 9.969 assert(hr->is_survivor(), "badly formed young list"); 9.970 @@ -2971,9 +2645,7 @@ 9.971 if (_g1->mark_in_progress()) 9.972 _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger); 9.973 9.974 - _young_cset_length = _inc_cset_young_index; 9.975 _collection_set = _inc_cset_head; 9.976 - _collection_set_size = _inc_cset_size; 9.977 _collection_set_bytes_used_before = _inc_cset_bytes_used_before; 9.978 time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms; 9.979 predicted_pause_time_ms += _inc_cset_predicted_elapsed_time_ms; 9.980 @@ -2983,19 +2655,12 @@ 9.981 ergo_format_region("eden") 9.982 ergo_format_region("survivors") 9.983 ergo_format_ms("predicted young region time"), 9.984 - eden_region_num, survivor_region_num, 9.985 + eden_region_length, survivor_region_length, 9.986 _inc_cset_predicted_elapsed_time_ms); 9.987 9.988 // The number of recorded young regions is the incremental 9.989 // collection set's current size 9.990 - set_recorded_young_regions(_inc_cset_size); 9.991 set_recorded_rs_lengths(_inc_cset_recorded_rs_lengths); 9.992 - set_recorded_young_bytes(_inc_cset_recorded_young_bytes); 9.993 -#if PREDICTIONS_VERBOSE 9.994 - set_predicted_bytes_to_copy(_inc_cset_predicted_bytes_to_copy); 9.995 -#endif // PREDICTIONS_VERBOSE 9.996 - 9.997 - assert(_inc_cset_size == young_list->length(), "Invariant"); 9.998 9.999 double young_end_time_sec = os::elapsedTime(); 9.1000 _recorded_young_cset_choice_time_ms = 9.1001 @@ -3009,9 +2674,16 @@ 9.1002 NumberSeq seq; 9.1003 double avg_prediction = 100000000000000000.0; // something very large 9.1004 9.1005 - size_t prev_collection_set_size = _collection_set_size; 9.1006 double prev_predicted_pause_time_ms = predicted_pause_time_ms; 9.1007 do { 9.1008 + // Note that add_old_region_to_cset() increments the 9.1009 + // _old_cset_region_length field and cset_region_length() returns the 9.1010 + // sum of _eden_cset_region_length, _survivor_cset_region_length, and 9.1011 + // _old_cset_region_length. So, as old regions are added to the 9.1012 + // CSet, _old_cset_region_length will be incremented and 9.1013 + // cset_region_length(), which is used below, will always reflect 9.1014 + // the total number of regions added to the CSet up to this point.
9.1015 + 9.1016 hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms, 9.1017 avg_prediction); 9.1018 if (hr != NULL) { 9.1019 @@ -3019,8 +2691,7 @@ 9.1020 double predicted_time_ms = predict_region_elapsed_time_ms(hr, false); 9.1021 time_remaining_ms -= predicted_time_ms; 9.1022 predicted_pause_time_ms += predicted_time_ms; 9.1023 - add_to_collection_set(hr); 9.1024 - record_non_young_cset_region(hr); 9.1025 + add_old_region_to_cset(hr); 9.1026 seq.add(predicted_time_ms); 9.1027 avg_prediction = seq.avg() + seq.sd(); 9.1028 } 9.1029 @@ -3041,13 +2712,13 @@ 9.1030 should_continue = false; 9.1031 } 9.1032 } else { 9.1033 - if (_collection_set_size >= _young_list_fixed_length) { 9.1034 + if (cset_region_length() >= _young_list_fixed_length) { 9.1035 ergo_verbose2(ErgoCSetConstruction, 9.1036 "stop adding old regions to CSet", 9.1037 ergo_format_reason("CSet length reached target") 9.1038 ergo_format_region("CSet") 9.1039 ergo_format_region("young target"), 9.1040 - _collection_set_size, _young_list_fixed_length); 9.1041 + cset_region_length(), _young_list_fixed_length); 9.1042 should_continue = false; 9.1043 } 9.1044 } 9.1045 @@ -3055,23 +2726,21 @@ 9.1046 } while (should_continue); 9.1047 9.1048 if (!adaptive_young_list_length() && 9.1049 - _collection_set_size < _young_list_fixed_length) { 9.1050 + cset_region_length() < _young_list_fixed_length) { 9.1051 ergo_verbose2(ErgoCSetConstruction, 9.1052 "request partially-young GCs end", 9.1053 ergo_format_reason("CSet length lower than target") 9.1054 ergo_format_region("CSet") 9.1055 ergo_format_region("young target"), 9.1056 - _collection_set_size, _young_list_fixed_length); 9.1057 + cset_region_length(), _young_list_fixed_length); 9.1058 _should_revert_to_full_young_gcs = true; 9.1059 } 9.1060 9.1061 - old_region_num = _collection_set_size - prev_collection_set_size; 9.1062 - 9.1063 ergo_verbose2(ErgoCSetConstruction | ErgoHigh, 9.1064 "add old regions to CSet", 9.1065 ergo_format_region("old") 9.1066 ergo_format_ms("predicted old region time"), 9.1067 - old_region_num, 9.1068 + old_cset_region_length(), 9.1069 predicted_pause_time_ms - prev_predicted_pause_time_ms); 9.1070 } 9.1071 9.1072 @@ -3079,8 +2748,6 @@ 9.1073 9.1074 count_CS_bytes_used(); 9.1075 9.1076 - end_recording_regions(); 9.1077 - 9.1078 ergo_verbose5(ErgoCSetConstruction, 9.1079 "finish choosing CSet", 9.1080 ergo_format_region("eden") 9.1081 @@ -3088,7 +2755,8 @@ 9.1082 ergo_format_region("old") 9.1083 ergo_format_ms("predicted pause time") 9.1084 ergo_format_ms("target pause time"), 9.1085 - eden_region_num, survivor_region_num, old_region_num, 9.1086 + eden_region_length, survivor_region_length, 9.1087 + old_cset_region_length(), 9.1088 predicted_pause_time_ms, target_pause_time_ms); 9.1089 9.1090 double non_young_end_time_sec = os::elapsedTime();
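A recurring change in this file is dividing the per-thread timing arrays by no_of_gc_threads() instead of ParallelGCThreads. The standalone sketch below shows why: with dynamic GC threads, array slots past the active count stay zero, so dividing by the configured maximum would understate the per-worker time. The function mirrors the shape of avg_value() in the diff; the data and counts are illustrative.

    #include <cstdio>

    // Average a per-worker timing array over the threads that actually ran,
    // in the spirit of avg_value() above.
    static double avg_value(const double* data, unsigned active_threads) {
      double sum = 0.0;
      for (unsigned i = 0; i < active_threads; ++i) {
        sum += data[i];
      }
      return sum / (double)active_threads;
    }

    int main() {
      const unsigned parallel_gc_threads = 8;  // configured maximum
      const unsigned active = 4;               // chosen for this pause
      // Slots for workers 4..7 never ran, so they stay zero.
      double times_ms[parallel_gc_threads] = { 10.0, 12.0, 11.0, 9.0,
                                               0.0, 0.0, 0.0, 0.0 };
      std::printf("avg over active threads: %.2f ms\n",
                  avg_value(times_ms, active));               // 10.50
      std::printf("avg over configured max: %.2f ms\n",
                  avg_value(times_ms, parallel_gc_threads));  // 5.25 (skewed)
      return 0;
    }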
10.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Dec 01 13:42:41 2011 -0500 10.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Fri Dec 02 08:52:53 2011 -0500 10.3 @@ -85,13 +85,13 @@ 10.4 10.5 class G1CollectorPolicy: public CollectorPolicy { 10.6 private: 10.7 - // The number of pauses during the execution. 10.8 - long _n_pauses; 10.9 - 10.10 // either equal to the number of parallel threads, if ParallelGCThreads 10.11 // has been set, or 1 otherwise 10.12 int _parallel_gc_threads; 10.13 10.14 + // The number of GC threads currently active. 10.15 + uintx _no_of_gc_threads; 10.16 + 10.17 enum SomePrivateConstants { 10.18 NumPrevPausesForHeuristics = 10 10.19 }; 10.20 @@ -127,18 +127,9 @@ 10.21 jlong _num_cc_clears; // number of times the card count cache has been cleared 10.22 #endif 10.23 10.24 - // Statistics for recent GC pauses. See below for how indexed. 10.25 - TruncatedSeq* _recent_rs_scan_times_ms; 10.26 - 10.27 // These exclude marking times. 10.28 - TruncatedSeq* _recent_pause_times_ms; 10.29 TruncatedSeq* _recent_gc_times_ms; 10.30 10.31 - TruncatedSeq* _recent_CS_bytes_used_before; 10.32 - TruncatedSeq* _recent_CS_bytes_surviving; 10.33 - 10.34 - TruncatedSeq* _recent_rs_sizes; 10.35 - 10.36 TruncatedSeq* _concurrent_mark_remark_times_ms; 10.37 TruncatedSeq* _concurrent_mark_cleanup_times_ms; 10.38 10.39 @@ -150,13 +141,6 @@ 10.40 NumberSeq* _all_stop_world_times_ms; 10.41 NumberSeq* _all_yield_times_ms; 10.42 10.43 - size_t _region_num_young; 10.44 - size_t _region_num_tenured; 10.45 - size_t _prev_region_num_young; 10.46 - size_t _prev_region_num_tenured; 10.47 - 10.48 - NumberSeq* _all_mod_union_times_ms; 10.49 - 10.50 int _aux_num; 10.51 NumberSeq* _all_aux_times_ms; 10.52 double* _cur_aux_start_times_ms; 10.53 @@ -194,7 +178,6 @@ 10.54 // locker is active. 
This should be >= _young_list_target_length; 10.55 size_t _young_list_max_length; 10.56 10.57 - size_t _young_cset_length; 10.58 bool _last_young_gc_full; 10.59 10.60 unsigned _full_young_pause_num; 10.61 @@ -217,8 +200,6 @@ 10.62 return _during_marking; 10.63 } 10.64 10.65 - // <NEW PREDICTION> 10.66 - 10.67 private: 10.68 enum PredictionConstants { 10.69 TruncatedSeqLength = 10 10.70 @@ -240,47 +221,32 @@ 10.71 TruncatedSeq* _non_young_other_cost_per_region_ms_seq; 10.72 10.73 TruncatedSeq* _pending_cards_seq; 10.74 - TruncatedSeq* _scanned_cards_seq; 10.75 TruncatedSeq* _rs_lengths_seq; 10.76 10.77 TruncatedSeq* _cost_per_byte_ms_during_cm_seq; 10.78 10.79 TruncatedSeq* _young_gc_eff_seq; 10.80 10.81 - TruncatedSeq* _max_conc_overhead_seq; 10.82 - 10.83 bool _using_new_ratio_calculations; 10.84 size_t _min_desired_young_length; // as set on the command line or default calculations 10.85 size_t _max_desired_young_length; // as set on the command line or default calculations 10.86 10.87 - size_t _recorded_young_regions; 10.88 - size_t _recorded_non_young_regions; 10.89 - size_t _recorded_region_num; 10.90 + size_t _eden_cset_region_length; 10.91 + size_t _survivor_cset_region_length; 10.92 + size_t _old_cset_region_length; 10.93 + 10.94 + void init_cset_region_lengths(size_t eden_cset_region_length, 10.95 + size_t survivor_cset_region_length); 10.96 + 10.97 + size_t eden_cset_region_length() { return _eden_cset_region_length; } 10.98 + size_t survivor_cset_region_length() { return _survivor_cset_region_length; } 10.99 + size_t old_cset_region_length() { return _old_cset_region_length; } 10.100 10.101 size_t _free_regions_at_end_of_collection; 10.102 10.103 size_t _recorded_rs_lengths; 10.104 size_t _max_rs_lengths; 10.105 10.106 - size_t _recorded_marked_bytes; 10.107 - size_t _recorded_young_bytes; 10.108 - 10.109 - size_t _predicted_pending_cards; 10.110 - size_t _predicted_cards_scanned; 10.111 - size_t _predicted_rs_lengths; 10.112 - size_t _predicted_bytes_to_copy; 10.113 - 10.114 - double _predicted_survival_ratio; 10.115 - double _predicted_rs_update_time_ms; 10.116 - double _predicted_rs_scan_time_ms; 10.117 - double _predicted_object_copy_time_ms; 10.118 - double _predicted_constant_other_time_ms; 10.119 - double _predicted_young_other_time_ms; 10.120 - double _predicted_non_young_other_time_ms; 10.121 - double _predicted_pause_time_ms; 10.122 - 10.123 - double _vtime_diff_ms; 10.124 - 10.125 double _recorded_young_free_cset_time_ms; 10.126 double _recorded_non_young_free_cset_time_ms; 10.127 10.128 @@ -317,21 +283,28 @@ 10.129 double update_rs_processed_buffers, 10.130 double goal_ms); 10.131 10.132 + uintx no_of_gc_threads() { return _no_of_gc_threads; } 10.133 + void set_no_of_gc_threads(uintx v) { _no_of_gc_threads = v; } 10.134 + 10.135 double _pause_time_target_ms; 10.136 double _recorded_young_cset_choice_time_ms; 10.137 double _recorded_non_young_cset_choice_time_ms; 10.138 - bool _within_target; 10.139 size_t _pending_cards; 10.140 size_t _max_pending_cards; 10.141 10.142 public: 10.143 + // Accessors 10.144 10.145 - void set_region_short_lived(HeapRegion* hr) { 10.146 + void set_region_eden(HeapRegion* hr, int young_index_in_cset) { 10.147 + hr->set_young(); 10.148 hr->install_surv_rate_group(_short_lived_surv_rate_group); 10.149 + hr->set_young_index_in_cset(young_index_in_cset); 10.150 } 10.151 10.152 - void set_region_survivors(HeapRegion* hr) { 10.153 + void set_region_survivor(HeapRegion* hr, int young_index_in_cset) { 10.154 + assert(hr->is_young() && 
hr->is_survivor(), "pre-condition"); 10.155 hr->install_surv_rate_group(_survivor_surv_rate_group); 10.156 + hr->set_young_index_in_cset(young_index_in_cset); 10.157 } 10.158 10.159 #ifndef PRODUCT 10.160 @@ -343,10 +316,6 @@ 10.161 seq->davg() * confidence_factor(seq->num())); 10.162 } 10.163 10.164 - size_t young_cset_length() { 10.165 - return _young_cset_length; 10.166 - } 10.167 - 10.168 void record_max_rs_lengths(size_t rs_lengths) { 10.169 _max_rs_lengths = rs_lengths; 10.170 } 10.171 @@ -465,20 +434,12 @@ 10.172 size_t predict_bytes_to_copy(HeapRegion* hr); 10.173 double predict_region_elapsed_time_ms(HeapRegion* hr, bool young); 10.174 10.175 - void start_recording_regions(); 10.176 - void record_cset_region_info(HeapRegion* hr, bool young); 10.177 - void record_non_young_cset_region(HeapRegion* hr); 10.178 + void set_recorded_rs_lengths(size_t rs_lengths); 10.179 10.180 - void set_recorded_young_regions(size_t n_regions); 10.181 - void set_recorded_young_bytes(size_t bytes); 10.182 - void set_recorded_rs_lengths(size_t rs_lengths); 10.183 - void set_predicted_bytes_to_copy(size_t bytes); 10.184 - 10.185 - void end_recording_regions(); 10.186 - 10.187 - void record_vtime_diff_ms(double vtime_diff_ms) { 10.188 - _vtime_diff_ms = vtime_diff_ms; 10.189 - } 10.190 + size_t cset_region_length() { return young_cset_region_length() + 10.191 + old_cset_region_length(); } 10.192 + size_t young_cset_region_length() { return eden_cset_region_length() + 10.193 + survivor_cset_region_length(); } 10.194 10.195 void record_young_free_cset_time_ms(double time_ms) { 10.196 _recorded_young_free_cset_time_ms = time_ms; 10.197 @@ -494,8 +455,6 @@ 10.198 10.199 double predict_survivor_regions_evac_time(); 10.200 10.201 - // </NEW PREDICTION> 10.202 - 10.203 void cset_regions_freed() { 10.204 bool propagate = _last_young_gc_full && !_in_marking_window; 10.205 _short_lived_surv_rate_group->all_surviving_words_recorded(propagate); 10.206 @@ -575,8 +534,6 @@ 10.207 double sum_of_values (double* data); 10.208 double max_sum (double* data1, double* data2); 10.209 10.210 - int _last_satb_drain_processed_buffers; 10.211 - int _last_update_rs_processed_buffers; 10.212 double _last_pause_time_ms; 10.213 10.214 size_t _bytes_in_collection_set_before_gc; 10.215 @@ -596,10 +553,6 @@ 10.216 // set at the start of the pause. 10.217 HeapRegion* _collection_set; 10.218 10.219 - // The number of regions in the collection set. Set from the incrementally 10.220 - // built collection set at the start of an evacuation pause. 10.221 - size_t _collection_set_size; 10.222 - 10.223 // The number of bytes in the collection set before the pause. Set from 10.224 // the incrementally built collection set at the start of an evacuation 10.225 // pause. 10.226 @@ -622,16 +575,6 @@ 10.227 // The tail of the incrementally built collection set. 10.228 HeapRegion* _inc_cset_tail; 10.229 10.230 - // The number of regions in the incrementally built collection set. 10.231 - // Used to set _collection_set_size at the start of an evacuation 10.232 - // pause. 10.233 - size_t _inc_cset_size; 10.234 - 10.235 - // Used as the index in the surving young words structure 10.236 - // which tracks the amount of space, for each young region, 10.237 - // that survives the pause. 10.238 - size_t _inc_cset_young_index; 10.239 - 10.240 // The number of bytes in the incrementally built collection set. 10.241 // Used to set _collection_set_bytes_used_before at the start of 10.242 // an evacuation pause. 
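The new _eden/_survivor/_old_cset_region_length fields and their accessors above replace the single _collection_set_size counter. Below is a standalone sketch of the bookkeeping they imply (illustrative types, not HotSpot code): the eden and survivor counts are fixed once when the young part of the CSet is chosen, the old count grows region by region, and the total is always derivable rather than separately maintained.

    #include <cassert>
    #include <cstddef>
    #include <cstdio>

    struct CSetLengths {
      size_t eden = 0;
      size_t survivor = 0;
      size_t old_regions = 0;

      // Mirrors init_cset_region_lengths(): fix the young counts, reset old.
      void init(size_t e, size_t s) { eden = e; survivor = s; old_regions = 0; }
      // Mirrors the "_old_cset_region_length += 1" in add_old_region_to_cset().
      void add_old_region() { old_regions += 1; }

      size_t young_length() const { return eden + survivor; }       // young_cset_region_length()
      size_t length() const { return young_length() + old_regions; } // cset_region_length()
    };

    int main() {
      CSetLengths cset;
      cset.init(/*eden=*/20, /*survivor=*/4);
      for (int i = 0; i < 3; ++i) {
        cset.add_old_region();
      }
      assert(cset.length() == 27);
      std::printf("CSet: %zu young + %zu old = %zu regions\n",
                  cset.young_length(), cset.old_regions, cset.length());
      return 0;
    }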
10.243 @@ -640,11 +583,6 @@ 10.244 // Used to record the highest end of heap region in collection set 10.245 HeapWord* _inc_cset_max_finger; 10.246 10.247 - // The number of recorded used bytes in the young regions 10.248 - // of the collection set. This is the sum of the used() bytes 10.249 - // of retired young regions in the collection set. 10.250 - size_t _inc_cset_recorded_young_bytes; 10.251 - 10.252 // The RSet lengths recorded for regions in the collection set 10.253 // (updated by the periodic sampling of the regions in the 10.254 // young list/collection set). 10.255 @@ -655,68 +593,9 @@ 10.256 // regions in the young list/collection set). 10.257 double _inc_cset_predicted_elapsed_time_ms; 10.258 10.259 - // The predicted bytes to copy for the regions in the collection 10.260 - // set (updated by the periodic sampling of the regions in the 10.261 - // young list/collection set). 10.262 - size_t _inc_cset_predicted_bytes_to_copy; 10.263 - 10.264 // Stash a pointer to the g1 heap. 10.265 G1CollectedHeap* _g1; 10.266 10.267 - // The average time in ms per collection pause, averaged over recent pauses. 10.268 - double recent_avg_time_for_pauses_ms(); 10.269 - 10.270 - // The average time in ms for RS scanning, per pause, averaged 10.271 - // over recent pauses. (Note the RS scanning time for a pause 10.272 - // is itself an average of the RS scanning time for each worker 10.273 - // thread.) 10.274 - double recent_avg_time_for_rs_scan_ms(); 10.275 - 10.276 - // The number of "recent" GCs recorded in the number sequences 10.277 - int number_of_recent_gcs(); 10.278 - 10.279 - // The average survival ratio, computed by the total number of bytes 10.280 - // suriviving / total number of bytes before collection over the last 10.281 - // several recent pauses. 10.282 - double recent_avg_survival_fraction(); 10.283 - // The survival fraction of the most recent pause; if there have been no 10.284 - // pauses, returns 1.0. 10.285 - double last_survival_fraction(); 10.286 - 10.287 - // Returns a "conservative" estimate of the recent survival rate, i.e., 10.288 - // one that may be higher than "recent_avg_survival_fraction". 10.289 - // This is conservative in several ways: 10.290 - // If there have been few pauses, it will assume a potential high 10.291 - // variance, and err on the side of caution. 10.292 - // It puts a lower bound (currently 0.1) on the value it will return. 10.293 - // To try to detect phase changes, if the most recent pause ("latest") has a 10.294 - // higher-than average ("avg") survival rate, it returns that rate. 10.295 - // "work" version is a utility function; young is restricted to young regions. 10.296 - double conservative_avg_survival_fraction_work(double avg, 10.297 - double latest); 10.298 - 10.299 - // The arguments are the two sequences that keep track of the number of bytes 10.300 - // surviving and the total number of bytes before collection, resp., 10.301 - // over the last evereal recent pauses 10.302 - // Returns the survival rate for the category in the most recent pause. 10.303 - // If there have been no pauses, returns 1.0. 
10.304 - double last_survival_fraction_work(TruncatedSeq* surviving, 10.305 - TruncatedSeq* before); 10.306 - 10.307 - // The arguments are the two sequences that keep track of the number of bytes 10.308 - // surviving and the total number of bytes before collection, resp., 10.309 - // over the last several recent pauses 10.310 - // Returns the average survival ration over the last several recent pauses 10.311 - // If there have been no pauses, return 1.0 10.312 - double recent_avg_survival_fraction_work(TruncatedSeq* surviving, 10.313 - TruncatedSeq* before); 10.314 - 10.315 - double conservative_avg_survival_fraction() { 10.316 - double avg = recent_avg_survival_fraction(); 10.317 - double latest = last_survival_fraction(); 10.318 - return conservative_avg_survival_fraction_work(avg, latest); 10.319 - } 10.320 - 10.321 // The ratio of gc time to elapsed time, computed over recent pauses. 10.322 double _recent_avg_pause_time_ratio; 10.323 10.324 @@ -724,9 +603,6 @@ 10.325 return _recent_avg_pause_time_ratio; 10.326 } 10.327 10.328 - // Number of pauses between concurrent marking. 10.329 - size_t _pauses_btwn_concurrent_mark; 10.330 - 10.331 // At the end of a pause we check the heap occupancy and we decide 10.332 // whether we will start a marking cycle during the next pause. If 10.333 // we decide that we want to do that, we will set this parameter to 10.334 @@ -849,9 +725,6 @@ 10.335 10.336 GenRemSet::Name rem_set_name() { return GenRemSet::CardTable; } 10.337 10.338 - // The number of collection pauses so far. 10.339 - long n_pauses() const { return _n_pauses; } 10.340 - 10.341 // Update the heuristic info to record a collection pause of the given 10.342 // start time, where the given number of bytes were used at the start. 10.343 // This may involve changing the desired size of a collection set. 10.344 @@ -864,19 +737,21 @@ 10.345 void record_concurrent_mark_init_end(double 10.346 mark_init_elapsed_time_ms); 10.347 10.348 - void record_mark_closure_time(double mark_closure_time_ms); 10.349 + void record_mark_closure_time(double mark_closure_time_ms) { 10.350 + _mark_closure_time_ms = mark_closure_time_ms; 10.351 + } 10.352 10.353 void record_concurrent_mark_remark_start(); 10.354 void record_concurrent_mark_remark_end(); 10.355 10.356 void record_concurrent_mark_cleanup_start(); 10.357 - void record_concurrent_mark_cleanup_end(); 10.358 + void record_concurrent_mark_cleanup_end(int no_of_gc_threads); 10.359 void record_concurrent_mark_cleanup_completed(); 10.360 10.361 void record_concurrent_pause(); 10.362 void record_concurrent_pause_end(); 10.363 10.364 - void record_collection_pause_end(); 10.365 + void record_collection_pause_end(int no_of_gc_threads); 10.366 void print_heap_transition(); 10.367 10.368 // Record the fact that a full collection occurred. 10.369 @@ -900,15 +775,6 @@ 10.370 _cur_satb_drain_time_ms = ms; 10.371 } 10.372 10.373 - void record_satb_drain_processed_buffers(int processed_buffers) { 10.374 - assert(_g1->mark_in_progress(), "shouldn't be here otherwise"); 10.375 - _last_satb_drain_processed_buffers = processed_buffers; 10.376 - } 10.377 - 10.378 - void record_mod_union_time(double ms) { 10.379 - _all_mod_union_times_ms->add(ms); 10.380 - } 10.381 - 10.382 void record_update_rs_time(int thread, double ms) { 10.383 _par_last_update_rs_times_ms[thread] = ms; 10.384 } 10.385 @@ -1009,11 +875,8 @@ 10.386 10.387 void clear_collection_set() { _collection_set = NULL; } 10.388 10.389 - // The number of elements in the current collection set. 
10.390 - size_t collection_set_size() { return _collection_set_size; } 10.391 - 10.392 - // Add "hr" to the CS. 10.393 - void add_to_collection_set(HeapRegion* hr); 10.394 + // Add old region "hr" to the CSet. 10.395 + void add_old_region_to_cset(HeapRegion* hr); 10.396 10.397 // Incremental CSet Support 10.398 10.399 @@ -1023,9 +886,6 @@ 10.400 // The tail of the incrementally built collection set. 10.401 HeapRegion* inc_set_tail() { return _inc_cset_tail; } 10.402 10.403 - // The number of elements in the incrementally built collection set. 10.404 - size_t inc_cset_size() { return _inc_cset_size; } 10.405 - 10.406 // Initialize incremental collection set info. 10.407 void start_incremental_cset_building(); 10.408 10.409 @@ -1125,8 +985,6 @@ 10.410 return _young_list_max_length; 10.411 } 10.412 10.413 - void update_region_num(bool young); 10.414 - 10.415 bool full_young_gcs() { 10.416 return _full_young_gcs; 10.417 }
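The two new accessors compose the collection-set length from per-category counts instead of the deleted _collection_set_size field. A toy standalone model of that composition (the struct and field names here are illustrative only, not the HotSpot declarations):

#include <cstddef>

// Region counts are kept per category; lengths are derived, never stored.
struct CSetLengths {
  size_t eden;         // eden regions in the incremental cset
  size_t survivor;     // survivor regions in the incremental cset
  size_t old_regions;  // old regions added via add_old_region_to_cset()

  size_t young_cset_region_length() const { return eden + survivor; }
  size_t cset_region_length() const {
    return young_cset_region_length() + old_regions;
  }
};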
11.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp Thu Dec 01 13:42:41 2011 -0500 11.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp Fri Dec 02 08:52:53 2011 -0500 11.3 @@ -209,29 +209,9 @@ 11.4 size_t cards_looked_up() { return _cards;} 11.5 }; 11.6 11.7 -// We want the parallel threads to start their scanning at 11.8 -// different collection set regions to avoid contention. 11.9 -// If we have: 11.10 -// n collection set regions 11.11 -// p threads 11.12 -// Then thread t will start at region t * floor (n/p) 11.13 - 11.14 -HeapRegion* G1RemSet::calculateStartRegion(int worker_i) { 11.15 - HeapRegion* result = _g1p->collection_set(); 11.16 - if (ParallelGCThreads > 0) { 11.17 - size_t cs_size = _g1p->collection_set_size(); 11.18 - int n_workers = _g1->workers()->total_workers(); 11.19 - size_t cs_spans = cs_size / n_workers; 11.20 - size_t ind = cs_spans * worker_i; 11.21 - for (size_t i = 0; i < ind; i++) 11.22 - result = result->next_in_collection_set(); 11.23 - } 11.24 - return result; 11.25 -} 11.26 - 11.27 void G1RemSet::scanRS(OopsInHeapRegionClosure* oc, int worker_i) { 11.28 double rs_time_start = os::elapsedTime(); 11.29 - HeapRegion *startRegion = calculateStartRegion(worker_i); 11.30 + HeapRegion *startRegion = _g1->start_cset_region_for_worker(worker_i); 11.31 11.32 ScanRSClosure scanRScl(oc, worker_i); 11.33 11.34 @@ -430,8 +410,10 @@ 11.35 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 11.36 dcqs.concatenate_logs(); 11.37 11.38 - if (ParallelGCThreads > 0) { 11.39 - _seq_task->set_n_threads((int)n_workers()); 11.40 + if (G1CollectedHeap::use_parallel_gc_threads()) { 11.41 + // Don't set the number of workers here. It will be set 11.42 + // when the task is run 11.43 + // _seq_task->set_n_termination((int)n_workers()); 11.44 } 11.45 guarantee( _cards_scanned == NULL, "invariant" ); 11.46 _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers()); 11.47 @@ -578,7 +560,10 @@ 11.48 void G1RemSet::scrub_par(BitMap* region_bm, BitMap* card_bm, 11.49 int worker_num, int claim_val) { 11.50 ScrubRSClosure scrub_cl(region_bm, card_bm); 11.51 - _g1->heap_region_par_iterate_chunked(&scrub_cl, worker_num, claim_val); 11.52 + _g1->heap_region_par_iterate_chunked(&scrub_cl, 11.53 + worker_num, 11.54 + (int) n_workers(), 11.55 + claim_val); 11.56 } 11.57 11.58
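The start-region computation has moved behind G1CollectedHeap::start_cset_region_for_worker(); the removed comment still describes the intent. A minimal standalone sketch of that index arithmetic, assuming the same t * floor(n/p) distribution (not the HotSpot types):

#include <cstddef>

// With n collection set regions and p workers, worker t starts
// scanning at region t * floor(n / p), so workers begin on disjoint
// parts of the collection set and avoid contention. Caller must
// ensure n_workers > 0.
size_t start_region_index(size_t n_regions, size_t n_workers, size_t worker_i) {
  size_t span = n_regions / n_workers;  // floor(n / p)
  return span * worker_i;
}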
12.1 --- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp Thu Dec 01 13:42:41 2011 -0500 12.2 +++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp Fri Dec 02 08:52:53 2011 -0500 12.3 @@ -104,8 +104,6 @@ 12.4 void scanRS(OopsInHeapRegionClosure* oc, int worker_i); 12.5 void updateRS(DirtyCardQueue* into_cset_dcq, int worker_i); 12.6 12.7 - HeapRegion* calculateStartRegion(int i); 12.8 - 12.9 CardTableModRefBS* ct_bs() { return _ct_bs; } 12.10 size_t cardsScanned() { return _total_cards_scanned; } 12.11
13.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Dec 01 13:42:41 2011 -0500 13.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Fri Dec 02 08:52:53 2011 -0500 13.3 @@ -39,10 +39,6 @@ 13.4 develop(intx, G1MarkingOverheadPercent, 0, \ 13.5 "Overhead of concurrent marking") \ 13.6 \ 13.7 - \ 13.8 - develop(intx, G1PolicyVerbose, 0, \ 13.9 - "The verbosity level on G1 policy decisions") \ 13.10 - \ 13.11 develop(intx, G1MarkingVerboseLevel, 0, \ 13.12 "Level (0-4) of verboseness of the marking code") \ 13.13 \ 13.14 @@ -58,9 +54,6 @@ 13.15 develop(bool, G1TraceMarkStackOverflow, false, \ 13.16 "If true, extra debugging code for CM restart for ovflw.") \ 13.17 \ 13.18 - develop(intx, G1PausesBtwnConcMark, -1, \ 13.19 - "If positive, fixed number of pauses between conc markings") \ 13.20 - \ 13.21 diagnostic(bool, G1SummarizeConcMark, false, \ 13.22 "Summarize concurrent mark info") \ 13.23 \
14.1 --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Thu Dec 01 13:42:41 2011 -0500 14.2 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Fri Dec 02 08:52:53 2011 -0500 14.3 @@ -367,12 +367,13 @@ 14.4 static void setup_heap_region_size(uintx min_heap_size); 14.5 14.6 enum ClaimValues { 14.7 - InitialClaimValue = 0, 14.8 - FinalCountClaimValue = 1, 14.9 - NoteEndClaimValue = 2, 14.10 - ScrubRemSetClaimValue = 3, 14.11 - ParVerifyClaimValue = 4, 14.12 - RebuildRSClaimValue = 5 14.13 + InitialClaimValue = 0, 14.14 + FinalCountClaimValue = 1, 14.15 + NoteEndClaimValue = 2, 14.16 + ScrubRemSetClaimValue = 3, 14.17 + ParVerifyClaimValue = 4, 14.18 + RebuildRSClaimValue = 5, 14.19 + CompleteMarkCSetClaimValue = 6 14.20 }; 14.21 14.22 inline HeapWord* par_allocate_no_bot_updates(size_t word_size) { 14.23 @@ -416,7 +417,7 @@ 14.24 14.25 void add_to_marked_bytes(size_t incr_bytes) { 14.26 _next_marked_bytes = _next_marked_bytes + incr_bytes; 14.27 - guarantee( _next_marked_bytes <= used(), "invariant" ); 14.28 + assert(_next_marked_bytes <= used(), "invariant"); 14.29 } 14.30 14.31 void zero_marked_bytes() {
15.1 --- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Thu Dec 01 13:42:41 2011 -0500 15.2 +++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp Fri Dec 02 08:52:53 2011 -0500 15.3 @@ -33,6 +33,7 @@ 15.4 #include "runtime/java.hpp" 15.5 #include "runtime/mutexLocker.hpp" 15.6 #include "runtime/virtualspace.hpp" 15.7 +#include "runtime/vmThread.hpp" 15.8 15.9 void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr, 15.10 OopsInGenClosure* cl, 15.11 @@ -42,6 +43,11 @@ 15.12 assert((n_threads == 1 && ParallelGCThreads == 0) || 15.13 n_threads <= (int)ParallelGCThreads, 15.14 "# worker threads != # requested!"); 15.15 + assert(!Thread::current()->is_VM_thread() || (n_threads == 1), "There is only 1 VM thread"); 15.16 + assert(UseDynamicNumberOfGCThreads || 15.17 + !FLAG_IS_DEFAULT(ParallelGCThreads) || 15.18 + n_threads == (int)ParallelGCThreads, 15.19 + "# worker threads != # requested!"); 15.20 // Make sure the LNC array is valid for the space. 15.21 jbyte** lowest_non_clean; 15.22 uintptr_t lowest_non_clean_base_chunk_index; 15.23 @@ -52,6 +58,8 @@ 15.24 15.25 int n_strides = n_threads * ParGCStridesPerThread; 15.26 SequentialSubTasksDone* pst = sp->par_seq_tasks(); 15.27 + // Sets the condition for completion of the subtask (how many threads 15.28 + // need to finish in order to be done). 15.29 pst->set_n_threads(n_threads); 15.30 pst->set_n_tasks(n_strides); 15.31
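pst->set_n_threads(n_threads) records how many workers must check in before the space's subtasks are complete, and set_n_tasks(n_strides) publishes the strides to be claimed. A minimal standalone sketch of stride claiming, with std::atomic standing in for SequentialSubTasksDone (an assumption for illustration):

#include <atomic>
#include <cstdio>

std::atomic<int> next_stride{0};  // models the shared claim counter

// Each worker claims strides until none remain. With
// n_strides = n_threads * ParGCStridesPerThread there are several
// strides per worker, which smooths out load imbalance.
void scan_strides(int n_strides) {
  for (int s = next_stride.fetch_add(1); s < n_strides;
       s = next_stride.fetch_add(1)) {
    std::printf("scanning stride %d\n", s);
  }
}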
16.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Thu Dec 01 13:42:41 2011 -0500 16.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp Fri Dec 02 08:52:53 2011 -0500 16.3 @@ -305,7 +305,7 @@ 16.4 16.5 inline ParScanThreadState& thread_state(int i); 16.6 16.7 - void reset(bool promotion_failed); 16.8 + void reset(int active_workers, bool promotion_failed); 16.9 void flush(); 16.10 16.11 #if TASKQUEUE_STATS 16.12 @@ -322,6 +322,9 @@ 16.13 ParallelTaskTerminator& _term; 16.14 ParNewGeneration& _gen; 16.15 Generation& _next_gen; 16.16 + public: 16.17 + bool is_valid(int id) const { return id < length(); } 16.18 + ParallelTaskTerminator* terminator() { return &_term; } 16.19 }; 16.20 16.21 16.22 @@ -351,9 +354,9 @@ 16.23 } 16.24 16.25 16.26 -void ParScanThreadStateSet::reset(bool promotion_failed) 16.27 +void ParScanThreadStateSet::reset(int active_threads, bool promotion_failed) 16.28 { 16.29 - _term.reset_for_reuse(); 16.30 + _term.reset_for_reuse(active_threads); 16.31 if (promotion_failed) { 16.32 for (int i = 0; i < length(); ++i) { 16.33 thread_state(i).print_and_clear_promotion_failure_size(); 16.34 @@ -569,6 +572,24 @@ 16.35 _state_set(state_set) 16.36 {} 16.37 16.38 +// Reset the terminator for the given number of 16.39 +// active threads. 16.40 +void ParNewGenTask::set_for_termination(int active_workers) { 16.41 + _state_set->reset(active_workers, _gen->promotion_failed()); 16.42 + // Should the heap be passed in? There's only 1 for now so 16.43 + // grab it instead. 16.44 + GenCollectedHeap* gch = GenCollectedHeap::heap(); 16.45 + gch->set_n_termination(active_workers); 16.46 +} 16.47 + 16.48 +// The "i" passed to this method is the part of the work for 16.49 +// this thread. It is not the worker ID. The "i" is derived 16.50 +// from _started_workers, which is incremented in internal_note_start(), 16.51 +// called in the GangWorker loop() under the 16.52 +// protection of the gang monitor and 16.53 +// after a task is started. So "i" is based on 16.54 +// first-come-first-served.
16.55 + 16.56 void ParNewGenTask::work(int i) { 16.57 GenCollectedHeap* gch = GenCollectedHeap::heap(); 16.58 // Since this is being done in a separate thread, need new resource 16.59 @@ -581,6 +602,8 @@ 16.60 Generation* old_gen = gch->next_gen(_gen); 16.61 16.62 ParScanThreadState& par_scan_state = _state_set->thread_state(i); 16.63 + assert(_state_set->is_valid(i), "Should not have been called"); 16.64 + 16.65 par_scan_state.set_young_old_boundary(_young_old_boundary); 16.66 16.67 par_scan_state.start_strong_roots(); 16.68 @@ -733,7 +756,9 @@ 16.69 16.70 private: 16.71 virtual void work(int i); 16.72 - 16.73 + virtual void set_for_termination(int active_workers) { 16.74 + _state_set.terminator()->reset_for_reuse(active_workers); 16.75 + } 16.76 private: 16.77 ParNewGeneration& _gen; 16.78 ProcessTask& _task; 16.79 @@ -789,18 +814,20 @@ 16.80 GenCollectedHeap* gch = GenCollectedHeap::heap(); 16.81 assert(gch->kind() == CollectedHeap::GenCollectedHeap, 16.82 "not a generational heap"); 16.83 - WorkGang* workers = gch->workers(); 16.84 + FlexibleWorkGang* workers = gch->workers(); 16.85 assert(workers != NULL, "Need parallel worker threads."); 16.86 + _state_set.reset(workers->active_workers(), _generation.promotion_failed()); 16.87 ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(), 16.88 _generation.reserved().end(), _state_set); 16.89 workers->run_task(&rp_task); 16.90 - _state_set.reset(_generation.promotion_failed()); 16.91 + _state_set.reset(0 /* bad value in debug if not reset */, 16.92 + _generation.promotion_failed()); 16.93 } 16.94 16.95 void ParNewRefProcTaskExecutor::execute(EnqueueTask& task) 16.96 { 16.97 GenCollectedHeap* gch = GenCollectedHeap::heap(); 16.98 - WorkGang* workers = gch->workers(); 16.99 + FlexibleWorkGang* workers = gch->workers(); 16.100 assert(workers != NULL, "Need parallel worker threads."); 16.101 ParNewRefEnqueueTaskProxy enq_task(task); 16.102 workers->run_task(&enq_task); 16.103 @@ -856,7 +883,13 @@ 16.104 assert(gch->kind() == CollectedHeap::GenCollectedHeap, 16.105 "not a CMS generational heap"); 16.106 AdaptiveSizePolicy* size_policy = gch->gen_policy()->size_policy(); 16.107 - WorkGang* workers = gch->workers(); 16.108 + FlexibleWorkGang* workers = gch->workers(); 16.109 + assert(workers != NULL, "Need workgang for parallel work"); 16.110 + int active_workers = 16.111 + AdaptiveSizePolicy::calc_active_workers(workers->total_workers(), 16.112 + workers->active_workers(), 16.113 + Threads::number_of_non_daemon_threads()); 16.114 + workers->set_active_workers(active_workers); 16.115 _next_gen = gch->next_gen(this); 16.116 assert(_next_gen != NULL, 16.117 "This must be the youngest gen, and not the only gen"); 16.118 @@ -894,13 +927,19 @@ 16.119 16.120 gch->save_marks(); 16.121 assert(workers != NULL, "Need parallel worker threads."); 16.122 - ParallelTaskTerminator _term(workers->total_workers(), task_queues()); 16.123 - ParScanThreadStateSet thread_state_set(workers->total_workers(), 16.124 + int n_workers = active_workers; 16.125 + 16.126 + // Set the correct parallelism (number of queues) in the reference processor 16.127 + ref_processor()->set_active_mt_degree(n_workers); 16.128 + 16.129 + // Always set the terminator for the active number of workers 16.130 + // because only those workers go through the termination protocol. 
16.131 + ParallelTaskTerminator _term(n_workers, task_queues()); 16.132 + ParScanThreadStateSet thread_state_set(workers->active_workers(), 16.133 *to(), *this, *_next_gen, *task_queues(), 16.134 _overflow_stacks, desired_plab_sz(), _term); 16.135 16.136 ParNewGenTask tsk(this, _next_gen, reserved().end(), &thread_state_set); 16.137 - int n_workers = workers->total_workers(); 16.138 gch->set_par_threads(n_workers); 16.139 gch->rem_set()->prepare_for_younger_refs_iterate(true); 16.140 // It turns out that even when we're using 1 thread, doing the work in a 16.141 @@ -914,7 +953,8 @@ 16.142 GenCollectedHeap::StrongRootsScope srs(gch); 16.143 tsk.work(0); 16.144 } 16.145 - thread_state_set.reset(promotion_failed()); 16.146 + thread_state_set.reset(0 /* Bad value in debug if not reset */, 16.147 + promotion_failed()); 16.148 16.149 // Process (weak) reference objects found during scavenge. 16.150 ReferenceProcessor* rp = ref_processor(); 16.151 @@ -927,6 +967,8 @@ 16.152 EvacuateFollowersClosureGeneral evacuate_followers(gch, _level, 16.153 &scan_without_gc_barrier, &scan_with_gc_barrier); 16.154 rp->setup_policy(clear_all_soft_refs); 16.155 + // Can the mt_degree be set later (at run_task() time would be best)? 16.156 + rp->set_active_mt_degree(active_workers); 16.157 if (rp->processing_is_mt()) { 16.158 ParNewRefProcTaskExecutor task_executor(*this, thread_state_set); 16.159 rp->process_discovered_references(&is_alive, &keep_alive,
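The sequence above -- ask AdaptiveSizePolicy for a count, install it with set_active_workers(), then size terminators and per-thread state from it -- is the heart of the dynamic-GC-threads change. A simplified standalone stand-in (the clamping rule shown is an assumption; the real calc_active_workers() heuristic is more involved):

#include <algorithm>

// Hypothetical model: never more workers than the gang has, scale with
// the mutator (non-daemon) thread count, and never fewer than one.
unsigned calc_active_workers_model(unsigned total_workers,
                                   unsigned prev_active,
                                   unsigned non_daemon_threads) {
  (void)prev_active;  // the real policy also damps changes between GCs
  unsigned wanted = std::min(total_workers, 2 * non_daemon_threads);
  return std::max(1u, wanted);
}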
17.1 --- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Thu Dec 01 13:42:41 2011 -0500 17.2 +++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp Fri Dec 02 08:52:53 2011 -0500 17.3 @@ -240,6 +240,10 @@ 17.4 HeapWord* young_old_boundary() { return _young_old_boundary; } 17.5 17.6 void work(int i); 17.7 + 17.8 + // Reset the terminator in ParScanThreadStateSet for 17.9 + // "active_workers" threads. 17.10 + virtual void set_for_termination(int active_workers); 17.11 }; 17.12 17.13 class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
18.1 --- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp Thu Dec 01 13:42:41 2011 -0500 18.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.cpp Fri Dec 02 08:52:53 2011 -0500 18.3 @@ -223,7 +223,8 @@ 18.4 MutableSpace* sp, 18.5 HeapWord* space_top, 18.6 PSPromotionManager* pm, 18.7 - uint stripe_number) { 18.8 + uint stripe_number, 18.9 + uint stripe_total) { 18.10 int ssize = 128; // Naked constant! Work unit = 64k. 18.11 int dirty_card_count = 0; 18.12 18.13 jbyte* start_card = byte_for(sp->bottom()); 18.14 jbyte* end_card = byte_for(sp_top - 1) + 1; 18.15 oop* last_scanned = NULL; // Prevent scanning objects more than once 18.16 - for (jbyte* slice = start_card; slice < end_card; slice += ssize*ParallelGCThreads) { 18.17 + // The width of a slice, ssize*stripe_total, must be 18.18 + // consistent with the number of stripes so that the complete 18.19 + // slice is covered. 18.20 + 18.21 + size_t slice_width = ssize * stripe_total; 18.22 + for (jbyte* slice = start_card; slice < end_card; slice += slice_width) { 18.23 jbyte* worker_start_card = slice + stripe_number * ssize; 18.24 if (worker_start_card >= end_card) 18.25 return; // We're done.
19.1 --- a/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp Thu Dec 01 13:42:41 2011 -0500 19.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/cardTableExtension.hpp Fri Dec 02 08:52:53 2011 -0500 19.3 @@ -69,7 +69,8 @@ 19.4 MutableSpace* sp, 19.5 HeapWord* space_top, 19.6 PSPromotionManager* pm, 19.7 - uint stripe_number); 19.8 + uint stripe_number, 19.9 + uint stripe_total); 19.10 19.11 // Verification 19.12 static void verify_all_young_refs_imprecise();
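The stripe/slice invariant is easiest to see with concrete numbers: each worker owns one ssize-card stripe within every slice of slice_width = ssize * stripe_total cards, so the workers' stripes tile the card range with no gaps or overlap. A runnable illustration (parameters chosen arbitrarily):

#include <cstdio>

int main() {
  const int ssize = 128;       // cards per stripe (the 64k work unit)
  const int stripe_total = 4;  // number of stripes == worker count
  const int n_cards = 1024;    // cards covered by the space
  const int slice_width = ssize * stripe_total;
  for (int stripe = 0; stripe < stripe_total; stripe++) {
    for (int slice = 0; slice < n_cards; slice += slice_width) {
      int start = slice + stripe * ssize;
      if (start >= n_cards) break;  // mirrors the early return above
      std::printf("stripe %d scans cards [%d, %d)\n",
                  stripe, start, start + ssize);
    }
  }
  return 0;
}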
20.1 --- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp Thu Dec 01 13:42:41 2011 -0500 20.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.cpp Fri Dec 02 08:52:53 2011 -0500 20.3 @@ -25,6 +25,7 @@ 20.4 #include "precompiled.hpp" 20.5 #include "gc_implementation/parallelScavenge/gcTaskManager.hpp" 20.6 #include "gc_implementation/parallelScavenge/gcTaskThread.hpp" 20.7 +#include "gc_implementation/shared/adaptiveSizePolicy.hpp" 20.8 #include "memory/allocation.hpp" 20.9 #include "memory/allocation.inline.hpp" 20.10 #include "runtime/mutex.hpp" 20.11 @@ -181,6 +182,7 @@ 20.12 } 20.13 set_insert_end(task); 20.14 increment_length(); 20.15 + verify_length(); 20.16 if (TraceGCTaskQueue) { 20.17 print("after:"); 20.18 } 20.19 @@ -192,7 +194,7 @@ 20.20 tty->print_cr("[" INTPTR_FORMAT "]" 20.21 " GCTaskQueue::enqueue(list: " 20.22 INTPTR_FORMAT ")", 20.23 - this); 20.24 + this, list); 20.25 print("before:"); 20.26 list->print("list:"); 20.27 } 20.28 @@ -211,14 +213,15 @@ 20.29 list->remove_end()->set_older(insert_end()); 20.30 insert_end()->set_newer(list->remove_end()); 20.31 set_insert_end(list->insert_end()); 20.32 + set_length(length() + list_length); 20.33 // empty the argument list. 20.34 } 20.35 - set_length(length() + list_length); 20.36 list->initialize(); 20.37 if (TraceGCTaskQueue) { 20.38 print("after:"); 20.39 list->print("list:"); 20.40 } 20.41 + verify_length(); 20.42 } 20.43 20.44 // Dequeue one task. 20.45 @@ -288,6 +291,7 @@ 20.46 decrement_length(); 20.47 assert(result->newer() == NULL, "shouldn't be on queue"); 20.48 assert(result->older() == NULL, "shouldn't be on queue"); 20.49 + verify_length(); 20.50 return result; 20.51 } 20.52 20.53 @@ -311,22 +315,40 @@ 20.54 result->set_newer(NULL); 20.55 result->set_older(NULL); 20.56 decrement_length(); 20.57 + verify_length(); 20.58 return result; 20.59 } 20.60 20.61 NOT_PRODUCT( 20.62 +// Count the elements in the queue and verify the length against 20.63 +// that count. 
20.64 +void GCTaskQueue::verify_length() const { 20.65 + uint count = 0; 20.66 + for (GCTask* element = insert_end(); 20.67 + element != NULL; 20.68 + element = element->older()) { 20.69 + 20.70 + count++; 20.71 + } 20.72 + assert(count == length(), "Length does not match queue"); 20.73 +} 20.74 + 20.75 void GCTaskQueue::print(const char* message) const { 20.76 tty->print_cr("[" INTPTR_FORMAT "] GCTaskQueue:" 20.77 " insert_end: " INTPTR_FORMAT 20.78 " remove_end: " INTPTR_FORMAT 20.79 + " length: %d" 20.80 " %s", 20.81 - this, insert_end(), remove_end(), message); 20.82 + this, insert_end(), remove_end(), length(), message); 20.83 + uint count = 0; 20.84 for (GCTask* element = insert_end(); 20.85 element != NULL; 20.86 element = element->older()) { 20.87 element->print(" "); 20.88 + count++; 20.89 tty->cr(); 20.90 } 20.91 + tty->print("Total tasks: %d", count); 20.92 } 20.93 ) 20.94 20.95 @@ -351,12 +373,16 @@ 20.96 // 20.97 GCTaskManager::GCTaskManager(uint workers) : 20.98 _workers(workers), 20.99 + _active_workers(0), 20.100 + _idle_workers(0), 20.101 _ndc(NULL) { 20.102 initialize(); 20.103 } 20.104 20.105 GCTaskManager::GCTaskManager(uint workers, NotifyDoneClosure* ndc) : 20.106 _workers(workers), 20.107 + _active_workers(0), 20.108 + _idle_workers(0), 20.109 _ndc(ndc) { 20.110 initialize(); 20.111 } 20.112 @@ -373,6 +399,7 @@ 20.113 GCTaskQueue* unsynchronized_queue = GCTaskQueue::create_on_c_heap(); 20.114 _queue = SynchronizedGCTaskQueue::create(unsynchronized_queue, lock()); 20.115 _noop_task = NoopGCTask::create_on_c_heap(); 20.116 + _idle_inactive_task = WaitForBarrierGCTask::create_on_c_heap(); 20.117 _resource_flag = NEW_C_HEAP_ARRAY(bool, workers()); 20.118 { 20.119 // Set up worker threads. 20.120 @@ -418,6 +445,8 @@ 20.121 assert(queue()->is_empty(), "still have queued work"); 20.122 NoopGCTask::destroy(_noop_task); 20.123 _noop_task = NULL; 20.124 + WaitForBarrierGCTask::destroy(_idle_inactive_task); 20.125 + _idle_inactive_task = NULL; 20.126 if (_thread != NULL) { 20.127 for (uint i = 0; i < workers(); i += 1) { 20.128 GCTaskThread::destroy(thread(i)); 20.129 @@ -442,6 +471,86 @@ 20.130 } 20.131 } 20.132 20.133 +void GCTaskManager::set_active_gang() { 20.134 + _active_workers = 20.135 + AdaptiveSizePolicy::calc_active_workers(workers(), 20.136 + active_workers(), 20.137 + Threads::number_of_non_daemon_threads()); 20.138 + 20.139 + assert(!all_workers_active() || active_workers() == ParallelGCThreads, 20.140 + err_msg("all_workers_active() is incorrect: " 20.141 + "active %d ParallelGCThreads %d", active_workers(), 20.142 + ParallelGCThreads)); 20.143 + if (TraceDynamicGCThreads) { 20.144 + gclog_or_tty->print_cr("GCTaskManager::set_active_gang(): " 20.145 + "all_workers_active() %d workers %d " 20.146 + "active %d ParallelGCThreads %d ", 20.147 + all_workers_active(), workers(), active_workers(), 20.148 + ParallelGCThreads); 20.149 + } 20.150 +} 20.151 + 20.152 +// Create IdleGCTasks for inactive workers. 20.153 +// Creates tasks in a ResourceArea and assumes 20.154 +// an appropriate ResourceMark. 20.155 +void GCTaskManager::task_idle_workers() { 20.156 + { 20.157 + int more_inactive_workers = 0; 20.158 + { 20.159 + // Stop any idle tasks from exiting their IdleGCTasks 20.160 + // and get the count for additional IdleGCTasks under 20.161 + // the GCTaskManager's monitor so that the "more_inactive_workers" 20.162 + // count is correct. 20.163 + MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag); 20.164 + _idle_inactive_task->set_should_wait(true); 20.165 + // active_workers is the number being requested. idle_workers 20.166 + // is the number currently idle. If all the workers are being 20.167 + // requested to be active but some are already idle, reduce 20.168 + // the number of active_workers to be consistent with the 20.169 + // number of idle_workers. The idle_workers are stuck in 20.170 + // idle tasks and will no longer be released (since a new GC 20.171 + // is starting). Try later to release enough idle_workers 20.172 + // to allow the desired number of active_workers. 20.173 + more_inactive_workers = 20.174 + workers() - active_workers() - idle_workers(); 20.175 + if (more_inactive_workers < 0) { 20.176 + int reduced_active_workers = active_workers() + more_inactive_workers; 20.177 + set_active_workers(reduced_active_workers); 20.178 + more_inactive_workers = 0; 20.179 + } 20.180 + if (TraceDynamicGCThreads) { 20.181 + gclog_or_tty->print_cr("JT: %d workers %d active %d " 20.182 + "idle %d more %d", 20.183 + Threads::number_of_non_daemon_threads(), 20.184 + workers(), 20.185 + active_workers(), 20.186 + idle_workers(), 20.187 + more_inactive_workers); 20.188 + } 20.189 + } 20.190 + GCTaskQueue* q = GCTaskQueue::create(); 20.191 + for(uint i = 0; i < (uint) more_inactive_workers; i++) { 20.192 + q->enqueue(IdleGCTask::create_on_c_heap()); 20.193 + increment_idle_workers(); 20.194 + } 20.195 + assert(workers() == active_workers() + idle_workers(), 20.196 + "total workers should equal active + inactive"); 20.197 + add_list(q); 20.198 + // GCTaskQueue* q was created in a ResourceArea so a 20.199 + // destroy() call is not needed. 20.200 + } 20.201 +} 20.202 + 20.203 +void GCTaskManager::release_idle_workers() { 20.204 + { 20.205 + MutexLockerEx ml(monitor(), 20.206 + Mutex::_no_safepoint_check_flag); 20.207 + _idle_inactive_task->set_should_wait(false); 20.208 + monitor()->notify_all(); 20.209 + // Release monitor 20.210 + } 20.211 +} 20.212 + 20.213 void GCTaskManager::print_task_time_stamps() { 20.214 for(uint i=0; i<ParallelGCThreads; i++) { 20.215 GCTaskThread* t = thread(i); 20.216 @@ -510,6 +619,13 @@ 20.217 // Release monitor(). 20.218 } 20.219 20.220 +// GC workers wait in get_task() for new work to be added 20.221 +// to the GCTaskManager's queue. When new work is added, 20.222 +// a notify is sent to the waiting GC workers which then 20.223 +// compete to get tasks. If a GC worker wakes up and there 20.224 +// is no work on the queue, it is given a noop_task to execute 20.225 +// and then loops to find more work. 20.226 + 20.227 GCTask* GCTaskManager::get_task(uint which) { 20.228 GCTask* result = NULL; 20.229 // Grab the queue lock. 20.230 @@ -558,8 +674,10 @@ 20.231 which, result, GCTask::Kind::to_string(result->kind())); 20.232 tty->print_cr(" %s", result->name()); 20.233 } 20.234 - increment_busy_workers(); 20.235 - increment_delivered_tasks(); 20.236 + if (!result->is_idle_task()) { 20.237 + increment_busy_workers(); 20.238 + increment_delivered_tasks(); 20.239 + } 20.240 return result; 20.241 // Release monitor().
20.242 } 20.243 @@ -622,6 +740,7 @@ 20.244 20.245 uint GCTaskManager::decrement_busy_workers() { 20.246 assert(queue()->own_lock(), "don't own the lock"); 20.247 + assert(_busy_workers > 0, "About to make a mistake"); 20.248 _busy_workers -= 1; 20.249 return _busy_workers; 20.250 } 20.251 @@ -643,11 +762,28 @@ 20.252 set_resource_flag(which, false); 20.253 } 20.254 20.255 +// "list" contains tasks that are ready to execute. Those 20.256 +// tasks are added to the GCTaskManager's queue of tasks and 20.257 +// then the GC workers are notified that there is new work to 20.258 +// do. 20.259 +// 20.260 +// Typically, different types of tasks can be added to the "list". 20.261 +// For example, in PSScavenge the OldToYoungRootsTask, SerialOldToYoungRootsTask, 20.262 +// ScavengeRootsTask, and StealTask tasks are all added to the list 20.263 +// and then the GC workers are notified of new work. The tasks are 20.264 +// handed out in the order in which they are added to the list 20.265 +// (although execution is not necessarily in that order). As long 20.266 +// as any tasks are running, the GCTaskManager will wait for execution 20.267 +// to complete. GC workers that execute a stealing task remain in 20.268 +// the stealing task until all stealing tasks have completed. The load 20.269 +// balancing afforded by the stealing tasks works best if the stealing 20.270 +// tasks are added last to the list. 20.271 + 20.272 void GCTaskManager::execute_and_wait(GCTaskQueue* list) { 20.273 WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create(); 20.274 list->enqueue(fin); 20.275 add_list(list); 20.276 - fin->wait_for(); 20.277 + fin->wait_for(true /* reset */); 20.278 // We have to release the barrier tasks! 20.279 WaitForBarrierGCTask::destroy(fin); 20.280 } 20.281 @@ -692,6 +828,72 @@ 20.282 } 20.283 20.284 // 20.285 +// IdleGCTask 20.286 +// 20.287 + 20.288 +IdleGCTask* IdleGCTask::create() { 20.289 + IdleGCTask* result = new IdleGCTask(false); 20.290 + return result; 20.291 +} 20.292 + 20.293 +IdleGCTask* IdleGCTask::create_on_c_heap() { 20.294 + IdleGCTask* result = new(ResourceObj::C_HEAP) IdleGCTask(true); 20.295 + return result; 20.296 +} 20.297 + 20.298 +void IdleGCTask::do_it(GCTaskManager* manager, uint which) { 20.299 + WaitForBarrierGCTask* wait_for_task = manager->idle_inactive_task(); 20.300 + if (TraceGCTaskManager) { 20.301 + tty->print_cr("[" INTPTR_FORMAT "]" 20.302 + " IdleGCTask::do_it()" 20.303 + " should_wait: %s", 20.304 + this, wait_for_task->should_wait() ? "true" : "false"); 20.305 + } 20.306 + MutexLockerEx ml(manager->monitor(), Mutex::_no_safepoint_check_flag); 20.307 + if (TraceDynamicGCThreads) { 20.308 + gclog_or_tty->print_cr("--- idle %d", which); 20.309 + } 20.310 + // Increment has to be done when the idle tasks are created.
20.311 + // manager->increment_idle_workers(); 20.312 + manager->monitor()->notify_all(); 20.313 + while (wait_for_task->should_wait()) { 20.314 + if (TraceGCTaskManager) { 20.315 + tty->print_cr("[" INTPTR_FORMAT "]" 20.316 + " IdleGCTask::do_it()" 20.317 + " [" INTPTR_FORMAT "] (%s)->wait()", 20.318 + this, manager->monitor(), manager->monitor()->name()); 20.319 + } 20.320 + manager->monitor()->wait(Mutex::_no_safepoint_check_flag, 0); 20.321 + } 20.322 + manager->decrement_idle_workers(); 20.323 + if (TraceDynamicGCThreads) { 20.324 + gclog_or_tty->print_cr("--- release %d", which); 20.325 + } 20.326 + if (TraceGCTaskManager) { 20.327 + tty->print_cr("[" INTPTR_FORMAT "]" 20.328 + " IdleGCTask::do_it() returns" 20.329 + " should_wait: %s", 20.330 + this, wait_for_task->should_wait() ? "true" : "false"); 20.331 + } 20.332 + // Release monitor(). 20.333 +} 20.334 + 20.335 +void IdleGCTask::destroy(IdleGCTask* that) { 20.336 + if (that != NULL) { 20.337 + that->destruct(); 20.338 + if (that->is_c_heap_obj()) { 20.339 + FreeHeap(that); 20.340 + } 20.341 + } 20.342 +} 20.343 + 20.344 +void IdleGCTask::destruct() { 20.345 + // This has to know its superclass structure, just like the constructor. 20.346 + this->GCTask::destruct(); 20.347 + // Nothing else to do. 20.348 +} 20.349 + 20.350 +// 20.351 // BarrierGCTask 20.352 // 20.353 20.354 @@ -768,7 +970,8 @@ 20.355 } 20.356 20.357 WaitForBarrierGCTask* WaitForBarrierGCTask::create_on_c_heap() { 20.358 - WaitForBarrierGCTask* result = new WaitForBarrierGCTask(true); 20.359 + WaitForBarrierGCTask* result = 20.360 + new (ResourceObj::C_HEAP) WaitForBarrierGCTask(true); 20.361 return result; 20.362 } 20.363 20.364 @@ -849,7 +1052,7 @@ 20.365 } 20.366 } 20.367 20.368 -void WaitForBarrierGCTask::wait_for() { 20.369 +void WaitForBarrierGCTask::wait_for(bool reset) { 20.370 if (TraceGCTaskManager) { 20.371 tty->print_cr("[" INTPTR_FORMAT "]" 20.372 " WaitForBarrierGCTask::wait_for()" 20.373 @@ -869,7 +1072,9 @@ 20.374 monitor()->wait(Mutex::_no_safepoint_check_flag, 0); 20.375 } 20.376 // Reset the flag in case someone reuses this task. 20.377 - set_should_wait(true); 20.378 + if (reset) { 20.379 + set_should_wait(true); 20.380 + } 20.381 if (TraceGCTaskManager) { 20.382 tty->print_cr("[" INTPTR_FORMAT "]" 20.383 " WaitForBarrierGCTask::wait_for() returns"
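IdleGCTask::do_it() is a monitor wait on the shared should_wait flag, and release_idle_workers() clears the flag and notifies. A standalone model of the same handshake, with std::condition_variable standing in for HotSpot's Monitor (simplified: no safepoint-check flags, no idle-worker accounting):

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

std::mutex mon;               // models the GCTaskManager monitor
std::condition_variable cv;
bool should_wait = true;      // models _idle_inactive_task->should_wait()

void idle_task(int which) {
  std::unique_lock<std::mutex> ml(mon);
  while (should_wait) {       // parked like an "idled" GC worker
    cv.wait(ml);
  }
  std::printf("worker %d released\n", which);
}

void release_idle_workers() {
  std::lock_guard<std::mutex> ml(mon);
  should_wait = false;
  cv.notify_all();            // wake every parked worker
}

int main() {
  std::vector<std::thread> idlers;
  for (int i = 0; i < 3; i++) idlers.emplace_back(idle_task, i);
  release_idle_workers();
  for (std::thread& t : idlers) t.join();
  return 0;
}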
21.1 --- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp Thu Dec 01 13:42:41 2011 -0500 21.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskManager.hpp Fri Dec 02 08:52:53 2011 -0500 21.3 @@ -45,6 +45,7 @@ 21.4 class ReleasingBarrierGCTask; 21.5 class NotifyingBarrierGCTask; 21.6 class WaitForBarrierGCTask; 21.7 +class IdleGCTask; 21.8 // A free list of Monitor*'s. 21.9 class MonitorSupply; 21.10 21.11 @@ -64,7 +65,8 @@ 21.12 unknown_task, 21.13 ordinary_task, 21.14 barrier_task, 21.15 - noop_task 21.16 + noop_task, 21.17 + idle_task 21.18 }; 21.19 static const char* to_string(kind value); 21.20 }; 21.21 @@ -108,6 +110,9 @@ 21.22 bool is_noop_task() const { 21.23 return kind()==Kind::noop_task; 21.24 } 21.25 + bool is_idle_task() const { 21.26 + return kind()==Kind::idle_task; 21.27 + } 21.28 void print(const char* message) const PRODUCT_RETURN; 21.29 protected: 21.30 // Constructors: Only create subclasses. 21.31 @@ -153,6 +158,7 @@ 21.32 assert(((insert_end() == NULL && remove_end() == NULL) || 21.33 (insert_end() != NULL && remove_end() != NULL)), 21.34 "insert_end and remove_end don't match"); 21.35 + assert((insert_end() != NULL) || (_length == 0), "Not empty"); 21.36 return insert_end() == NULL; 21.37 } 21.38 uint length() const { 21.39 @@ -204,6 +210,8 @@ 21.40 GCTask* remove(); // Remove from remove end. 21.41 GCTask* remove(GCTask* task); // Remove from the middle. 21.42 void print(const char* message) const PRODUCT_RETURN; 21.43 + // Debug support 21.44 + void verify_length() const PRODUCT_RETURN; 21.45 }; 21.46 21.47 // A GCTaskQueue that can be synchronized. 21.48 @@ -285,12 +293,76 @@ 21.49 } 21.50 }; 21.51 21.52 +// Dynamic number of GC threads 21.53 +// 21.54 +// GC threads wait in get_task() for work (i.e., a task) to perform. 21.55 +// When the number of GC threads was static, the number of tasks 21.56 +// created to do a job was equal to or greater than the maximum 21.57 +// number of GC threads (ParallelGCThreads). The job might be divided 21.58 +// into a number of tasks greater than the number of GC threads for 21.59 +// load balancing (i.e., over partitioning). The last task to be 21.60 +// executed by a GC thread in a job is a work stealing task. A 21.61 +// GC thread that gets a work stealing task continues to execute 21.62 +// that task until the job is done. In the static number of GC threads 21.63 +// case, tasks are added to a queue (FIFO). The work stealing tasks are 21.64 +// the last to be added. Once the tasks are added, the GC threads grab 21.65 +// a task and go. A single thread can do all the non-work stealing tasks 21.66 +// and then execute a work stealing task and wait for all the other GC threads 21.67 +// to execute their work stealing task. 21.68 +// In the dynamic number of GC threads implementation, idle-tasks are 21.69 +// created to occupy the non-participating or "inactive" threads. An 21.70 +// idle-task makes the GC thread wait on a barrier that is part of the 21.71 +// GCTaskManager. The GC threads that have been "idled" in an IdleGCTask 21.72 +// are released once all the active GC threads have finished their work 21.73 +// stealing tasks. The GCTaskManager does not wait for all the "idled" 21.74 +// GC threads to resume execution. When those GC threads do resume 21.75 +// execution in the course of the thread scheduling, they call get_task() 21.76 +// as all the other GC threads do. Because all the "idled" threads are 21.77 +// not required to execute in order to finish a job, it is possible for 21.78 +// a GC thread to still be "idled" when the next job is started. Such 21.79 +// a thread stays "idled" for the next job. This can result in a new 21.80 +// job not having all the expected active workers. For example, if one 21.81 +// job requests 4 active workers out of a total of 10 workers so the 21.82 +// remaining 6 are "idled", and the next job requests 6 active workers 21.83 +// but all 6 of the "idled" workers are still idle, then the next job 21.84 +// will only get 4 active workers. 21.85 +// The implementation for the parallel old compaction phase has an 21.86 +// added complication. In the static case parold partitions the chunks 21.87 +// ready to be filled into stacks, one for each GC thread. A GC thread 21.88 +// executing a draining task (drains the stack of ready chunks) 21.89 +// claims a stack according to its id (the unique ordinal value assigned 21.90 +// to each GC thread). In the dynamic case not all GC threads will 21.91 +// actively participate so stacks with ready-to-fill chunks can only be 21.92 +// given to the active threads. An initial implementation chose stacks 21.93 +// numbered 1-n to get the ready chunks and required that GC threads 21.94 +// 1-n be the active workers. This was undesirable because it required 21.95 +// certain threads to participate. In the final implementation a 21.96 +// list of stacks equal in number to the active workers is filled 21.97 +// with ready chunks. GC threads that participate get a stack from 21.98 +// the task (DrainStacksCompactionTask), empty the stack, and then add it to a 21.99 +// recycling list at the end of the task. If the same GC thread gets 21.100 +// a second task, it gets a second stack to drain and returns it. The 21.101 +// stacks are added to a recycling list so that later stealing tasks 21.102 +// for this job can get a stack from the recycling list. Stealing tasks 21.103 +// use the stacks in their work in a way similar to the draining tasks. 21.104 +// A thread is not guaranteed to get anything but a stealing task and 21.105 +// a thread that only gets a stealing task has to get a stack. A failed 21.106 +// implementation tried to have the GC threads keep the stack they used 21.107 +// during a draining task for later use in the stealing task but that didn't 21.108 +// work because, as noted, a thread is not guaranteed to get a draining task. 21.109 +// 21.110 +// For PSScavenge and ParCompactionManager the GC threads are 21.111 +// held in the GCTaskThread** _thread array in GCTaskManager. 21.112 + 21.113 + 21.114 class GCTaskManager : public CHeapObj { 21.115 friend class ParCompactionManager; 21.116 friend class PSParallelCompact; 21.117 friend class PSScavenge; 21.118 friend class PSRefProcTaskExecutor; 21.119 friend class RefProcTaskExecutor; 21.120 + friend class GCTaskThread; 21.121 + friend class IdleGCTask; 21.122 private: 21.123 // Instance state. 21.124 NotifyDoneClosure* _ndc; // Notify on completion. 21.125 @@ -298,6 +370,7 @@ 21.126 Monitor* _monitor; // Notification of changes. 21.127 SynchronizedGCTaskQueue* _queue; // Queue of tasks. 21.128 GCTaskThread** _thread; // Array of worker threads. 21.129 + uint _active_workers; // Number of active workers. 21.130 uint _busy_workers; // Number of busy workers. 21.131 uint _blocking_worker; // The worker that's blocking. 21.132 bool* _resource_flag; // Array of flag per threads.
21.133 @@ -307,6 +380,8 @@ 21.134 uint _emptied_queue; // Times we emptied the queue. 21.135 NoopGCTask* _noop_task; // The NoopGCTask instance. 21.136 uint _noop_tasks; // Count of noop tasks. 21.137 + WaitForBarrierGCTask* _idle_inactive_task;// Task for inactive workers 21.138 + volatile uint _idle_workers; // Number of idled workers 21.139 public: 21.140 // Factory create and destroy methods. 21.141 static GCTaskManager* create(uint workers) { 21.142 @@ -324,6 +399,9 @@ 21.143 uint busy_workers() const { 21.144 return _busy_workers; 21.145 } 21.146 + volatile uint idle_workers() const { 21.147 + return _idle_workers; 21.148 + } 21.149 // Pun between Monitor* and Mutex* 21.150 Monitor* monitor() const { 21.151 return _monitor; 21.152 @@ -331,6 +409,9 @@ 21.153 Monitor * lock() const { 21.154 return _monitor; 21.155 } 21.156 + WaitForBarrierGCTask* idle_inactive_task() { 21.157 + return _idle_inactive_task; 21.158 + } 21.159 // Methods. 21.160 // Add the argument task to be run. 21.161 void add_task(GCTask* task); 21.162 @@ -350,6 +431,10 @@ 21.163 bool should_release_resources(uint which); // Predicate. 21.164 // Note the release of resources by the argument worker. 21.165 void note_release(uint which); 21.166 + // Create IdleGCTasks for inactive workers and start workers 21.167 + void task_idle_workers(); 21.168 + // Release the workers in IdleGCTasks 21.169 + void release_idle_workers(); 21.170 // Constants. 21.171 // A sentinel worker identifier. 21.172 static uint sentinel_worker() { 21.173 @@ -375,6 +460,15 @@ 21.174 uint workers() const { 21.175 return _workers; 21.176 } 21.177 + void set_active_workers(uint v) { 21.178 + assert(v <= _workers, "Trying to set more workers active than there are"); 21.179 + _active_workers = MIN2(v, _workers); 21.180 + assert(v != 0, "Trying to set active workers to 0"); 21.181 + _active_workers = MAX2(1U, _active_workers); 21.182 + } 21.183 + // Sets the number of threads that will be used in a collection 21.184 + void set_active_gang(); 21.185 + 21.186 NotifyDoneClosure* notify_done_closure() const { 21.187 return _ndc; 21.188 } 21.189 @@ -457,8 +551,21 @@ 21.190 void reset_noop_tasks() { 21.191 _noop_tasks = 0; 21.192 } 21.193 + void increment_idle_workers() { 21.194 + _idle_workers++; 21.195 + } 21.196 + void decrement_idle_workers() { 21.197 + _idle_workers--; 21.198 + } 21.199 // Other methods. 21.200 void initialize(); 21.201 + 21.202 + public: 21.203 + // Return true if all workers are currently active. 21.204 + bool all_workers_active() { return workers() == active_workers(); } 21.205 + uint active_workers() const { 21.206 + return _active_workers; 21.207 + } 21.208 }; 21.209 21.210 // 21.211 @@ -475,6 +582,8 @@ 21.212 static NoopGCTask* create(); 21.213 static NoopGCTask* create_on_c_heap(); 21.214 static void destroy(NoopGCTask* that); 21.215 + 21.216 + virtual char* name() { return (char *)"noop task"; } 21.217 // Methods from GCTask. 21.218 void do_it(GCTaskManager* manager, uint which) { 21.219 // Nothing to do. 21.220 @@ -518,6 +627,8 @@ 21.221 } 21.222 // Destructor-like method. 21.223 void destruct(); 21.224 + 21.225 + virtual char* name() { return (char *)"barrier task"; } 21.226 // Methods. 21.227 // Wait for this to be the only task running. 21.228 void do_it_internal(GCTaskManager* manager, uint which); 21.229 @@ -586,11 +697,13 @@ 21.230 // the BarrierGCTask is done. 21.231 // This may cover many of the uses of NotifyingBarrierGCTasks. 
21.232 class WaitForBarrierGCTask : public BarrierGCTask { 21.233 + friend class GCTaskManager; 21.234 + friend class IdleGCTask; 21.235 private: 21.236 // Instance state. 21.237 - Monitor* _monitor; // Guard and notify changes. 21.238 - bool _should_wait; // true=>wait, false=>proceed. 21.239 - const bool _is_c_heap_obj; // Was allocated on the heap. 21.240 + Monitor* _monitor; // Guard and notify changes. 21.241 + volatile bool _should_wait; // true=>wait, false=>proceed. 21.242 + const bool _is_c_heap_obj; // Was allocated on the heap. 21.243 public: 21.244 virtual char* name() { return (char *) "waitfor-barrier-task"; } 21.245 21.246 @@ -600,7 +713,10 @@ 21.247 static void destroy(WaitForBarrierGCTask* that); 21.248 // Methods. 21.249 void do_it(GCTaskManager* manager, uint which); 21.250 - void wait_for(); 21.251 + void wait_for(bool reset); 21.252 + void set_should_wait(bool value) { 21.253 + _should_wait = value; 21.254 + } 21.255 protected: 21.256 // Constructor. Clients use factory, but there might be subclasses. 21.257 WaitForBarrierGCTask(bool on_c_heap); 21.258 @@ -613,14 +729,38 @@ 21.259 bool should_wait() const { 21.260 return _should_wait; 21.261 } 21.262 - void set_should_wait(bool value) { 21.263 - _should_wait = value; 21.264 - } 21.265 bool is_c_heap_obj() { 21.266 return _is_c_heap_obj; 21.267 } 21.268 }; 21.269 21.270 +// Task that is used to idle a GC task when fewer than 21.271 +// the maximum workers are wanted. 21.272 +class IdleGCTask : public GCTask { 21.273 + const bool _is_c_heap_obj; // Was allocated on the heap. 21.274 + public: 21.275 + bool is_c_heap_obj() { 21.276 + return _is_c_heap_obj; 21.277 + } 21.278 + // Factory create and destroy methods. 21.279 + static IdleGCTask* create(); 21.280 + static IdleGCTask* create_on_c_heap(); 21.281 + static void destroy(IdleGCTask* that); 21.282 + 21.283 + virtual char* name() { return (char *)"idle task"; } 21.284 + // Methods from GCTask. 21.285 + virtual void do_it(GCTaskManager* manager, uint which); 21.286 +protected: 21.287 + // Constructor. 21.288 + IdleGCTask(bool on_c_heap) : 21.289 + GCTask(GCTask::Kind::idle_task), 21.290 + _is_c_heap_obj(on_c_heap) { 21.291 + // Nothing to do. 21.292 + } 21.293 + // Destructor-like method. 21.294 + void destruct(); 21.295 +}; 21.296 + 21.297 class MonitorSupply : public AllStatic { 21.298 private: 21.299 // State.
22.1 --- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp Thu Dec 01 13:42:41 2011 -0500 22.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.cpp Fri Dec 02 08:52:53 2011 -0500 22.3 @@ -93,6 +93,11 @@ 22.4 st->cr(); 22.5 } 22.6 22.7 +// GC workers get tasks from the GCTaskManager and execute 22.8 +// them in this method. If there are no tasks to execute, 22.9 +// the GC workers wait in the GCTaskManager's get_task() 22.10 +// for tasks to be enqueued for execution. 22.11 + 22.12 void GCTaskThread::run() { 22.13 // Set up the thread for stack overflow support 22.14 this->record_stack_base_and_size(); 22.15 @@ -124,7 +129,6 @@ 22.16 for (; /* break */; ) { 22.17 // This will block until there is a task to be gotten. 22.18 GCTask* task = manager()->get_task(which()); 22.19 - 22.20 // In case the update is costly 22.21 if (PrintGCTaskTimeStamps) { 22.22 timer.update(); 22.23 @@ -134,18 +138,28 @@ 22.24 char* name = task->name(); 22.25 22.26 task->do_it(manager(), which()); 22.27 - manager()->note_completion(which()); 22.28 22.29 - if (PrintGCTaskTimeStamps) { 22.30 - assert(_time_stamps != NULL, "Sanity (PrintGCTaskTimeStamps set late?)"); 22.31 + if (!task->is_idle_task()) { 22.32 + manager()->note_completion(which()); 22.33 22.34 - timer.update(); 22.35 + if (PrintGCTaskTimeStamps) { 22.36 + assert(_time_stamps != NULL, 22.37 + "Sanity (PrintGCTaskTimeStamps set late?)"); 22.38 22.39 - GCTaskTimeStamp* time_stamp = time_stamp_at(_time_stamp_index++); 22.40 + timer.update(); 22.41 22.42 - time_stamp->set_name(name); 22.43 - time_stamp->set_entry_time(entry_time); 22.44 - time_stamp->set_exit_time(timer.ticks()); 22.45 + GCTaskTimeStamp* time_stamp = time_stamp_at(_time_stamp_index++); 22.46 + 22.47 + time_stamp->set_name(name); 22.48 + time_stamp->set_entry_time(entry_time); 22.49 + time_stamp->set_exit_time(timer.ticks()); 22.50 + } 22.51 + } else { 22.52 + // idle tasks complete outside the normal accounting 22.53 + // so that a task can complete without waiting for idle tasks. 22.54 + // They have to be terminated separately. 22.55 + IdleGCTask::destroy((IdleGCTask*)task); 22.56 + set_is_working(true); 22.57 } 22.58 22.59 // Check if we should release our inner resources.
23.1 --- a/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp Thu Dec 01 13:42:41 2011 -0500 23.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/gcTaskThread.hpp Fri Dec 02 08:52:53 2011 -0500 23.3 @@ -35,6 +35,7 @@ 23.4 class GCTaskManager; 23.5 23.6 class GCTaskThread : public WorkerThread { 23.7 + friend class GCTaskManager; 23.8 private: 23.9 // Instance state. 23.10 GCTaskManager* _manager; // Manager for worker. 23.11 @@ -45,6 +46,8 @@ 23.12 23.13 GCTaskTimeStamp* time_stamp_at(uint index); 23.14 23.15 + bool _is_working; // True if participating in GC tasks 23.16 + 23.17 public: 23.18 // Factory create and destroy methods. 23.19 static GCTaskThread* create(GCTaskManager* manager, 23.20 @@ -84,6 +87,7 @@ 23.21 uint processor_id() const { 23.22 return _processor_id; 23.23 } 23.24 + void set_is_working(bool v) { _is_working = v; } 23.25 }; 23.26 23.27 class GCTaskTimeStamp : public CHeapObj
24.1 --- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Thu Dec 01 13:42:41 2011 -0500 24.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp Fri Dec 02 08:52:53 2011 -0500 24.3 @@ -152,15 +152,16 @@ 24.4 { 24.5 ParallelScavengeHeap* heap = PSParallelCompact::gc_heap(); 24.6 uint parallel_gc_threads = heap->gc_task_manager()->workers(); 24.7 + uint active_gc_threads = heap->gc_task_manager()->active_workers(); 24.8 RegionTaskQueueSet* qset = ParCompactionManager::region_array(); 24.9 - ParallelTaskTerminator terminator(parallel_gc_threads, qset); 24.10 + ParallelTaskTerminator terminator(active_gc_threads, qset); 24.11 GCTaskQueue* q = GCTaskQueue::create(); 24.12 for(uint i=0; i<parallel_gc_threads; i++) { 24.13 q->enqueue(new RefProcTaskProxy(task, i)); 24.14 } 24.15 if (task.marks_oops_alive()) { 24.16 if (parallel_gc_threads>1) { 24.17 - for (uint j=0; j<parallel_gc_threads; j++) { 24.18 + for (uint j=0; j<active_gc_threads; j++) { 24.19 q->enqueue(new StealMarkingTask(&terminator)); 24.20 } 24.21 } 24.22 @@ -216,7 +217,6 @@ 24.23 // StealRegionCompactionTask 24.24 // 24.25 24.26 - 24.27 StealRegionCompactionTask::StealRegionCompactionTask(ParallelTaskTerminator* t): 24.28 _terminator(t) {} 24.29 24.30 @@ -229,6 +229,32 @@ 24.31 ParCompactionManager* cm = 24.32 ParCompactionManager::gc_thread_compaction_manager(which); 24.33 24.34 + 24.35 + // If not all threads are active, get a draining stack 24.36 + // from the list. Else, just use this thread's draining stack. 24.37 + uint which_stack_index; 24.38 + bool use_all_workers = manager->all_workers_active(); 24.39 + if (use_all_workers) { 24.40 + which_stack_index = which; 24.41 + assert(manager->active_workers() == ParallelGCThreads, 24.42 + err_msg("all_workers_active has been incorrectly set: " 24.43 + " active %d ParallelGCThreads %d", manager->active_workers(), 24.44 + ParallelGCThreads)); 24.45 + } else { 24.46 + which_stack_index = ParCompactionManager::pop_recycled_stack_index(); 24.47 + } 24.48 + 24.49 + cm->set_region_stack_index(which_stack_index); 24.50 + cm->set_region_stack(ParCompactionManager::region_list(which_stack_index)); 24.51 + if (TraceDynamicGCThreads) { 24.52 + gclog_or_tty->print_cr("StealRegionCompactionTask::do_it " 24.53 + "region_stack_index %d region_stack = 0x%x " 24.54 + " empty (%d) use all workers %d", 24.55 + which_stack_index, ParCompactionManager::region_list(which_stack_index), 24.56 + cm->region_stack()->is_empty(), 24.57 + use_all_workers); 24.58 + } 24.59 + 24.60 // Has to drain stacks first because there may be regions on 24.61 // preloaded onto the stack and this thread may never have 24.62 // done a draining task. Are the draining tasks needed?
24.63 @@ -285,6 +311,50 @@ 24.64 ParCompactionManager* cm = 24.65 ParCompactionManager::gc_thread_compaction_manager(which); 24.66 24.67 + uint which_stack_index; 24.68 + bool use_all_workers = manager->all_workers_active(); 24.69 + if (use_all_workers) { 24.70 + which_stack_index = which; 24.71 + assert(manager->active_workers() == ParallelGCThreads, 24.72 + err_msg("all_workers_active has been incorrectly set: " 24.73 + " active %d ParallelGCThreads %d", manager->active_workers(), 24.74 + ParallelGCThreads)); 24.75 + } else { 24.76 + which_stack_index = stack_index(); 24.77 + } 24.78 + 24.79 + cm->set_region_stack(ParCompactionManager::region_list(which_stack_index)); 24.80 + if (TraceDynamicGCThreads) { 24.81 + gclog_or_tty->print_cr("DrainStacksCompactionTask::do_it which = %d " 24.82 + "which_stack_index = %d/empty(%d) " 24.83 + "use all workers %d", 24.84 + which, which_stack_index, 24.85 + cm->region_stack()->is_empty(), 24.86 + use_all_workers); 24.87 + } 24.88 + 24.89 + cm->set_region_stack_index(which_stack_index); 24.90 + 24.91 // Process any regions already in the compaction managers stacks. 24.92 cm->drain_region_stacks(); 24.93 + 24.94 + assert(cm->region_stack()->is_empty(), "Not empty"); 24.95 + 24.96 + if (!use_all_workers) { 24.97 + // Always give up the region stack. 24.98 + assert(cm->region_stack() == 24.99 + ParCompactionManager::region_list(cm->region_stack_index()), 24.100 + "region_stack and region_stack_index are inconsistent"); 24.101 + ParCompactionManager::push_recycled_stack_index(cm->region_stack_index()); 24.102 + 24.103 + if (TraceDynamicGCThreads) { 24.104 + void* old_region_stack = (void*) cm->region_stack(); 24.105 + int old_region_stack_index = cm->region_stack_index(); 24.106 + gclog_or_tty->print_cr("Pushing region stack 0x%x/%d", 24.107 + old_region_stack, old_region_stack_index); 24.108 + } 24.109 + 24.110 + cm->set_region_stack(NULL); 24.111 + cm->set_region_stack_index((uint)max_uintx); 24.112 + } 24.113 }
25.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Thu Dec 01 13:42:41 2011 -0500 25.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.cpp Fri Dec 02 08:52:53 2011 -0500 25.3 @@ -1,5 +1,5 @@ 25.4 /* 25.5 - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 25.6 + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. 25.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 25.8 * 25.9 * This code is free software; you can redistribute it and/or modify it 25.10 @@ -39,6 +39,9 @@ 25.11 25.12 PSOldGen* ParCompactionManager::_old_gen = NULL; 25.13 ParCompactionManager** ParCompactionManager::_manager_array = NULL; 25.14 + 25.15 +RegionTaskQueue** ParCompactionManager::_region_list = NULL; 25.16 + 25.17 OopTaskQueueSet* ParCompactionManager::_stack_array = NULL; 25.18 ParCompactionManager::ObjArrayTaskQueueSet* 25.19 ParCompactionManager::_objarray_queues = NULL; 25.20 @@ -46,8 +49,14 @@ 25.21 ParMarkBitMap* ParCompactionManager::_mark_bitmap = NULL; 25.22 RegionTaskQueueSet* ParCompactionManager::_region_array = NULL; 25.23 25.24 +uint* ParCompactionManager::_recycled_stack_index = NULL; 25.25 +int ParCompactionManager::_recycled_top = -1; 25.26 +int ParCompactionManager::_recycled_bottom = -1; 25.27 + 25.28 ParCompactionManager::ParCompactionManager() : 25.29 - _action(CopyAndUpdate) { 25.30 + _action(CopyAndUpdate), 25.31 + _region_stack(NULL), 25.32 + _region_stack_index((uint)max_uintx) { 25.33 25.34 ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap(); 25.35 assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity"); 25.36 @@ -57,7 +66,10 @@ 25.37 25.38 marking_stack()->initialize(); 25.39 _objarray_stack.initialize(); 25.40 - region_stack()->initialize(); 25.41 +} 25.42 + 25.43 +ParCompactionManager::~ParCompactionManager() { 25.44 + delete _recycled_stack_index; 25.45 } 25.46 25.47 void ParCompactionManager::initialize(ParMarkBitMap* mbm) { 25.48 @@ -72,6 +84,19 @@ 25.49 _manager_array = NEW_C_HEAP_ARRAY(ParCompactionManager*, parallel_gc_threads+1 ); 25.50 guarantee(_manager_array != NULL, "Could not allocate manager_array"); 25.51 25.52 + _region_list = NEW_C_HEAP_ARRAY(RegionTaskQueue*, 25.53 + parallel_gc_threads+1); 25.54 + guarantee(_region_list != NULL, "Could not allocate region_list"); 25.55 + 25.56 + _recycled_stack_index = NEW_C_HEAP_ARRAY(uint, parallel_gc_threads); 25.57 + 25.58 + // parallel_gc_threads + 1 to be consistent with the number of 25.59 + // compaction managers.
25.60 + for(uint i=0; i<parallel_gc_threads + 1; i++) { 25.61 + _region_list[i] = new RegionTaskQueue(); 25.62 + region_list(i)->initialize(); 25.63 + } 25.64 + 25.65 _stack_array = new OopTaskQueueSet(parallel_gc_threads); 25.66 guarantee(_stack_array != NULL, "Could not allocate stack_array"); 25.67 _objarray_queues = new ObjArrayTaskQueueSet(parallel_gc_threads); 25.68 @@ -85,7 +110,7 @@ 25.69 guarantee(_manager_array[i] != NULL, "Could not create ParCompactionManager"); 25.70 stack_array()->register_queue(i, _manager_array[i]->marking_stack()); 25.71 _objarray_queues->register_queue(i, &_manager_array[i]->_objarray_stack); 25.72 - region_array()->register_queue(i, _manager_array[i]->region_stack()); 25.73 + region_array()->register_queue(i, region_list(i)); 25.74 } 25.75 25.76 // The VMThread gets its own ParCompactionManager, which is not available 25.77 @@ -97,6 +122,29 @@ 25.78 "Not initialized?"); 25.79 } 25.80 25.81 +int ParCompactionManager::pop_recycled_stack_index() { 25.82 + assert(_recycled_bottom <= _recycled_top, "list is empty"); 25.83 + // Get the next available index 25.84 + if (_recycled_bottom < _recycled_top) { 25.85 + uint cur, next, last; 25.86 + do { 25.87 + cur = _recycled_bottom; 25.88 + next = cur + 1; 25.89 + last = Atomic::cmpxchg(next, &_recycled_bottom, cur); 25.90 + } while (cur != last); 25.91 + return _recycled_stack_index[next]; 25.92 + } else { 25.93 + return -1; 25.94 + } 25.95 +} 25.96 + 25.97 +void ParCompactionManager::push_recycled_stack_index(uint v) { 25.98 + // Get the next available index 25.99 + int cur = Atomic::add(1, &_recycled_top); 25.100 + _recycled_stack_index[cur] = v; 25.101 + assert(_recycled_bottom <= _recycled_top, "list top and bottom are wrong"); 25.102 +} 25.103 + 25.104 bool ParCompactionManager::should_update() { 25.105 assert(action() != NotValid, "Action is not set"); 25.106 return (action() == ParCompactionManager::Update) || 25.107 @@ -111,14 +159,13 @@ 25.108 (action() == ParCompactionManager::UpdateAndCopy); 25.109 } 25.110 25.111 -bool ParCompactionManager::should_verify_only() { 25.112 - assert(action() != NotValid, "Action is not set"); 25.113 - return action() == ParCompactionManager::VerifyUpdate; 25.114 +void ParCompactionManager::region_list_push(uint list_index, 25.115 + size_t region_index) { 25.116 + region_list(list_index)->push(region_index); 25.117 } 25.118 25.119 -bool ParCompactionManager::should_reset_only() { 25.120 - assert(action() != NotValid, "Action is not set"); 25.121 - return action() == ParCompactionManager::ResetObjects; 25.122 +void ParCompactionManager::verify_region_list_empty(uint list_index) { 25.123 + assert(region_list(list_index)->is_empty(), "Not empty"); 25.124 } 25.125 25.126 ParCompactionManager*
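The pop/push pair above is a small lock-free hand-off list: push_recycled_stack_index() claims a slot with an atomic add on _recycled_top, and pop_recycled_stack_index() advances _recycled_bottom with a cmpxchg retry loop, so each recycled index is handed to exactly one thread, and both cursors are reset between phases via reset_recycled_stack_index(). A minimal standalone sketch of the same shape, using std::atomic in place of HotSpot's Atomic class (the class and names here are illustrative, not HotSpot code; the pop re-checks the bound on each retry):

    #include <atomic>
    #include <cstdint>

    // Bounded single-phase hand-off list: top only grows, bottom only chases
    // it, and both are reset between GC phases. As in the HotSpot version, a
    // push is assumed to be visible before any pop of that slot (drain tasks
    // release indices before steal tasks consume them).
    class RecycledIndexList {
      uint32_t         _slots[256];   // capacity >= number of GC workers
      std::atomic<int> _top{-1};      // index of the last entry pushed
      std::atomic<int> _bottom{-1};   // index of the last entry popped

    public:
      void push(uint32_t v) {
        int cur = _top.fetch_add(1) + 1;  // claim a unique slot
        _slots[cur] = v;
      }

      int pop() {                         // returns -1 if nothing available
        int cur = _bottom.load();
        while (cur < _top.load()) {
          // The winner of the compare-exchange owns slot cur + 1.
          if (_bottom.compare_exchange_weak(cur, cur + 1)) {
            return (int)_slots[cur + 1];
          }
          // compare_exchange_weak refreshed cur on failure; retry.
        }
        return -1;
      }

      void reset() { _top = -1; _bottom = -1; }
    };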
26.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Thu Dec 01 13:42:41 2011 -0500 26.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psCompactionManager.hpp Fri Dec 02 08:52:53 2011 -0500 26.3 @@ -1,5 +1,5 @@ 26.4 /* 26.5 - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 26.6 + * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved. 26.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 26.8 * 26.9 * This code is free software; you can redistribute it and/or modify it 26.10 @@ -48,6 +48,7 @@ 26.11 friend class StealRegionCompactionTask; 26.12 friend class UpdateAndFillClosure; 26.13 friend class RefProcTaskExecutor; 26.14 + friend class IdleGCTask; 26.15 26.16 public: 26.17 26.18 @@ -58,8 +59,6 @@ 26.19 Copy, 26.20 UpdateAndCopy, 26.21 CopyAndUpdate, 26.22 - VerifyUpdate, 26.23 - ResetObjects, 26.24 NotValid 26.25 }; 26.26 // ------------------------ End don't putback if not needed 26.27 @@ -85,7 +84,31 @@ 26.28 // Is there a way to reuse the _marking_stack for the 26.29 // saving empty regions? For now just create a different 26.30 // type of TaskQueue. 26.31 - RegionTaskQueue _region_stack; 26.32 + RegionTaskQueue* _region_stack; 26.33 + 26.34 + static RegionTaskQueue** _region_list; 26.35 + // Index in _region_list for current _region_stack. 26.36 + uint _region_stack_index; 26.37 + 26.38 + // Indexes of recycled region stacks/overflow stacks 26.39 + // Stacks of regions to be compacted are embedded in the tasks doing 26.40 + // the compaction. A thread that executes the task extracts the 26.41 + // region stack and drains it. These threads keep these region 26.42 + // stacks for use during compaction task stealing. If a thread 26.43 + // gets a second draining task, it pushes its current region stack 26.44 + // index into the array _recycled_stack_index and gets a new 26.45 + // region stack from the task. A thread that is executing a 26.46 + // compaction stealing task without ever having executed a 26.47 + // draining task will get a region stack from _recycled_stack_index. 26.48 + // 26.49 + // Array of indexes into the array of region stacks. 26.50 + static uint* _recycled_stack_index; 26.51 + // The index into _recycled_stack_index of the last region stack index 26.52 + // pushed. If -1, there are no entries in _recycled_stack_index. 26.53 + static int _recycled_top; 26.54 + // The index into _recycled_stack_index of the last region stack index 26.55 + // popped. If -1, no entry has been popped. 26.56 + static int _recycled_bottom; 26.57 26.58 Stack<Klass*> _revisit_klass_stack; 26.59 Stack<DataLayout*> _revisit_mdo_stack; 26.60 @@ -104,7 +127,6 @@ 26.61 // Array of tasks. Needed by the ParallelTaskTerminator. 26.62 static RegionTaskQueueSet* region_array() { return _region_array; } 26.63 OverflowTaskQueue<oop>* marking_stack() { return &_marking_stack; } 26.64 - RegionTaskQueue* region_stack() { return &_region_stack; } 26.65 26.66 // Pushes onto the marking stack. If the marking stack is full, 26.67 // pushes onto the overflow stack.
26.68 @@ -116,10 +138,33 @@ 26.69 Action action() { return _action; } 26.70 void set_action(Action v) { _action = v; } 26.71 26.72 + RegionTaskQueue* region_stack() { return _region_stack; } 26.73 + void set_region_stack(RegionTaskQueue* v) { _region_stack = v; } 26.74 + 26.75 inline static ParCompactionManager* manager_array(int index); 26.76 26.77 + inline static RegionTaskQueue* region_list(int index) { 26.78 + return _region_list[index]; 26.79 + } 26.80 + 26.81 + uint region_stack_index() { return _region_stack_index; } 26.82 + void set_region_stack_index(uint v) { _region_stack_index = v; } 26.83 + 26.84 + // Pop and push unique reusable stack index 26.85 + static int pop_recycled_stack_index(); 26.86 + static void push_recycled_stack_index(uint v); 26.87 + static void reset_recycled_stack_index() { 26.88 + _recycled_bottom = _recycled_top = -1; 26.89 + } 26.90 + 26.91 ParCompactionManager(); 26.92 + ~ParCompactionManager(); 26.93 26.94 + // Pushes onto the region stack at the given index. If the 26.95 + // region stack is full, 26.96 + // pushes onto the region overflow stack. 26.97 + static void region_list_push(uint stack_index, size_t region_index); 26.98 + static void verify_region_list_empty(uint stack_index); 26.99 ParMarkBitMap* mark_bitmap() { return _mark_bitmap; } 26.100 26.101 // Take actions in preparation for a compaction. 26.102 @@ -129,8 +174,6 @@ 26.103 26.104 bool should_update(); 26.105 bool should_copy(); 26.106 - bool should_verify_only(); 26.107 - bool should_reset_only(); 26.108 26.109 Stack<Klass*>* revisit_klass_stack() { return &_revisit_klass_stack; } 26.110 Stack<DataLayout*>* revisit_mdo_stack() { return &_revisit_mdo_stack; }
27.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp Thu Dec 01 13:42:41 2011 -0500 27.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweepDecorator.cpp Fri Dec 02 08:52:53 2011 -0500 27.3 @@ -1,5 +1,5 @@ 27.4 /* 27.5 - * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved. 27.6 + * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved. 27.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 27.8 * 27.9 * This code is free software; you can redistribute it and/or modify it 27.10 @@ -96,7 +96,8 @@ 27.11 * by the MarkSweepAlwaysCompactCount parameter. This is a significant 27.12 * performance improvement! 27.13 */ 27.14 - bool skip_dead = ((PSMarkSweep::total_invocations() % MarkSweepAlwaysCompactCount) != 0); 27.15 + bool skip_dead = (MarkSweepAlwaysCompactCount < 1) 27.16 + || ((PSMarkSweep::total_invocations() % MarkSweepAlwaysCompactCount) != 0); 27.17 27.18 size_t allowed_deadspace = 0; 27.19 if (skip_dead) {
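The new `MarkSweepAlwaysCompactCount < 1` disjunct is not just stylistic: the flag is user-settable, and with the old expression a value of 0 would make the modulo operation divide by zero. With the guard, 0 short-circuits to "always allow dead space to be skipped", i.e. never force a full compaction. A small sketch of the predicate's behavior (function and variable names are stand-ins, not HotSpot's):

    #include <cstdio>

    // skip_dead == false on every Nth invocation forces a full compaction;
    // a count below 1 now means "never force one" instead of crashing.
    static bool skip_dead_space(unsigned total_invocations,
                                unsigned always_compact_count) {
      return (always_compact_count < 1)  // guard: 0 would divide by zero below
          || ((total_invocations % always_compact_count) != 0);
    }

    int main() {
      for (unsigned inv = 1; inv <= 8; inv++) {
        // With a count of 4, invocations 4 and 8 compact fully (skip_dead=0).
        printf("inv=%u count=4 skip_dead=%d\n", inv, skip_dead_space(inv, 4));
      }
      // With a count of 0, the guard short-circuits: always skip dead space.
      printf("inv=5 count=0 skip_dead=%d\n", skip_dead_space(5, 0));
      return 0;
    }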
28.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Thu Dec 01 13:42:41 2011 -0500 28.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp Fri Dec 02 08:52:53 2011 -0500 28.3 @@ -2045,6 +2045,11 @@ 28.4 ResourceMark rm; 28.5 HandleMark hm; 28.6 28.7 + // Set the number of GC threads to be used in this collection 28.8 + gc_task_manager()->set_active_gang(); 28.9 + gc_task_manager()->task_idle_workers(); 28.10 + heap->set_par_threads(gc_task_manager()->active_workers()); 28.11 + 28.12 const bool is_system_gc = gc_cause == GCCause::_java_lang_system_gc; 28.13 28.14 // This is useful for debugging but don't change the output the 28.15 @@ -2197,6 +2202,7 @@ 28.16 // Track memory usage and detect low memory 28.17 MemoryService::track_memory_usage(); 28.18 heap->update_counters(); 28.19 + gc_task_manager()->release_idle_workers(); 28.20 } 28.21 28.22 #ifdef ASSERT 28.23 @@ -2204,7 +2210,7 @@ 28.24 ParCompactionManager* const cm = 28.25 ParCompactionManager::manager_array(int(i)); 28.26 assert(cm->marking_stack()->is_empty(), "should be empty"); 28.27 - assert(cm->region_stack()->is_empty(), "should be empty"); 28.28 + assert(ParCompactionManager::region_list(int(i))->is_empty(), "should be empty"); 28.29 assert(cm->revisit_klass_stack()->is_empty(), "should be empty"); 28.30 } 28.31 #endif // ASSERT 28.32 @@ -2351,8 +2357,9 @@ 28.33 28.34 ParallelScavengeHeap* heap = gc_heap(); 28.35 uint parallel_gc_threads = heap->gc_task_manager()->workers(); 28.36 + uint active_gc_threads = heap->gc_task_manager()->active_workers(); 28.37 TaskQueueSetSuper* qset = ParCompactionManager::region_array(); 28.38 - ParallelTaskTerminator terminator(parallel_gc_threads, qset); 28.39 + ParallelTaskTerminator terminator(active_gc_threads, qset); 28.40 28.41 PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm); 28.42 PSParallelCompact::FollowStackClosure follow_stack_closure(cm); 28.43 @@ -2374,21 +2381,13 @@ 28.44 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::jvmti)); 28.45 q->enqueue(new MarkFromRootsTask(MarkFromRootsTask::code_cache)); 28.46 28.47 - if (parallel_gc_threads > 1) { 28.48 - for (uint j = 0; j < parallel_gc_threads; j++) { 28.49 + if (active_gc_threads > 1) { 28.50 + for (uint j = 0; j < active_gc_threads; j++) { 28.51 q->enqueue(new StealMarkingTask(&terminator)); 28.52 } 28.53 } 28.54 28.55 - WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create(); 28.56 - q->enqueue(fin); 28.57 - 28.58 - gc_task_manager()->add_list(q); 28.59 - 28.60 - fin->wait_for(); 28.61 - 28.62 - // We have to release the barrier tasks! 
28.63 - WaitForBarrierGCTask::destroy(fin); 28.64 + gc_task_manager()->execute_and_wait(q); 28.65 } 28.66 28.67 // Process reference objects found during marking 28.68 @@ -2483,10 +2482,22 @@ 28.69 { 28.70 TraceTime tm("drain task setup", print_phases(), true, gclog_or_tty); 28.71 28.72 - const unsigned int task_count = MAX2(parallel_gc_threads, 1U); 28.73 - for (unsigned int j = 0; j < task_count; j++) { 28.74 + // Find the threads that are active 28.75 + unsigned int which = 0; 28.76 + 28.77 + const uint task_count = MAX2(parallel_gc_threads, 1U); 28.78 + for (uint j = 0; j < task_count; j++) { 28.79 q->enqueue(new DrainStacksCompactionTask(j)); 28.80 + ParCompactionManager::verify_region_list_empty(j); 28.81 + // Set the region stack variables to "no region stack" values 28.82 + // so that they will be recognized as needing a region stack 28.83 + // in the stealing tasks if they do not get one by executing 28.84 + // a draining task. 28.85 + ParCompactionManager* cm = ParCompactionManager::manager_array(j); 28.86 + cm->set_region_stack(NULL); 28.87 + cm->set_region_stack_index((uint)max_uintx); 28.88 } 28.89 + ParCompactionManager::reset_recycled_stack_index(); 28.90 28.91 // Find all regions that are available (can be filled immediately) and 28.92 // distribute them to the thread stacks. The iteration is done in reverse 28.93 @@ -2495,8 +2506,10 @@ 28.94 const ParallelCompactData& sd = PSParallelCompact::summary_data(); 28.95 28.96 size_t fillable_regions = 0; // A count for diagnostic purposes. 28.97 - unsigned int which = 0; // The worker thread number. 28.98 - 28.99 + // An index into the tasks created above. 28.100 + // "which" must be 0 <= which < task_count 28.101 + 28.102 + which = 0; 28.103 for (unsigned int id = to_space_id; id > perm_space_id; --id) { 28.104 SpaceInfo* const space_info = _space_info + id; 28.105 MutableSpace* const space = space_info->space(); 28.106 @@ -2509,8 +2522,7 @@ 28.107 28.108 for (size_t cur = end_region - 1; cur >= beg_region; --cur) { 28.109 if (sd.region(cur)->claim_unsafe()) { 28.110 - ParCompactionManager* cm = ParCompactionManager::manager_array(which); 28.111 - cm->push_region(cur); 28.112 + ParCompactionManager::region_list_push(which, cur); 28.113 28.114 if (TraceParallelOldGCCompactionPhase && Verbose) { 28.115 const size_t count_mod_8 = fillable_regions & 7; 28.116 @@ -2521,8 +2533,10 @@ 28.117 28.118 NOT_PRODUCT(++fillable_regions;) 28.119 28.120 - // Assign regions to threads in round-robin fashion. 28.121 + // Assign regions to tasks in round-robin fashion.
28.122 if (++which == task_count) { 28.123 + assert(which <= parallel_gc_threads, 28.124 + "Inconsistent number of workers"); 28.125 which = 0; 28.126 } 28.127 } 28.128 @@ -2642,26 +2656,19 @@ 28.129 PSOldGen* old_gen = heap->old_gen(); 28.130 old_gen->start_array()->reset(); 28.131 uint parallel_gc_threads = heap->gc_task_manager()->workers(); 28.132 + uint active_gc_threads = heap->gc_task_manager()->active_workers(); 28.133 TaskQueueSetSuper* qset = ParCompactionManager::region_array(); 28.134 - ParallelTaskTerminator terminator(parallel_gc_threads, qset); 28.135 + ParallelTaskTerminator terminator(active_gc_threads, qset); 28.136 28.137 GCTaskQueue* q = GCTaskQueue::create(); 28.138 - enqueue_region_draining_tasks(q, parallel_gc_threads); 28.139 - enqueue_dense_prefix_tasks(q, parallel_gc_threads); 28.140 - enqueue_region_stealing_tasks(q, &terminator, parallel_gc_threads); 28.141 + enqueue_region_draining_tasks(q, active_gc_threads); 28.142 + enqueue_dense_prefix_tasks(q, active_gc_threads); 28.143 + enqueue_region_stealing_tasks(q, &terminator, active_gc_threads); 28.144 28.145 { 28.146 TraceTime tm_pc("par compact", print_phases(), true, gclog_or_tty); 28.147 28.148 - WaitForBarrierGCTask* fin = WaitForBarrierGCTask::create(); 28.149 - q->enqueue(fin); 28.150 - 28.151 - gc_task_manager()->add_list(q); 28.152 - 28.153 - fin->wait_for(); 28.154 - 28.155 - // We have to release the barrier tasks! 28.156 - WaitForBarrierGCTask::destroy(fin); 28.157 + gc_task_manager()->execute_and_wait(q); 28.158 28.159 #ifdef ASSERT 28.160 // Verify that all regions have been processed before the deferred updates. 28.161 @@ -2729,6 +2736,9 @@ 28.162 PSParallelCompact::follow_weak_klass_links() { 28.163 // All klasses on the revisit stack are marked at this point. 28.164 // Update and follow all subklass, sibling and implementor links. 28.165 + // Check all the stacks here even if not all the workers are active. 28.166 + // There is no accounting which indicates which stacks might have 28.167 + // contents to be followed. 28.168 if (PrintRevisitStats) { 28.169 gclog_or_tty->print_cr("#classes in system dictionary = %d", 28.170 SystemDictionary::number_of_classes()); 28.171 @@ -3360,20 +3370,7 @@ 28.172 HeapWord* beg_addr = sp->bottom(); 28.173 HeapWord* end_addr = sp->top(); 28.174 28.175 -#ifdef ASSERT 28.176 assert(beg_addr <= dp_addr && dp_addr <= end_addr, "bad dense prefix"); 28.177 - if (cm->should_verify_only()) { 28.178 - VerifyUpdateClosure verify_update(cm, sp); 28.179 - bitmap->iterate(&verify_update, beg_addr, end_addr); 28.180 - return; 28.181 - } 28.182 - 28.183 - if (cm->should_reset_only()) { 28.184 - ResetObjectsClosure reset_objects(cm); 28.185 - bitmap->iterate(&reset_objects, beg_addr, end_addr); 28.186 - return; 28.187 - } 28.188 -#endif 28.189 28.190 const size_t beg_region = sd.addr_to_region_idx(beg_addr); 28.191 const size_t dp_region = sd.addr_to_region_idx(dp_addr); 28.192 @@ -3492,35 +3489,6 @@ 28.193 return ParMarkBitMap::incomplete; 28.194 } 28.195 28.196 -// Verify the new location using the forwarding pointer 28.197 -// from MarkSweep::mark_sweep_phase2(). Set the mark_word 28.198 -// to the initial value. 28.199 -ParMarkBitMapClosure::IterationStatus 28.200 -PSParallelCompact::VerifyUpdateClosure::do_addr(HeapWord* addr, size_t words) { 28.201 - // The second arg (words) is not used. 
28.202 - oop obj = (oop) addr; 28.203 - HeapWord* forwarding_ptr = (HeapWord*) obj->mark()->decode_pointer(); 28.204 - HeapWord* new_pointer = summary_data().calc_new_pointer(obj); 28.205 - if (forwarding_ptr == NULL) { 28.206 - // The object is dead or not moving. 28.207 - assert(bitmap()->is_unmarked(obj) || (new_pointer == (HeapWord*) obj), 28.208 - "Object liveness is wrong."); 28.209 - return ParMarkBitMap::incomplete; 28.210 - } 28.211 - assert(HeapMaximumCompactionInterval > 1 || MarkSweepAlwaysCompactCount > 1 || 28.212 - forwarding_ptr == new_pointer, "new location is incorrect"); 28.213 - return ParMarkBitMap::incomplete; 28.214 -} 28.215 - 28.216 -// Reset objects modified for debug checking. 28.217 -ParMarkBitMapClosure::IterationStatus 28.218 -PSParallelCompact::ResetObjectsClosure::do_addr(HeapWord* addr, size_t words) { 28.219 - // The second arg (words) is not used. 28.220 - oop obj = (oop) addr; 28.221 - obj->init_mark(); 28.222 - return ParMarkBitMap::incomplete; 28.223 -} 28.224 - 28.225 // Prepare for compaction. This method is executed once 28.226 // (i.e., by a single thread) before compaction. 28.227 // Save the updated location of the intArrayKlassObj for
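The drain-task setup above now pushes claimed regions onto per-task region lists rather than directly onto per-thread compaction managers, dealing them out round-robin so that exactly task_count stacks are populated. A standalone sketch of the dealing loop (std::vector stands in for RegionTaskQueue, and deal_regions is an illustrative name):

    #include <cstddef>
    #include <vector>

    // Deal claimed region indices over task_count region lists, round-robin,
    // mirroring the loop in enqueue_region_draining_tasks().
    static void deal_regions(const std::vector<size_t>& claimed_regions,
                             unsigned task_count,
                             std::vector<std::vector<size_t>>& region_lists) {
      unsigned which = 0;                       // 0 <= which < task_count
      for (size_t region : claimed_regions) {
        region_lists[which].push_back(region);  // region_list_push(which, cur)
        if (++which == task_count) {
          which = 0;
        }
      }
    }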
29.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Thu Dec 01 13:42:41 2011 -0500 29.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp Fri Dec 02 08:52:53 2011 -0500 29.3 @@ -832,31 +832,6 @@ 29.4 virtual void do_code_blob(CodeBlob* cb) const { } 29.5 }; 29.6 29.7 - // Closure for verifying update of pointers. Does not 29.8 - // have any side effects. 29.9 - class VerifyUpdateClosure: public ParMarkBitMapClosure { 29.10 - const MutableSpace* _space; // Is this ever used? 29.11 - 29.12 - public: 29.13 - VerifyUpdateClosure(ParCompactionManager* cm, const MutableSpace* sp) : 29.14 - ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm), _space(sp) 29.15 - { } 29.16 - 29.17 - virtual IterationStatus do_addr(HeapWord* addr, size_t words); 29.18 - 29.19 - const MutableSpace* space() { return _space; } 29.20 - }; 29.21 - 29.22 - // Closure for updating objects altered for debug checking 29.23 - class ResetObjectsClosure: public ParMarkBitMapClosure { 29.24 - public: 29.25 - ResetObjectsClosure(ParCompactionManager* cm): 29.26 - ParMarkBitMapClosure(PSParallelCompact::mark_bitmap(), cm) 29.27 - { } 29.28 - 29.29 - virtual IterationStatus do_addr(HeapWord* addr, size_t words); 29.30 - }; 29.31 - 29.32 friend class KeepAliveClosure; 29.33 friend class FollowStackClosure; 29.34 friend class AdjustPointerClosure; 29.35 @@ -1183,10 +1158,6 @@ 29.36 // Update the deferred objects in the space. 29.37 static void update_deferred_objects(ParCompactionManager* cm, SpaceId id); 29.38 29.39 - // Mark pointer and follow contents. 29.40 - template <class T> 29.41 - static inline void mark_and_follow(ParCompactionManager* cm, T* p); 29.42 - 29.43 static ParMarkBitMap* mark_bitmap() { return &_mark_bitmap; } 29.44 static ParallelCompactData& summary_data() { return _summary_data; } 29.45 29.46 @@ -1283,20 +1254,6 @@ 29.47 } 29.48 29.49 template <class T> 29.50 -inline void PSParallelCompact::mark_and_follow(ParCompactionManager* cm, 29.51 - T* p) { 29.52 - T heap_oop = oopDesc::load_heap_oop(p); 29.53 - if (!oopDesc::is_null(heap_oop)) { 29.54 - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 29.55 - if (mark_bitmap()->is_unmarked(obj)) { 29.56 - if (mark_obj(obj)) { 29.57 - obj->follow_contents(cm); 29.58 - } 29.59 - } 29.60 - } 29.61 -} 29.62 - 29.63 -template <class T> 29.64 inline void PSParallelCompact::mark_and_push(ParCompactionManager* cm, T* p) { 29.65 T heap_oop = oopDesc::load_heap_oop(p); 29.66 if (!oopDesc::is_null(heap_oop)) {
30.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Thu Dec 01 13:42:41 2011 -0500 30.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psScavenge.cpp Fri Dec 02 08:52:53 2011 -0500 30.3 @@ -181,28 +181,29 @@ 30.4 void PSRefProcTaskExecutor::execute(ProcessTask& task) 30.5 { 30.6 GCTaskQueue* q = GCTaskQueue::create(); 30.7 - for(uint i=0; i<ParallelGCThreads; i++) { 30.8 + GCTaskManager* manager = ParallelScavengeHeap::gc_task_manager(); 30.9 + for(uint i=0; i < manager->active_workers(); i++) { 30.10 q->enqueue(new PSRefProcTaskProxy(task, i)); 30.11 } 30.12 - ParallelTaskTerminator terminator( 30.13 - ParallelScavengeHeap::gc_task_manager()->workers(), 30.14 + ParallelTaskTerminator terminator(manager->active_workers(), 30.15 (TaskQueueSetSuper*) PSPromotionManager::stack_array_depth()); 30.16 - if (task.marks_oops_alive() && ParallelGCThreads > 1) { 30.17 - for (uint j=0; j<ParallelGCThreads; j++) { 30.18 + if (task.marks_oops_alive() && manager->active_workers() > 1) { 30.19 + for (uint j = 0; j < manager->active_workers(); j++) { 30.20 q->enqueue(new StealTask(&terminator)); 30.21 } 30.22 } 30.23 - ParallelScavengeHeap::gc_task_manager()->execute_and_wait(q); 30.24 + manager->execute_and_wait(q); 30.25 } 30.26 30.27 30.28 void PSRefProcTaskExecutor::execute(EnqueueTask& task) 30.29 { 30.30 GCTaskQueue* q = GCTaskQueue::create(); 30.31 - for(uint i=0; i<ParallelGCThreads; i++) { 30.32 + GCTaskManager* manager = ParallelScavengeHeap::gc_task_manager(); 30.33 + for(uint i=0; i < manager->active_workers(); i++) { 30.34 q->enqueue(new PSRefEnqueueTaskProxy(task, i)); 30.35 } 30.36 - ParallelScavengeHeap::gc_task_manager()->execute_and_wait(q); 30.37 + manager->execute_and_wait(q); 30.38 } 30.39 30.40 // This method contains all heap specific policy for invoking scavenge. 30.41 @@ -375,6 +376,14 @@ 30.42 // Release all previously held resources 30.43 gc_task_manager()->release_all_resources(); 30.44 30.45 + // Set the number of GC threads to be used in this collection 30.46 + gc_task_manager()->set_active_gang(); 30.47 + gc_task_manager()->task_idle_workers(); 30.48 + // Get the active number of workers here and use that value 30.49 + // throughout the methods. 30.50 + uint active_workers = gc_task_manager()->active_workers(); 30.51 + heap->set_par_threads(active_workers); 30.52 + 30.53 PSPromotionManager::pre_scavenge(); 30.54 30.55 // We'll use the promotion manager again later. 
30.56 @@ -385,8 +394,9 @@ 30.57 30.58 GCTaskQueue* q = GCTaskQueue::create(); 30.59 30.60 - for(uint i=0; i<ParallelGCThreads; i++) { 30.61 - q->enqueue(new OldToYoungRootsTask(old_gen, old_top, i)); 30.62 + uint stripe_total = active_workers; 30.63 + for(uint i=0; i < stripe_total; i++) { 30.64 + q->enqueue(new OldToYoungRootsTask(old_gen, old_top, i, stripe_total)); 30.65 } 30.66 30.67 q->enqueue(new SerialOldToYoungRootsTask(perm_gen, perm_top)); 30.68 @@ -403,10 +413,10 @@ 30.69 q->enqueue(new ScavengeRootsTask(ScavengeRootsTask::code_cache)); 30.70 30.71 ParallelTaskTerminator terminator( 30.72 - gc_task_manager()->workers(), 30.73 + active_workers, 30.74 (TaskQueueSetSuper*) promotion_manager->stack_array_depth()); 30.75 - if (ParallelGCThreads>1) { 30.76 - for (uint j=0; j<ParallelGCThreads; j++) { 30.77 + if (active_workers > 1) { 30.78 + for (uint j = 0; j < active_workers; j++) { 30.79 q->enqueue(new StealTask(&terminator)); 30.80 } 30.81 } 30.82 @@ -419,6 +429,7 @@ 30.83 // Process reference objects discovered during scavenge 30.84 { 30.85 reference_processor()->setup_policy(false); // not always_clear 30.86 + reference_processor()->set_active_mt_degree(active_workers); 30.87 PSKeepAliveClosure keep_alive(promotion_manager); 30.88 PSEvacuateFollowersClosure evac_followers(promotion_manager); 30.89 if (reference_processor()->processing_is_mt()) { 30.90 @@ -622,6 +633,8 @@ 30.91 // Track memory usage and detect low memory 30.92 MemoryService::track_memory_usage(); 30.93 heap->update_counters(); 30.94 + 30.95 + gc_task_manager()->release_idle_workers(); 30.96 } 30.97 30.98 if (VerifyAfterGC && heap->total_collections() >= VerifyGCStartAt) { 30.99 @@ -804,6 +817,7 @@ 30.100 30.101 // Initialize ref handling object for scavenging. 30.102 MemRegion mr = young_gen->reserved(); 30.103 + 30.104 _ref_processor = 30.105 new ReferenceProcessor(mr, // span 30.106 ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
31.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Thu Dec 01 13:42:41 2011 -0500 31.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp Fri Dec 02 08:52:53 2011 -0500 31.3 @@ -202,7 +202,8 @@ 31.4 _gen->object_space(), 31.5 _gen_top, 31.6 pm, 31.7 - _stripe_number); 31.8 + _stripe_number, 31.9 + _stripe_total); 31.10 31.11 // Do the real work 31.12 pm->drain_stacks(false);
32.1 --- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp Thu Dec 01 13:42:41 2011 -0500 32.2 +++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.hpp Fri Dec 02 08:52:53 2011 -0500 32.3 @@ -135,16 +135,63 @@ 32.4 // OldToYoungRootsTask 32.5 // 32.6 // This task is used to scan old to young roots in parallel 32.7 +// 32.8 +// A GC thread executing this task divides the generation (old gen) 32.9 +// into slices and takes a stripe in the slice as its part of the 32.10 +// work. 32.11 +// 32.12 +// +===============+ slice 0 32.13 +// | stripe 0 | 32.14 +// +---------------+ 32.15 +// | stripe 1 | 32.16 +// +---------------+ 32.17 +// | stripe 2 | 32.18 +// +---------------+ 32.19 +// | stripe 3 | 32.20 +// +===============+ slice 1 32.21 +// | stripe 0 | 32.22 +// +---------------+ 32.23 +// | stripe 1 | 32.24 +// +---------------+ 32.25 +// | stripe 2 | 32.26 +// +---------------+ 32.27 +// | stripe 3 | 32.28 +// +===============+ slice 2 32.29 +// ... 32.30 +// 32.31 +// A task is created for each stripe. In this case there are 4 tasks 32.32 +// created. A GC thread first works on its stripe within slice 0 32.33 +// and then moves to its stripe in the next slice until all stripes 32.34 +// exceed the top of the generation. Note that having fewer GC threads 32.35 +// than stripes works because all the tasks are executed so all stripes 32.36 +// will be covered. In this example if 4 tasks have been created to cover 32.37 +// all the stripes and there are only 3 threads, one of the threads will 32.38 +// get the task with the 4th stripe. However, there is a dependence in 32.39 +// CardTableExtension::scavenge_contents_parallel() on the number 32.40 +// of tasks created. In scavenge_contents_parallel the distance 32.41 +// to the next stripe is calculated based on the number of tasks. 32.42 +// If the stripe width is ssize, a task's next stripe is at 32.43 +// ssize * number_of_tasks (= slice_stride). In this case after 32.44 +// finishing stripe 0 in slice 0, the thread finds stripe 0 in slice 1 32.45 +// by adding slice_stride to the start of stripe 0 in slice 0 to get 32.46 +// to the start of stripe 0 in slice 1. 32.47 32.48 class OldToYoungRootsTask : public GCTask { 32.49 private: 32.50 PSOldGen* _gen; 32.51 HeapWord* _gen_top; 32.52 uint _stripe_number; 32.53 + uint _stripe_total; 32.54 32.55 public: 32.56 - OldToYoungRootsTask(PSOldGen *gen, HeapWord* gen_top, uint stripe_number) : 32.57 - _gen(gen), _gen_top(gen_top), _stripe_number(stripe_number) { } 32.58 + OldToYoungRootsTask(PSOldGen *gen, 32.59 + HeapWord* gen_top, 32.60 + uint stripe_number, 32.61 + uint stripe_total) : 32.62 + _gen(gen), 32.63 + _gen_top(gen_top), 32.64 + _stripe_number(stripe_number), 32.65 + _stripe_total(stripe_total) { } 32.66 32.67 char* name() { return (char *)"old-to-young-roots-task"; } 32.68
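The slice/stripe arithmetic described in this comment reduces to two numbers: a task's first stripe starts at stripe_number * ssize, and each of its later stripes is slice_stride = ssize * stripe_total further on. A sketch of one task's walk under those definitions (plain indices instead of HeapWord addresses; walk_stripes is an illustrative name, not a HotSpot function):

    #include <cstdio>

    // Walk the stripes owned by one task, following the addressing scheme
    // the comment above attributes to scavenge_contents_parallel().
    static void walk_stripes(unsigned stripe_number, unsigned stripe_total,
                             unsigned ssize, unsigned gen_top) {
      unsigned slice_stride = ssize * stripe_total;  // gap between a task's stripes
      for (unsigned start = stripe_number * ssize;   // its stripe in slice 0
           start < gen_top;
           start += slice_stride) {                  // same stripe, next slice
        printf("task %u scans [%u, %u)\n", stripe_number, start, start + ssize);
      }
    }

    int main() {
      // 4 tasks, stripe width 100, generation top at 1000: each task covers
      // its stripe in slices 0, 1, 2, ... until the top is exceeded.
      for (unsigned task = 0; task < 4; task++) {
        walk_stripes(task, 4, 100, 1000);
      }
      return 0;
    }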
33.1 --- a/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp Thu Dec 01 13:42:41 2011 -0500 33.2 +++ b/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.cpp Fri Dec 02 08:52:53 2011 -0500 33.3 @@ -28,8 +28,10 @@ 33.4 #include "memory/collectorPolicy.hpp" 33.5 #include "runtime/timer.hpp" 33.6 #include "utilities/ostream.hpp" 33.7 +#include "utilities/workgroup.hpp" 33.8 elapsedTimer AdaptiveSizePolicy::_minor_timer; 33.9 elapsedTimer AdaptiveSizePolicy::_major_timer; 33.10 +bool AdaptiveSizePolicy::_debug_perturbation = false; 33.11 33.12 // The throughput goal is implemented as 33.13 // _throughput_goal = 1 - ( 1 / (1 + gc_cost_ratio)) 33.14 @@ -88,6 +90,134 @@ 33.15 _young_gen_policy_is_ready = false; 33.16 } 33.17 33.18 +// If the number of GC threads was set on the command line, 33.19 +// use it. 33.20 +// Else 33.21 +// Calculate the number of GC threads based on the number of Java threads. 33.22 +// Calculate the number of GC threads based on the size of the heap. 33.23 +// Use the larger. 33.24 + 33.25 +int AdaptiveSizePolicy::calc_default_active_workers(uintx total_workers, 33.26 + const uintx min_workers, 33.27 + uintx active_workers, 33.28 + uintx application_workers) { 33.29 + // If the user has specifically set the number of 33.30 + // GC threads, use them. 33.31 + 33.32 + // If the user has turned off using a dynamic number of GC threads 33.33 + // or the user has requested a specific number, set the active 33.34 + // number of workers to all the workers. 33.35 + 33.36 + uintx new_active_workers = total_workers; 33.37 + uintx prev_active_workers = active_workers; 33.38 + uintx active_workers_by_JT = 0; 33.39 + uintx active_workers_by_heap_size = 0; 33.40 + 33.41 + // Always use at least min_workers but use up to 33.42 + // GCWorkersPerJavaThread * application threads. 33.43 + active_workers_by_JT = 33.44 + MAX2((uintx) GCWorkersPerJavaThread * application_workers, 33.45 + min_workers); 33.46 + 33.47 + // Choose a number of GC threads based on the current size 33.48 + // of the heap. This may be complicated because the size of 33.49 + // the heap depends on factors such as the throughput goal. 33.50 + // Still, a large heap should be collected by more GC threads. 33.51 + active_workers_by_heap_size = 33.52 + MAX2((size_t) 2U, Universe::heap()->capacity() / HeapSizePerGCThread); 33.53 + 33.54 + uintx max_active_workers = 33.55 + MAX2(active_workers_by_JT, active_workers_by_heap_size); 33.56 + 33.57 + // Limit the number of workers to the number created, 33.58 + // (workers()). 33.59 + new_active_workers = MIN2(max_active_workers, 33.60 + (uintx) total_workers); 33.61 + 33.62 + // Increase GC workers instantly but decrease them more 33.63 + // slowly. 33.64 + if (new_active_workers < prev_active_workers) { 33.65 + new_active_workers = 33.66 + MAX2(min_workers, (prev_active_workers + new_active_workers) / 2); 33.67 + } 33.68 + 33.69 + // Check once more that the number of workers is within the limits. 33.70 + assert(min_workers <= total_workers, "Minimum workers not consistent with total workers"); 33.71 + assert(new_active_workers >= min_workers, "Minimum workers not observed"); 33.72 + assert(new_active_workers <= total_workers, "Total workers not observed"); 33.73 + 33.74 + if (ForceDynamicNumberOfGCThreads) { 33.75 + // Assume this is debugging and jiggle the number of GC threads.
33.76 + if (new_active_workers == prev_active_workers) { 33.77 + if (new_active_workers < total_workers) { 33.78 + new_active_workers++; 33.79 + } else if (new_active_workers > min_workers) { 33.80 + new_active_workers--; 33.81 + } 33.82 + } 33.83 + if (new_active_workers == total_workers) { 33.84 + if (_debug_perturbation) { 33.85 + new_active_workers = min_workers; 33.86 + } 33.87 + _debug_perturbation = !_debug_perturbation; 33.88 + } 33.89 + assert((new_active_workers <= (uintx) ParallelGCThreads) && 33.90 + (new_active_workers >= min_workers), 33.91 + "Jiggled active workers too much"); 33.92 + } 33.93 + 33.94 + if (TraceDynamicGCThreads) { 33.95 + gclog_or_tty->print_cr("AdaptiveSizePolicy::calc_default_active_workers() : " 33.96 + "active_workers(): %d new_active_workers: %d " 33.97 + "prev_active_workers: %d\n" 33.98 + " active_workers_by_JT: %d active_workers_by_heap_size: %d", 33.99 + active_workers, new_active_workers, prev_active_workers, 33.100 + active_workers_by_JT, active_workers_by_heap_size); 33.101 + } 33.102 + assert(new_active_workers > 0, "Always need at least 1"); 33.103 + return new_active_workers; 33.104 +} 33.105 + 33.106 +int AdaptiveSizePolicy::calc_active_workers(uintx total_workers, 33.107 + uintx active_workers, 33.108 + uintx application_workers) { 33.109 + // If the user has specifically set the number of 33.110 + // GC threads, use them. 33.111 + 33.112 + // If the user has turned off using a dynamic number of GC threads 33.113 + // or the user has requested a specific number, set the active 33.114 + // number of workers to all the workers. 33.115 + 33.116 + int new_active_workers; 33.117 + if (!UseDynamicNumberOfGCThreads || 33.118 + (!FLAG_IS_DEFAULT(ParallelGCThreads) && !ForceDynamicNumberOfGCThreads)) { 33.119 + new_active_workers = total_workers; 33.120 + } else { 33.121 + new_active_workers = calc_default_active_workers(total_workers, 33.122 + 2, /* Minimum number of workers */ 33.123 + active_workers, 33.124 + application_workers); 33.125 + } 33.126 + assert(new_active_workers > 0, "Always need at least 1"); 33.127 + return new_active_workers; 33.128 +} 33.129 + 33.130 +int AdaptiveSizePolicy::calc_active_conc_workers(uintx total_workers, 33.131 + uintx active_workers, 33.132 + uintx application_workers) { 33.133 + if (!UseDynamicNumberOfGCThreads || 33.134 + (!FLAG_IS_DEFAULT(ConcGCThreads) && !ForceDynamicNumberOfGCThreads)) { 33.135 + return ConcGCThreads; 33.136 + } else { 33.137 + int no_of_gc_threads = calc_default_active_workers( 33.138 + total_workers, 33.139 + 1, /* Minimum number of workers */ 33.140 + active_workers, 33.141 + application_workers); 33.142 + return no_of_gc_threads; 33.143 + } 33.144 +} 33.145 + 33.146 bool AdaptiveSizePolicy::tenuring_threshold_change() const { 33.147 return decrement_tenuring_threshold_for_gc_cost() || 33.148 increment_tenuring_threshold_for_gc_cost() ||
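For a feel of the heuristic: with 16 created workers, 20 non-daemon Java threads and a 4 GB heap at the default HeapSizePerGCThread of 64 MB, active_workers_by_JT = max(2 * 20, 2) = 40 and active_workers_by_heap_size = max(2, 4096/64) = 64, so the heap term dominates and the result is clamped to the 16 created workers; a shrinking target is only approached by halves. A standalone rendering of that calculation (constants and names mirror GCWorkersPerJavaThread and HeapSizePerGCThread but are local stand-ins):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>

    // Sketch of the calc_default_active_workers() sizing rule above.
    static unsigned calc_active(unsigned total_workers, unsigned min_workers,
                                unsigned prev_active, unsigned java_threads,
                                size_t heap_bytes) {
      const unsigned workers_per_java_thread = 2;           // GCWorkersPerJavaThread
      const size_t   heap_per_gc_thread = 64u * 1024 * 1024; // HeapSizePerGCThread

      unsigned by_jt   = std::max(workers_per_java_thread * java_threads,
                                  min_workers);
      unsigned by_heap = (unsigned)std::max<size_t>(2, heap_bytes / heap_per_gc_thread);
      unsigned wanted  = std::min(std::max(by_jt, by_heap), total_workers);

      // Grow instantly, but shrink only halfway toward the new target.
      if (wanted < prev_active) {
        wanted = std::max(min_workers, (prev_active + wanted) / 2);
      }
      return wanted;
    }

    int main() {
      // 16 created workers, 20 Java threads, 4 GB heap: clamped to 16.
      printf("%u\n", calc_active(16, 2, 16, 20, 4ull * 1024 * 1024 * 1024));
      // Shrinking case: previously 16 active, tiny load -> (16 + 2) / 2 = 9.
      printf("%u\n", calc_active(16, 2, 16, 1, 128ull * 1024 * 1024));
      return 0;
    }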
34.1 --- a/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp Thu Dec 01 13:42:41 2011 -0500 34.2 +++ b/src/share/vm/gc_implementation/shared/adaptiveSizePolicy.hpp Fri Dec 02 08:52:53 2011 -0500 34.3 @@ -187,6 +187,8 @@ 34.4 julong _young_gen_change_for_minor_throughput; 34.5 julong _old_gen_change_for_major_throughput; 34.6 34.7 + static const uint GCWorkersPerJavaThread = 2; 34.8 + 34.9 // Accessors 34.10 34.11 double gc_pause_goal_sec() const { return _gc_pause_goal_sec; } 34.12 @@ -331,6 +333,8 @@ 34.13 // Return true if the policy suggested a change. 34.14 bool tenuring_threshold_change() const; 34.15 34.16 + static bool _debug_perturbation; 34.17 + 34.18 public: 34.19 AdaptiveSizePolicy(size_t init_eden_size, 34.20 size_t init_promo_size, 34.21 @@ -338,6 +342,31 @@ 34.22 double gc_pause_goal_sec, 34.23 uint gc_cost_ratio); 34.24 34.25 + // Return the default number of GC threads to use in the next GC. 34.26 + static int calc_default_active_workers(uintx total_workers, 34.27 + const uintx min_workers, 34.28 + uintx active_workers, 34.29 + uintx application_workers); 34.30 + 34.31 + // Return number of GC threads to use in the next GC. 34.32 + // This is called sparingly so as not to change the 34.33 + // number of GC workers gratuitously. 34.34 + // For ParNew collections 34.35 + // For PS scavenge and ParOld collections 34.36 + // For G1 evacuation pauses (subject to update) 34.37 + // Other collection phases inherit the number of 34.38 + // GC workers from the calls above. For example, 34.39 + // a CMS parallel remark uses the same number of GC 34.40 + // workers as the most recent ParNew collection. 34.41 + static int calc_active_workers(uintx total_workers, 34.42 + uintx active_workers, 34.43 + uintx application_workers); 34.44 + 34.45 + // Return number of GC threads to use in the next concurrent GC phase. 34.46 + static int calc_active_conc_workers(uintx total_workers, 34.47 + uintx active_workers, 34.48 + uintx application_workers); 34.49 + 34.50 bool is_gc_cms_adaptive_size_policy() { 34.51 return kind() == _gc_cms_adaptive_size_policy; 34.52 }
35.1 --- a/src/share/vm/gc_implementation/shared/markSweep.hpp Thu Dec 01 13:42:41 2011 -0500 35.2 +++ b/src/share/vm/gc_implementation/shared/markSweep.hpp Fri Dec 02 08:52:53 2011 -0500 35.3 @@ -1,5 +1,5 @@ 35.4 /* 35.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. 35.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 35.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 35.8 * 35.9 * This code is free software; you can redistribute it and/or modify it 35.10 @@ -196,8 +196,6 @@ 35.11 static void mark_object(oop obj); 35.12 // Mark pointer and follow contents. Empty marking stack afterwards. 35.13 template <class T> static inline void follow_root(T* p); 35.14 - // Mark pointer and follow contents. 35.15 - template <class T> static inline void mark_and_follow(T* p); 35.16 // Check mark and maybe push on marking stack 35.17 template <class T> static inline void mark_and_push(T* p); 35.18 static inline void push_objarray(oop obj, size_t index);
36.1 --- a/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Thu Dec 01 13:42:41 2011 -0500 36.2 +++ b/src/share/vm/gc_implementation/shared/markSweep.inline.hpp Fri Dec 02 08:52:53 2011 -0500 36.3 @@ -1,5 +1,5 @@ 36.4 /* 36.5 - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. 36.6 + * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved. 36.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 36.8 * 36.9 * This code is free software; you can redistribute it and/or modify it 36.10 @@ -63,18 +63,6 @@ 36.11 follow_stack(); 36.12 } 36.13 36.14 -template <class T> inline void MarkSweep::mark_and_follow(T* p) { 36.15 -// assert(Universe::heap()->is_in_reserved(p), "should be in object space"); 36.16 - T heap_oop = oopDesc::load_heap_oop(p); 36.17 - if (!oopDesc::is_null(heap_oop)) { 36.18 - oop obj = oopDesc::decode_heap_oop_not_null(heap_oop); 36.19 - if (!obj->mark()->is_marked()) { 36.20 - mark_object(obj); 36.21 - obj->follow_contents(); 36.22 - } 36.23 - } 36.24 -} 36.25 - 36.26 template <class T> inline void MarkSweep::mark_and_push(T* p) { 36.27 // assert(Universe::heap()->is_in_reserved(p), "should be in object space"); 36.28 T heap_oop = oopDesc::load_heap_oop(p);
37.1 --- a/src/share/vm/memory/cardTableModRefBS.cpp Thu Dec 01 13:42:41 2011 -0500 37.2 +++ b/src/share/vm/memory/cardTableModRefBS.cpp Fri Dec 02 08:52:53 2011 -0500 37.3 @@ -460,9 +460,43 @@ 37.4 OopsInGenClosure* cl, 37.5 CardTableRS* ct) { 37.6 if (!mr.is_empty()) { 37.7 - int n_threads = SharedHeap::heap()->n_par_threads(); 37.8 - if (n_threads > 0) { 37.9 + // Caller (process_strong_roots()) claims that all GC threads 37.10 + // execute this call. With UseDynamicNumberOfGCThreads, now all 37.11 + // active GC threads execute this call. The number of active GC 37.12 + // threads needs to be passed to par_non_clean_card_iterate_work() 37.13 + // to get proper partitioning and termination. 37.14 + // 37.15 + // This is an example of where n_par_threads() is used instead 37.16 + // of workers()->active_workers(). n_par_threads can be set to 0 to 37.17 + // turn off parallelism. For example when this code is called as 37.18 + // part of verification and SharedHeap::process_strong_roots() is being 37.19 + // used, then n_par_threads() may have been set to 0. active_workers 37.20 + // is not overloaded with the meaning that it is a switch to disable 37.21 + // parallelism and so keeps the meaning of the number of 37.22 + // active gc workers. If parallelism has not been shut off by 37.23 + // setting n_par_threads to 0, then n_par_threads should be 37.24 + // equal to active_workers. When a different mechanism for shutting 37.25 + // off parallelism is used, then active_workers can be used in 37.26 + // place of n_par_threads. 37.27 + // This is an example of a path where n_par_threads is 37.28 + // set to 0 to turn off parallelism. 37.29 + // [7] CardTableModRefBS::non_clean_card_iterate() 37.30 + // [8] CardTableRS::younger_refs_in_space_iterate() 37.31 + // [9] Generation::younger_refs_in_space_iterate() 37.32 + // [10] OneContigSpaceCardGeneration::younger_refs_iterate() 37.33 + // [11] CompactingPermGenGen::younger_refs_iterate() 37.34 + // [12] CardTableRS::younger_refs_iterate() 37.35 + // [13] SharedHeap::process_strong_roots() 37.36 + // [14] G1CollectedHeap::verify() 37.37 + // [15] Universe::verify() 37.38 + // [16] G1CollectedHeap::do_collection_pause_at_safepoint() 37.39 + // 37.40 + int n_threads = SharedHeap::heap()->n_par_threads(); 37.41 + bool is_par = n_threads > 0; 37.42 + if (is_par) { 37.43 #ifndef SERIALGC 37.44 + assert(SharedHeap::heap()->n_par_threads() == 37.45 + SharedHeap::heap()->workers()->active_workers(), "Mismatch"); 37.46 non_clean_card_iterate_parallel_work(sp, mr, cl, ct, n_threads); 37.47 #else // SERIALGC 37.48 fatal("Parallel gc not supported here."); 37.49 @@ -489,6 +523,10 @@ 37.50 // change their values in any manner. 37.51 void CardTableModRefBS::non_clean_card_iterate_serial(MemRegion mr, 37.52 MemRegionClosure* cl) { 37.53 + bool is_par = (SharedHeap::heap()->n_par_threads() > 0); 37.54 + assert(!is_par || 37.55 + (SharedHeap::heap()->n_par_threads() == 37.56 + SharedHeap::heap()->workers()->active_workers()), "Mismatch"); 37.57 for (int i = 0; i < _cur_covered_regions; i++) { 37.58 MemRegion mri = mr.intersection(_covered[i]); 37.59 if (mri.word_size() > 0) { 37.60 @@ -624,23 +662,6 @@ 37.61 return MemRegion(mr.end(), mr.end()); 37.62 } 37.63 37.64 -// Set all the dirty cards in the given region to "precleaned" state.
37.65 -void CardTableModRefBS::preclean_dirty_cards(MemRegion mr) { 37.66 - for (int i = 0; i < _cur_covered_regions; i++) { 37.67 - MemRegion mri = mr.intersection(_covered[i]); 37.68 - if (!mri.is_empty()) { 37.69 - jbyte *cur_entry, *limit; 37.70 - for (cur_entry = byte_for(mri.start()), limit = byte_for(mri.last()); 37.71 - cur_entry <= limit; 37.72 - cur_entry++) { 37.73 - if (*cur_entry == dirty_card) { 37.74 - *cur_entry = precleaned_card; 37.75 - } 37.76 - } 37.77 - } 37.78 - } 37.79 -} 37.80 - 37.81 uintx CardTableModRefBS::ct_max_alignment_constraint() { 37.82 return card_size * os::vm_page_size(); 37.83 }
38.1 --- a/src/share/vm/memory/cardTableModRefBS.hpp Thu Dec 01 13:42:41 2011 -0500 38.2 +++ b/src/share/vm/memory/cardTableModRefBS.hpp Fri Dec 02 08:52:53 2011 -0500 38.3 @@ -435,9 +435,6 @@ 38.4 MemRegion dirty_card_range_after_reset(MemRegion mr, bool reset, 38.5 int reset_val); 38.6 38.7 - // Set all the dirty cards in the given region to precleaned state. 38.8 - void preclean_dirty_cards(MemRegion mr); 38.9 - 38.10 // Provide read-only access to the card table array. 38.11 const jbyte* byte_for_const(const void* p) const { 38.12 return byte_for(p);
39.1 --- a/src/share/vm/memory/cardTableRS.cpp Thu Dec 01 13:42:41 2011 -0500 39.2 +++ b/src/share/vm/memory/cardTableRS.cpp Fri Dec 02 08:52:53 2011 -0500 39.3 @@ -164,7 +164,13 @@ 39.4 ClearNoncleanCardWrapper::ClearNoncleanCardWrapper( 39.5 DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct) : 39.6 _dirty_card_closure(dirty_card_closure), _ct(ct) { 39.7 + // Cannot yet substitute active_workers for n_par_threads 39.8 + // in the case where parallelism is being turned off by 39.9 + // setting n_par_threads to 0. 39.10 _is_par = (SharedHeap::heap()->n_par_threads() > 0); 39.11 + assert(!_is_par || 39.12 + (SharedHeap::heap()->n_par_threads() == 39.13 + SharedHeap::heap()->workers()->active_workers()), "Mismatch"); 39.14 } 39.15 39.16 void ClearNoncleanCardWrapper::do_MemRegion(MemRegion mr) {
40.1 --- a/src/share/vm/memory/sharedHeap.cpp Thu Dec 01 13:42:41 2011 -0500 40.2 +++ b/src/share/vm/memory/sharedHeap.cpp Fri Dec 02 08:52:53 2011 -0500 40.3 @@ -58,7 +58,6 @@ 40.4 _perm_gen(NULL), _rem_set(NULL), 40.5 _strong_roots_parity(0), 40.6 _process_strong_tasks(new SubTasksDone(SH_PS_NumElements)), 40.7 - _n_par_threads(0), 40.8 _workers(NULL) 40.9 { 40.10 if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) { 40.11 @@ -80,6 +79,14 @@ 40.12 } 40.13 } 40.14 40.15 +int SharedHeap::n_termination() { 40.16 + return _process_strong_tasks->n_threads(); 40.17 +} 40.18 + 40.19 +void SharedHeap::set_n_termination(int t) { 40.20 + _process_strong_tasks->set_n_threads(t); 40.21 +} 40.22 + 40.23 bool SharedHeap::heap_lock_held_for_gc() { 40.24 Thread* t = Thread::current(); 40.25 return Heap_lock->owned_by_self() 40.26 @@ -144,6 +151,10 @@ 40.27 StrongRootsScope srs(this, activate_scope); 40.28 // General strong roots. 40.29 assert(_strong_roots_parity != 0, "must have called prologue code"); 40.30 + // _n_termination for _process_strong_tasks should be set upstream 40.31 + // in a method not running in a GC worker. Otherwise the GC worker 40.32 + // could be trying to change the termination condition while the task 40.33 + // is executing in another GC worker. 40.34 if (!_process_strong_tasks->is_task_claimed(SH_PS_Universe_oops_do)) { 40.35 Universe::oops_do(roots); 40.36 // Consider perm-gen discovered lists to be strong.
41.1 --- a/src/share/vm/memory/sharedHeap.hpp Thu Dec 01 13:42:41 2011 -0500 41.2 +++ b/src/share/vm/memory/sharedHeap.hpp Fri Dec 02 08:52:53 2011 -0500 41.3 @@ -49,6 +49,62 @@ 41.4 class CollectorPolicy; 41.5 class KlassHandle; 41.6 41.7 +// Note on use of FlexibleWorkGang's for GC. 41.8 +// There are three places where task completion is determined. 41.9 +// In 41.10 +// 1) ParallelTaskTerminator::offer_termination() where _n_threads 41.11 +// must be set to the correct value so that count of workers that 41.12 +// have offered termination will exactly match the number 41.13 +// working on the task. Tasks such as those derived from GCTask 41.14 +// use ParallelTaskTerminator's. Tasks that want load balancing 41.15 +// by work stealing use this method to gauge completion. 41.16 +// 2) SubTasksDone has a variable _n_threads that is used in 41.17 +// all_tasks_completed() to determine completion. all_tasks_completed() 41.18 +// counts the number of tasks that have been done and then resets 41.19 +// the SubTasksDone so that it can be used again. When the number of 41.20 +// tasks is set to the number of GC workers, then _n_threads must 41.21 +// be set to the number of active GC workers. G1CollectedHeap, 41.22 +// HRInto_G1RemSet, GenCollectedHeap and SharedHeap have SubTasksDone. 41.23 +// This seems too many. 41.24 +// 3) SequentialSubTasksDone has an _n_threads that is used in 41.25 +// a way similar to SubTasksDone and has the same dependency on the 41.26 +// number of active GC workers. CompactibleFreeListSpace and Space 41.27 +// have SequentialSubTasksDone's. 41.28 +// Example of using SubTasksDone and SequentialSubTasksDone 41.29 +// G1CollectedHeap::g1_process_strong_roots() calls 41.30 +// process_strong_roots(false, // no scoping; this is parallel code 41.31 +// collecting_perm_gen, so, 41.32 +// &buf_scan_non_heap_roots, 41.33 +// &eager_scan_code_roots, 41.34 +// &buf_scan_perm); 41.35 +// which delegates to SharedHeap::process_strong_roots() and uses 41.36 +// SubTasksDone* _process_strong_tasks to claim tasks. 41.37 +// process_strong_roots() calls 41.38 +// rem_set()->younger_refs_iterate(perm_gen(), perm_blk); 41.39 +// to scan the card table, which eventually calls down into 41.40 +// CardTableModRefBS::par_non_clean_card_iterate_work(). This method 41.41 +// uses SequentialSubTasksDone* _pst to claim tasks. 41.42 +// Both SubTasksDone and SequentialSubTasksDone call their method 41.43 +// all_tasks_completed() to count the number of GC workers that have 41.44 +// finished their work. That logic is "when all the workers are 41.45 +// finished the tasks are finished". 41.46 +// 41.47 +// The pattern that appears in the code is to set _n_threads 41.48 +// to a value > 1 before a task that you would like executed in parallel 41.49 +// and then to set it to 0 after that task has completed. A value of 41.50 +// 0 is a "special" value in set_n_threads() which translates to 41.51 +// setting _n_threads to 1. 41.52 +// 41.53 +// Some code uses _n_termination to decide if work should be done in 41.54 +// parallel. The notorious possibly_parallel_oops_do() in threads.cpp 41.55 +// is an example of such code. Look for variable "is_par" for other 41.56 +// examples. 41.57 +// 41.58 +// active_workers is not reset to 0 after a parallel phase. Its 41.59 +// value may be used in later phases and in one instance at least 41.60 +// (the parallel remark) it has to be used (the parallel remark depends 41.61 +// on the partitioning done in the previous parallel scavenge).
41.62 + 41.63 class SharedHeap : public CollectedHeap { 41.64 friend class VMStructs; 41.65 41.66 @@ -84,11 +140,6 @@ 41.67 // If we're doing parallel GC, use this gang of threads. 41.68 FlexibleWorkGang* _workers; 41.69 41.70 - // Number of parallel threads currently working on GC tasks. 41.71 - // O indicates use sequential code; 1 means use parallel code even with 41.72 - // only one thread, for performance testing purposes. 41.73 - int _n_par_threads; 41.74 - 41.75 // Full initialization is done in a concrete subtype's "initialize" 41.76 // function. 41.77 SharedHeap(CollectorPolicy* policy_); 41.78 @@ -107,6 +158,7 @@ 41.79 CollectorPolicy *collector_policy() const { return _collector_policy; } 41.80 41.81 void set_barrier_set(BarrierSet* bs); 41.82 + SubTasksDone* process_strong_tasks() { return _process_strong_tasks; } 41.83 41.84 // Does operations required after initialization has been done. 41.85 virtual void post_initialize(); 41.86 @@ -198,13 +250,6 @@ 41.87 41.88 FlexibleWorkGang* workers() const { return _workers; } 41.89 41.90 - // Sets the number of parallel threads that will be doing tasks 41.91 - // (such as process strong roots) subsequently. 41.92 - virtual void set_par_threads(int t); 41.93 - 41.94 - // Number of threads currently working on GC tasks. 41.95 - int n_par_threads() { return _n_par_threads; } 41.96 - 41.97 // Invoke the "do_oop" method the closure "roots" on all root locations. 41.98 // If "collecting_perm_gen" is false, then roots that may only contain 41.99 // references to permGen objects are not scanned; instead, in that case, 41.100 @@ -240,6 +285,13 @@ 41.101 virtual void gc_prologue(bool full) = 0; 41.102 virtual void gc_epilogue(bool full) = 0; 41.103 41.104 + // Sets the number of parallel threads that will be doing tasks 41.105 + // (such as process strong roots) subsequently. 41.106 + virtual void set_par_threads(int t); 41.107 + 41.108 + int n_termination(); 41.109 + void set_n_termination(int t); 41.110 + 41.111 // 41.112 // New methods from CollectedHeap 41.113 //
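The completion protocol that the comment block above describes can be reduced to a few lines: each task is claimed at most once, and a phase is over when _n_threads workers have checked in. A minimal std::atomic analogue of that shape (not the HotSpot implementation; claim granularity and reset behavior are simplified, and the class name is illustrative):

    #include <atomic>

    // Minimal shape of the SubTasksDone protocol: tasks are claimed by
    // exactly one worker; the phase ends when n_threads workers have
    // called in, at which point the state resets for the next phase.
    class SubTasksSketch {
      std::atomic<bool> _claimed[16];
      std::atomic<int>  _threads_completed{0};
      int               _n_threads{1};

    public:
      SubTasksSketch() { for (auto& c : _claimed) c = false; }

      // Must be set before the parallel phase, from outside the workers,
      // to the number of *active* workers (0 means "run serially as 1").
      void set_n_threads(int t) { _n_threads = (t == 0 ? 1 : t); }

      bool try_claim_task(int t) {
        bool expected = false;
        return _claimed[t].compare_exchange_strong(expected, true);
      }

      // Every participating worker calls this once when it runs out of
      // tasks; the last one to arrive resets the bookkeeping.
      void all_tasks_completed() {
        if (_threads_completed.fetch_add(1) + 1 == _n_threads) {
          for (auto& c : _claimed) c = false;
          _threads_completed = 0;
        }
      }
    };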
42.1 --- a/src/share/vm/memory/space.hpp Thu Dec 01 13:42:41 2011 -0500 42.2 +++ b/src/share/vm/memory/space.hpp Fri Dec 02 08:52:53 2011 -0500 42.3 @@ -1,5 +1,5 @@ 42.4 /* 42.5 - * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. 42.6 + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. 42.7 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 42.8 * 42.9 * This code is free software; you can redistribute it and/or modify it 42.10 @@ -533,7 +533,8 @@ 42.11 * by the MarkSweepAlwaysCompactCount parameter. \ 42.12 */ \ 42.13 int invocations = SharedHeap::heap()->perm_gen()->stat_record()->invocations;\ 42.14 - bool skip_dead = ((invocations % MarkSweepAlwaysCompactCount) != 0); \ 42.15 + bool skip_dead = (MarkSweepAlwaysCompactCount < 1) \ 42.16 + ||((invocations % MarkSweepAlwaysCompactCount) != 0); \ 42.17 \ 42.18 size_t allowed_deadspace = 0; \ 42.19 if (skip_dead) { \
43.1 --- a/src/share/vm/oops/objArrayOop.hpp Thu Dec 01 13:42:41 2011 -0500 43.2 +++ b/src/share/vm/oops/objArrayOop.hpp Fri Dec 02 08:52:53 2011 -0500 43.3 @@ -34,7 +34,7 @@ 43.4 friend class objArrayKlass; 43.5 friend class Runtime1; 43.6 friend class psPromotionManager; 43.7 - friend class CSMarkOopClosure; 43.8 + friend class CSetMarkOopClosure; 43.9 friend class G1ParScanPartialArrayClosure; 43.10 43.11 template <class T> T* obj_at_addr(int index) const {
44.1 --- a/src/share/vm/runtime/arguments.cpp Thu Dec 01 13:42:41 2011 -0500 44.2 +++ b/src/share/vm/runtime/arguments.cpp Fri Dec 02 08:52:53 2011 -0500 44.3 @@ -1394,8 +1394,8 @@ 44.4 // If no heap maximum was requested explicitly, use some reasonable fraction 44.5 // of the physical memory, up to a maximum of 1GB. 44.6 if (UseParallelGC) { 44.7 - FLAG_SET_ERGO(uintx, ParallelGCThreads, 44.8 - Abstract_VM_Version::parallel_worker_threads()); 44.9 + FLAG_SET_DEFAULT(ParallelGCThreads, 44.10 + Abstract_VM_Version::parallel_worker_threads()); 44.11 44.12 // If InitialSurvivorRatio or MinSurvivorRatio were not specified, but the 44.13 // SurvivorRatio has been set, reset their default values to SurvivorRatio +
45.1 --- a/src/share/vm/runtime/globals.hpp Thu Dec 01 13:42:41 2011 -0500 45.2 +++ b/src/share/vm/runtime/globals.hpp Fri Dec 02 08:52:53 2011 -0500 45.3 @@ -1416,6 +1416,21 @@ 45.4 product(uintx, ParallelGCThreads, 0, \ 45.5 "Number of parallel threads parallel gc will use") \ 45.6 \ 45.7 + product(bool, UseDynamicNumberOfGCThreads, false, \ 45.8 + "Dynamically choose the number of parallel threads " \ 45.9 + "parallel gc will use") \ 45.10 + \ 45.11 + diagnostic(bool, ForceDynamicNumberOfGCThreads, false, \ 45.12 + "Force dynamic selection of the number of " \ 45.13 + "parallel threads parallel gc will use to aid debugging") \ 45.14 + \ 45.15 + product(uintx, HeapSizePerGCThread, ScaleForWordSize(64*M), \ 45.16 + "Size of heap (bytes) per GC thread used in calculating the " \ 45.17 + "number of GC threads") \ 45.18 + \ 45.19 + product(bool, TraceDynamicGCThreads, false, \ 45.20 + "Trace the dynamic GC thread usage") \ 45.21 + \ 45.22 develop(bool, ParallelOldGCSplitALot, false, \ 45.23 "Provoke splitting (copying data from a young gen space to" \ 45.24 "multiple destination spaces)") \ 45.25 @@ -2357,7 +2372,7 @@ 45.26 develop(bool, TraceGCTaskQueue, false, \ 45.27 "Trace actions of the GC task queues") \ 45.28 \ 45.29 - develop(bool, TraceGCTaskThread, false, \ 45.30 + diagnostic(bool, TraceGCTaskThread, false, \ 45.31 "Trace actions of the GC task threads") \ 45.32 \ 45.33 product(bool, PrintParallelOldGCPhaseTimes, false, \
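As a usage note (an assumption about typical invocation, not something this changeset documents): exercising the new machinery would presumably combine -XX:+UseDynamicNumberOfGCThreads with -XX:+TraceDynamicGCThreads to see the per-phase worker counts, and, since ForceDynamicNumberOfGCThreads is a diagnostic flag, add -XX:+UnlockDiagnosticVMOptions -XX:+ForceDynamicNumberOfGCThreads to jiggle the worker count even when ParallelGCThreads has been set explicitly.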
46.1 --- a/src/share/vm/runtime/thread.cpp Thu Dec 01 13:42:41 2011 -0500 46.2 +++ b/src/share/vm/runtime/thread.cpp Fri Dec 02 08:52:53 2011 -0500 46.3 @@ -778,12 +778,12 @@ 46.4 return true; 46.5 } else { 46.6 guarantee(res == strong_roots_parity, "Or else what?"); 46.7 - assert(SharedHeap::heap()->n_par_threads() > 0, 46.8 - "Should only fail when parallel."); 46.9 + assert(SharedHeap::heap()->workers()->active_workers() > 0, 46.10 + "Should only fail when parallel."); 46.11 return false; 46.12 } 46.13 } 46.14 - assert(SharedHeap::heap()->n_par_threads() > 0, 46.15 + assert(SharedHeap::heap()->workers()->active_workers() > 0, 46.16 "Should only fail when parallel."); 46.17 return false; 46.18 } 46.19 @@ -3939,7 +3939,15 @@ 46.20 // root groups. Overhead should be small enough to use all the time, 46.21 // even in sequential code. 46.22 SharedHeap* sh = SharedHeap::heap(); 46.23 - bool is_par = (sh->n_par_threads() > 0); 46.24 + // Cannot yet substitute active_workers for n_par_threads 46.25 + // because of G1CollectedHeap::verify() use of 46.26 + // SharedHeap::process_strong_roots(). n_par_threads == 0 will 46.27 + // turn off parallelism in process_strong_roots while active_workers 46.28 + // is being used for parallelism elsewhere. 46.29 + bool is_par = sh->n_par_threads() > 0; 46.30 + assert(!is_par || 46.31 + (SharedHeap::heap()->n_par_threads() == 46.32 + SharedHeap::heap()->workers()->active_workers()), "Mismatch"); 46.33 int cp = SharedHeap::heap()->strong_roots_parity(); 46.34 ALL_JAVA_THREADS(p) { 46.35 if (p->claim_oops_do(is_par, cp)) {
47.1 --- a/src/share/vm/services/memoryManager.cpp Thu Dec 01 13:42:41 2011 -0500 47.2 +++ b/src/share/vm/services/memoryManager.cpp Fri Dec 02 08:52:53 2011 -0500 47.3 @@ -168,10 +168,8 @@ 47.4 // initialize the arrays for memory usage 47.5 _before_gc_usage_array = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools); 47.6 _after_gc_usage_array = (MemoryUsage*) NEW_C_HEAP_ARRAY(MemoryUsage, num_pools); 47.7 - size_t len = num_pools * sizeof(MemoryUsage); 47.8 - memset(_before_gc_usage_array, 0, len); 47.9 - memset(_after_gc_usage_array, 0, len); 47.10 _usage_array_size = num_pools; 47.11 + clear(); 47.12 } 47.13 47.14 GCStatInfo::~GCStatInfo() { 47.15 @@ -304,12 +302,8 @@ 47.16 pool->set_last_collection_usage(usage); 47.17 LowMemoryDetector::detect_after_gc_memory(pool); 47.18 } 47.19 - if(is_notification_enabled()) { 47.20 - bool isMajorGC = this == MemoryService::get_major_gc_manager(); 47.21 - GCNotifier::pushNotification(this, isMajorGC ? "end of major GC" : "end of minor GC", 47.22 - GCCause::to_string(cause)); 47.23 - } 47.24 } 47.25 + 47.26 if (countCollection) { 47.27 _num_collections++; 47.28 // alternately update two objects making one public when complete 47.29 @@ -321,6 +315,12 @@ 47.30 // reset the current stat for diagnosability purposes 47.31 _current_gc_stat->clear(); 47.32 } 47.33 + 47.34 + if (is_notification_enabled()) { 47.35 + bool isMajorGC = this == MemoryService::get_major_gc_manager(); 47.36 + GCNotifier::pushNotification(this, isMajorGC ? "end of major GC" : "end of minor GC", 47.37 + GCCause::to_string(cause)); 47.38 + } 47.39 } 47.40 } 47.41
48.1 --- a/src/share/vm/utilities/workgroup.cpp Thu Dec 01 13:42:41 2011 -0500
48.2 +++ b/src/share/vm/utilities/workgroup.cpp Fri Dec 02 08:52:53 2011 -0500
48.3 @@ -57,7 +57,6 @@
48.4 bool are_GC_task_threads,
48.5 bool are_ConcurrentGC_threads) :
48.6 AbstractWorkGang(name, are_GC_task_threads, are_ConcurrentGC_threads) {
48.7 - // Save arguments.
48.8 _total_workers = workers;
48.9 }
48.10 
48.11 @@ -127,6 +126,12 @@
48.12 }
48.13 
48.14 void WorkGang::run_task(AbstractGangTask* task) {
48.15 + run_task(task, total_workers());
48.16 +}
48.17 +
48.18 +void WorkGang::run_task(AbstractGangTask* task, uint no_of_parallel_workers) {
48.19 + task->set_for_termination(no_of_parallel_workers);
48.20 +
48.21 // This thread is executed by the VM thread which does not block
48.22 // on ordinary MutexLocker's.
48.23 MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
48.24 @@ -143,22 +148,32 @@
48.25 // Tell the workers to get to work.
48.26 monitor()->notify_all();
48.27 // Wait for them to be finished
48.28 - while (finished_workers() < total_workers()) {
48.29 + while (finished_workers() < (int) no_of_parallel_workers) {
48.30 if (TraceWorkGang) {
48.31 tty->print_cr("Waiting in work gang %s: %d/%d finished sequence %d",
48.32 - name(), finished_workers(), total_workers(),
48.33 + name(), finished_workers(), no_of_parallel_workers,
48.34 _sequence_number);
48.35 }
48.36 monitor()->wait(/* no_safepoint_check */ true);
48.37 }
48.38 _task = NULL;
48.39 if (TraceWorkGang) {
48.40 - tty->print_cr("/nFinished work gang %s: %d/%d sequence %d",
48.41 - name(), finished_workers(), total_workers(),
48.42 + tty->print_cr("\nFinished work gang %s: %d/%d sequence %d",
48.43 + name(), finished_workers(), no_of_parallel_workers,
48.44 _sequence_number);
48.45 + Thread* me = Thread::current();
48.46 + tty->print_cr(" T: 0x%x VM_thread: %d", me, me->is_VM_thread());
48.47 }
48.48 }
48.49 
48.50 +void FlexibleWorkGang::run_task(AbstractGangTask* task) {
48.51 + // If active_workers() is passed, _finished_workers
48.52 + // must only be incremented for workers that find non-null
48.53 + // work (as opposed to all those that just check that the
48.54 + // task is not null).
48.55 + WorkGang::run_task(task, (uint) active_workers());
48.56 +}
48.57 +
48.58 void AbstractWorkGang::stop() {
48.59 // Tell all workers to terminate, then wait for them to become inactive.
48.60 MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
48.61 @@ -168,10 +183,10 @@
48.62 _task = NULL;
48.63 _terminate = true;
48.64 monitor()->notify_all();
48.65 - while (finished_workers() < total_workers()) {
48.66 + while (finished_workers() < active_workers()) {
48.67 if (TraceWorkGang) {
48.68 tty->print_cr("Waiting in work gang %s: %d/%d finished",
48.69 - name(), finished_workers(), total_workers());
48.70 + name(), finished_workers(), active_workers());
48.71 }
48.72 monitor()->wait(/* no_safepoint_check */ true);
48.73 }
48.74 @@ -275,10 +290,12 @@
48.75 // Check for new work.
48.76 if ((data.task() != NULL) &&
48.77 (data.sequence_number() != previous_sequence_number)) {
48.78 - gang()->internal_note_start();
48.79 - gang_monitor->notify_all();
48.80 - part = gang()->started_workers() - 1;
48.81 - break;
48.82 + if (gang()->needs_more_workers()) {
48.83 + gang()->internal_note_start();
48.84 + gang_monitor->notify_all();
48.85 + part = gang()->started_workers() - 1;
48.86 + break;
48.87 + }
48.88 }
48.89 // Nothing to do.
48.90 gang_monitor->wait(/* no_safepoint_check */ true);
48.91 @@ -350,6 +367,9 @@
48.92 
48.93 #endif /* PRODUCT */
48.94 
48.95 +// FlexibleWorkGang
48.96 +
48.97 +
48.98 // *** WorkGangBarrierSync
48.99 
48.100 WorkGangBarrierSync::WorkGangBarrierSync()
48.101 @@ -411,10 +431,8 @@
48.102 }
48.103 
48.104 void SubTasksDone::set_n_threads(int t) {
48.105 -#ifdef ASSERT
48.106 assert(_claimed == 0 || _threads_completed == _n_threads,
48.107 "should not be called while tasks are being processed!");
48.108 -#endif
48.109 _n_threads = (t == 0 ? 1 : t);
48.110 }
48.111 
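The needs_more_workers() check added to the worker loop above gates how many of the woken workers actually take a "part". A hedged sketch of that dispatch protocol, with invented names and C++11 primitives standing in for HotSpot's Monitor, might look like this; it is a simplification, not the sources:

    #include <mutex>
    #include <condition_variable>

    struct GangSketch {
      std::mutex m;
      std::condition_variable cv;
      const void* task;   // currently published task, if any
      int seq;            // task sequence number
      int active;         // workers wanted for this task
      int started;        // workers that have taken a part

      GangSketch() : task(0), seq(0), active(0), started(0) {}

      bool needs_more_workers() const { return started < active; }

      // run_task(): publish a task and how many workers it wants.
      void run_task(const void* t, int n_workers) {
        std::unique_lock<std::mutex> l(m);
        task = t; ++seq; active = n_workers; started = 0;
        cv.notify_all();  // completion wait omitted for brevity
      }

      // Body of each worker's loop: returns a unique part id in
      // [0, active) once this worker is actually needed.
      int claim_part(int previous_sequence_number) {
        std::unique_lock<std::mutex> l(m);
        for (;;) {
          if (task != 0 && seq != previous_sequence_number && needs_more_workers()) {
            return started++;  // handed out serially under the lock
          }
          cv.wait(l);          // not needed (yet): keep waiting for work
        }
      }
    };

Because every worker re-checks under the same lock, part ids are handed out serially and stay unique, which is exactly what the new comment block in workgroup.hpp below relies on.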
49.1 --- a/src/share/vm/utilities/workgroup.hpp Thu Dec 01 13:42:41 2011 -0500
49.2 +++ b/src/share/vm/utilities/workgroup.hpp Fri Dec 02 08:52:53 2011 -0500
49.3 @@ -96,11 +96,14 @@
49.4 
49.5 protected:
49.6 // Constructor and desctructor: only construct subclasses.
49.7 - AbstractGangTask(const char* name) {
49.8 + AbstractGangTask(const char* name)
49.9 + {
49.10 NOT_PRODUCT(_name = name);
49.11 _counter = 0;
49.12 }
49.13 virtual ~AbstractGangTask() { }
49.14 +
49.15 +public:
49.16 };
49.17 
49.18 class AbstractGangTaskWOopQueues : public AbstractGangTask {
49.19 @@ -116,6 +119,7 @@
49.20 OopTaskQueueSet* queues() { return _queues; }
49.21 };
49.22 
49.23 +
49.24 // Class AbstractWorkGang:
49.25 // An abstract class representing a gang of workers.
49.26 // You subclass this to supply an implementation of run_task().
49.27 @@ -130,6 +134,8 @@
49.28 virtual void run_task(AbstractGangTask* task) = 0;
49.29 // Stop and terminate all workers.
49.30 virtual void stop();
49.31 + // Return true if more workers should be applied to the task.
49.32 + virtual bool needs_more_workers() const { return true; }
49.33 public:
49.34 // Debugging.
49.35 const char* name() const;
49.36 @@ -287,20 +293,62 @@
49.37 AbstractWorkGang* gang() const { return _gang; }
49.38 };
49.39 
49.40 +// Dynamic number of worker threads
49.41 +//
49.42 +// This type of work gang is used to run different numbers of
49.43 +// worker threads at different times. The
49.44 +// number of workers run for a task is "_active_workers"
49.45 +// instead of "_total_workers" in a WorkGang. The method
49.46 +// "needs_more_workers()" returns true until "_active_workers"
49.47 +// have been started and returns false afterwards. The
49.48 +// implementation of "needs_more_workers()" in WorkGang always
49.49 +// returns true so that all workers are started. The method
49.50 +// "loop()" in GangWorker was modified to ask "needs_more_workers()"
49.51 +// in its loop to decide if it should start working on a task.
49.52 +// A worker in "loop()" waits for notification on the WorkGang
49.53 +// monitor and execution of each worker as it checks for work
49.54 +// is serialized via the same monitor. The "needs_more_workers()"
49.55 +// call is serialized and additionally the calculation for the
49.56 +// "part" (effectively the worker id for executing the task) is
49.57 +// serialized to give each worker a unique "part". Workers that
49.58 +// are not needed for this task (i.e., "_active_workers" have
49.59 +// been started before it) continue to wait for work.
49.60 +
49.61 class FlexibleWorkGang: public WorkGang {
49.62 + // The currently active workers in this gang.
49.63 + // This is a number that is dynamically adjusted
49.64 + // and checked in the run_task() method at each invocation.
49.65 + // As described above _active_workers determines the number
49.66 + // of threads started on a task. It must also be used to
49.67 + // determine completion.
49.68 +
49.69 protected:
49.70 int _active_workers;
49.71 public:
49.72 // Constructor and destructor.
49.73 + // Initialize active_workers to a minimum value. Setting it to
49.74 + // the parameter "workers" will initialize it to a maximum
49.75 + // value which is not desirable.
49.76 FlexibleWorkGang(const char* name, int workers,
49.77 bool are_GC_task_threads,
49.78 bool are_ConcurrentGC_threads) :
49.79 - WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads) {
49.80 - _active_workers = ParallelGCThreads;
49.81 - };
49.82 + WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads),
49.83 + _active_workers(UseDynamicNumberOfGCThreads ? 1 : ParallelGCThreads) {};
49.84 // Accessors for fields
49.85 virtual int active_workers() const { return _active_workers; }
49.86 - void set_active_workers(int v) { _active_workers = v; }
49.87 + void set_active_workers(int v) {
49.88 + assert(v <= _total_workers,
49.89 + "Trying to set more workers active than there are");
49.90 + _active_workers = MIN2(v, _total_workers);
49.91 + assert(v != 0, "Trying to set active workers to 0");
49.92 + _active_workers = MAX2(1, _active_workers);
49.93 + assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers,
49.94 + "Unless dynamic should use total workers");
49.95 + }
49.96 + virtual void run_task(AbstractGangTask* task);
49.97 + virtual bool needs_more_workers() const {
49.98 + return _started_workers < _active_workers;
49.99 + }
49.100 };
49.101 
49.102 // Work gangs in garbage collectors: 2009-06-10
49.103 @@ -357,6 +405,11 @@
49.104 class SubTasksDone: public CHeapObj {
49.105 jint* _tasks;
49.106 int _n_tasks;
49.107 + // _n_threads is used to determine when a subtask is done.
49.108 + // It does not control how many threads will execute the subtask
49.109 + // but must be initialized to the number that do execute the task
49.110 + // in order to correctly decide when the subtask is done (all the
49.111 + // threads working on the task have finished).
49.112 int _n_threads;
49.113 jint _threads_completed;
49.114 #ifdef ASSERT
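The new set_active_workers() body clamps rather than failing in product builds, where the asserts compile away. A standalone restatement of that clamping, using std::min/std::max in place of HotSpot's MIN2/MAX2 macros, for illustration only:

    #include <algorithm>
    #include <cassert>

    // Restates set_active_workers() above: in product builds the
    // asserts vanish and the value is clamped to [1, total_workers].
    int clamp_active_workers(int v, int total_workers) {
      assert(v <= total_workers && "Trying to set more workers active than there are");
      int active = std::min(v, total_workers);
      assert(v != 0 && "Trying to set active workers to 0");
      return std::max(1, active);
    }

A caller, such as the CMS remark and marking paths earlier in this changeset, sets the active count first and then dispatches, so FlexibleWorkGang::run_task() starts exactly that many workers and uses the same count for completion.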
50.1 --- a/src/share/vm/utilities/yieldingWorkgroup.cpp Thu Dec 01 13:42:41 2011 -0500
50.2 +++ b/src/share/vm/utilities/yieldingWorkgroup.cpp Fri Dec 02 08:52:53 2011 -0500
50.3 @@ -125,7 +125,7 @@
50.4 if (requested_size != 0) {
50.5 _active_workers = MIN2(requested_size, total_workers());
50.6 } else {
50.7 - _active_workers = total_workers();
50.8 + _active_workers = active_workers();
50.9 }
50.10 new_task->set_actual_size(_active_workers);
50.11 new_task->set_for_termination(_active_workers);
50.12 @@ -148,22 +148,22 @@
50.13 for (Status status = yielding_task()->status();
50.14 status != COMPLETED && status != YIELDED && status != ABORTED;
50.15 status = yielding_task()->status()) {
50.16 - assert(started_workers() <= total_workers(), "invariant");
50.17 - assert(finished_workers() <= total_workers(), "invariant");
50.18 - assert(yielded_workers() <= total_workers(), "invariant");
50.19 + assert(started_workers() <= active_workers(), "invariant");
50.20 + assert(finished_workers() <= active_workers(), "invariant");
50.21 + assert(yielded_workers() <= active_workers(), "invariant");
50.22 monitor()->wait(Mutex::_no_safepoint_check_flag);
50.23 }
50.24 switch (yielding_task()->status()) {
50.25 case COMPLETED:
50.26 case ABORTED: {
50.27 - assert(finished_workers() == total_workers(), "Inconsistent status");
50.28 + assert(finished_workers() == active_workers(), "Inconsistent status");
50.29 assert(yielded_workers() == 0, "Invariant");
50.30 reset(); // for next task; gang<->task binding released
50.31 break;
50.32 }
50.33 case YIELDED: {
50.34 assert(yielded_workers() > 0, "Invariant");
50.35 - assert(yielded_workers() + finished_workers() == total_workers(),
50.36 + assert(yielded_workers() + finished_workers() == active_workers(),
50.37 "Inconsistent counts");
50.38 break;
50.39 }
50.40 @@ -182,7 +182,6 @@
50.41 
50.42 MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
50.43 assert(task() != NULL && task() == gang_task, "Incorrect usage");
50.44 - // assert(_active_workers == total_workers(), "For now");
50.45 assert(_started_workers == _active_workers, "Precondition");
50.46 assert(_yielded_workers > 0 && yielding_task()->status() == YIELDED,
50.47 "Else why are we calling continue_task()");
50.48 @@ -202,7 +201,7 @@
50.49 void YieldingFlexibleWorkGang::yield() {
50.50 assert(task() != NULL, "Inconsistency; should have task binding");
50.51 MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
50.52 - assert(yielded_workers() < active_workers(), "Consistency check");
50.53 + assert(yielded_workers() < active_workers(), "Consistency check");
50.54 if (yielding_task()->status() == ABORTING) {
50.55 // Do not yield; we need to abort as soon as possible
50.56 // XXX NOTE: This can cause a performance pathology in the
50.57 @@ -213,7 +212,7 @@
50.58 // us to return at each potential yield point.
50.59 return;
50.60 }
50.61 - if (++_yielded_workers + finished_workers() == total_workers()) {
50.62 + if (++_yielded_workers + finished_workers() == active_workers()) {
50.63 yielding_task()->set_status(YIELDED);
50.64 monitor()->notify_all();
50.65 } else {
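Switching these invariants from total_workers() to active_workers() changes the quiescence condition to count only the workers actually started on the task. A hypothetical helper (not in the patch) restating the yield/finish accounting:

    // The gang is quiescent for this task once every *active* worker
    // has either yielded or finished; with at least one yielder the
    // status becomes YIELDED rather than COMPLETED. E.g. with
    // active == 4: yielded == 3 and finished == 1 gives 3 + 1 == 4.
    bool gang_quiescent_as_yielded(int yielded, int finished, int active) {
      return yielded > 0 && (yielded + finished) == active;
    }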
51.1 --- a/src/share/vm/utilities/yieldingWorkgroup.hpp Thu Dec 01 13:42:41 2011 -0500
51.2 +++ b/src/share/vm/utilities/yieldingWorkgroup.hpp Fri Dec 02 08:52:53 2011 -0500
51.3 @@ -199,17 +199,11 @@
51.4 void abort();
51.5 
51.6 private:
51.7 - int _active_workers;
51.8 int _yielded_workers;
51.9 void wait_for_gang();
51.10 
51.11 public:
51.12 // Accessors for fields
51.13 - int active_workers() const {
51.14 - return _active_workers;
51.15 - }
51.16 -
51.17 - // Accessors for fields
51.18 int yielded_workers() const {
51.19 return _yielded_workers;
51.20 }