1.1 --- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Thu Sep 16 13:45:55 2010 -0700 1.2 +++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp Mon Sep 20 14:38:38 2010 -0700 1.3 @@ -195,7 +195,7 @@ 1.4 "Offset of FreeChunk::_prev within FreeChunk must match" 1.5 " that of OopDesc::_klass within OopDesc"); 1.6 ) 1.7 - if (ParallelGCThreads > 0) { 1.8 + if (CollectedHeap::use_parallel_gc_threads()) { 1.9 typedef CMSParGCThreadState* CMSParGCThreadStatePtr; 1.10 _par_gc_thread_states = 1.11 NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads); 1.12 @@ -616,7 +616,7 @@ 1.13 } 1.14 1.15 // Support for multi-threaded concurrent phases 1.16 - if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) { 1.17 + if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) { 1.18 if (FLAG_IS_DEFAULT(ConcGCThreads)) { 1.19 // just for now 1.20 FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4); 1.21 @@ -628,6 +628,8 @@ 1.22 warning("GC/CMS: _conc_workers allocation failure: " 1.23 "forcing -CMSConcurrentMTEnabled"); 1.24 CMSConcurrentMTEnabled = false; 1.25 + } else { 1.26 + _conc_workers->initialize_workers(); 1.27 } 1.28 } else { 1.29 CMSConcurrentMTEnabled = false; 1.30 @@ -936,7 +938,7 @@ 1.31 // along with all the other pointers into the heap but 1.32 // compaction is expected to be a rare event with 1.33 // a heap using cms so don't do it without seeing the need. 1.34 - if (ParallelGCThreads > 0) { 1.35 + if (CollectedHeap::use_parallel_gc_threads()) { 1.36 for (uint i = 0; i < ParallelGCThreads; i++) { 1.37 _par_gc_thread_states[i]->promo.reset(); 1.38 } 1.39 @@ -2630,7 +2632,8 @@ 1.40 // Should call gc_prologue_work() for all cms gens we are responsible for 1.41 bool registerClosure = _collectorState >= Marking 1.42 && _collectorState < Sweeping; 1.43 - ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar 1.44 + ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ? 1.45 + &_modUnionClosurePar 1.46 : &_modUnionClosure; 1.47 _cmsGen->gc_prologue_work(full, registerClosure, muc); 1.48 _permGen->gc_prologue_work(full, registerClosure, muc); 1.49 @@ -2731,7 +2734,7 @@ 1.50 collector()->gc_epilogue(full); 1.51 1.52 // Also reset promotion tracking in par gc thread states. 1.53 - if (ParallelGCThreads > 0) { 1.54 + if (CollectedHeap::use_parallel_gc_threads()) { 1.55 for (uint i = 0; i < ParallelGCThreads; i++) { 1.56 _par_gc_thread_states[i]->promo.stopTrackingPromotions(i); 1.57 } 1.58 @@ -3731,7 +3734,6 @@ 1.59 // MT Concurrent Marking Task 1.60 class CMSConcMarkingTask: public YieldingFlexibleGangTask { 1.61 CMSCollector* _collector; 1.62 - YieldingFlexibleWorkGang* _workers; // the whole gang 1.63 int _n_workers; // requested/desired # workers 1.64 bool _asynch; 1.65 bool _result; 1.66 @@ -3751,21 +3753,19 @@ 1.67 CMSConcMarkingTask(CMSCollector* collector, 1.68 CompactibleFreeListSpace* cms_space, 1.69 CompactibleFreeListSpace* perm_space, 1.70 - bool asynch, int n_workers, 1.71 + bool asynch, 1.72 YieldingFlexibleWorkGang* workers, 1.73 OopTaskQueueSet* task_queues): 1.74 YieldingFlexibleGangTask("Concurrent marking done multi-threaded"), 1.75 _collector(collector), 1.76 _cms_space(cms_space), 1.77 _perm_space(perm_space), 1.78 - _asynch(asynch), _n_workers(n_workers), _result(true), 1.79 - _workers(workers), _task_queues(task_queues), 1.80 - _term(n_workers, task_queues, _collector, asynch), 1.81 + _asynch(asynch), _n_workers(0), _result(true), 1.82 + _task_queues(task_queues), 1.83 + _term(_n_workers, task_queues, _collector, asynch), 1.84 _bit_map_lock(collector->bitMapLock()) 1.85 { 1.86 - assert(n_workers <= workers->total_workers(), 1.87 - "Else termination won't work correctly today"); // XXX FIX ME! 1.88 - _requested_size = n_workers; 1.89 + _requested_size = _n_workers; 1.90 _term.set_task(this); 1.91 assert(_cms_space->bottom() < _perm_space->bottom(), 1.92 "Finger incorrectly initialized below"); 1.93 @@ -3781,6 +3781,10 @@ 1.94 1.95 CMSConcMarkingTerminator* terminator() { return &_term; } 1.96 1.97 + virtual void set_for_termination(int active_workers) { 1.98 + terminator()->reset_for_reuse(active_workers); 1.99 + } 1.100 + 1.101 void work(int i); 1.102 1.103 virtual void coordinator_yield(); // stuff done by coordinator 1.104 @@ -4220,9 +4224,12 @@ 1.105 CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); 1.106 CompactibleFreeListSpace* perm_space = _permGen->cmsSpace(); 1.107 1.108 - CMSConcMarkingTask tsk(this, cms_space, perm_space, 1.109 - asynch, num_workers /* number requested XXX */, 1.110 - conc_workers(), task_queues()); 1.111 + CMSConcMarkingTask tsk(this, 1.112 + cms_space, 1.113 + perm_space, 1.114 + asynch, 1.115 + conc_workers(), 1.116 + task_queues()); 1.117 1.118 // Since the actual number of workers we get may be different 1.119 // from the number we requested above, do we need to do anything different 1.120 @@ -4326,6 +4333,10 @@ 1.121 verify_overflow_empty(); 1.122 _abort_preclean = false; 1.123 if (CMSPrecleaningEnabled) { 1.124 + // Precleaning is currently not MT but the reference processor 1.125 + // may be set for MT. Disable it temporarily here. 1.126 + ReferenceProcessor* rp = ref_processor(); 1.127 + ReferenceProcessorMTProcMutator z(rp, false); 1.128 _eden_chunk_index = 0; 1.129 size_t used = get_eden_used(); 1.130 size_t capacity = get_eden_capacity(); 1.131 @@ -4918,7 +4929,7 @@ 1.132 // dirtied since the first checkpoint in this GC cycle and prior to 1.133 // the most recent young generation GC, minus those cleaned up by the 1.134 // concurrent precleaning. 1.135 - if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) { 1.136 + if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) { 1.137 TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty); 1.138 do_remark_parallel(); 1.139 } else { 1.140 @@ -5012,7 +5023,6 @@ 1.141 // Parallel remark task 1.142 class CMSParRemarkTask: public AbstractGangTask { 1.143 CMSCollector* _collector; 1.144 - WorkGang* _workers; 1.145 int _n_workers; 1.146 CompactibleFreeListSpace* _cms_space; 1.147 CompactibleFreeListSpace* _perm_space; 1.148 @@ -5025,21 +5035,21 @@ 1.149 CMSParRemarkTask(CMSCollector* collector, 1.150 CompactibleFreeListSpace* cms_space, 1.151 CompactibleFreeListSpace* perm_space, 1.152 - int n_workers, WorkGang* workers, 1.153 + int n_workers, FlexibleWorkGang* workers, 1.154 OopTaskQueueSet* task_queues): 1.155 AbstractGangTask("Rescan roots and grey objects in parallel"), 1.156 _collector(collector), 1.157 _cms_space(cms_space), _perm_space(perm_space), 1.158 _n_workers(n_workers), 1.159 - _workers(workers), 1.160 _task_queues(task_queues), 1.161 - _term(workers->total_workers(), task_queues) { } 1.162 + _term(n_workers, task_queues) { } 1.163 1.164 OopTaskQueueSet* task_queues() { return _task_queues; } 1.165 1.166 OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } 1.167 1.168 ParallelTaskTerminator* terminator() { return &_term; } 1.169 + int n_workers() { return _n_workers; } 1.170 1.171 void work(int i); 1.172 1.173 @@ -5057,6 +5067,11 @@ 1.174 void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed); 1.175 }; 1.176 1.177 +// work_queue(i) is passed to the closure 1.178 +// Par_MarkRefsIntoAndScanClosure. The "i" parameter 1.179 +// also is passed to do_dirty_card_rescan_tasks() and to 1.180 +// do_work_steal() to select the i-th task_queue. 1.181 + 1.182 void CMSParRemarkTask::work(int i) { 1.183 elapsedTimer _timer; 1.184 ResourceMark rm; 1.185 @@ -5128,6 +5143,7 @@ 1.186 1.187 // Do the rescan tasks for each of the two spaces 1.188 // (cms_space and perm_space) in turn. 1.189 + // "i" is passed to select the "i-th" task_queue 1.190 do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl); 1.191 do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl); 1.192 _timer.stop(); 1.193 @@ -5150,6 +5166,7 @@ 1.194 } 1.195 } 1.196 1.197 +// Note that parameter "i" is not used. 1.198 void 1.199 CMSParRemarkTask::do_young_space_rescan(int i, 1.200 Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space, 1.201 @@ -5309,8 +5326,13 @@ 1.202 size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, 1.203 (size_t)ParGCDesiredObjsFromOverflowList); 1.204 // Now check if there's any work in the overflow list 1.205 + // Passing ParallelGCThreads as the third parameter, no_of_gc_threads, 1.206 + // only affects the number of attempts made to get work from the 1.207 + // overflow list and does not affect the number of workers. Just 1.208 + // pass ParallelGCThreads so this behavior is unchanged. 1.209 if (_collector->par_take_from_overflow_list(num_from_overflow_list, 1.210 - work_q)) { 1.211 + work_q, 1.212 + ParallelGCThreads)) { 1.213 // found something in global overflow list; 1.214 // not yet ready to go stealing work from others. 1.215 // We'd like to assert(work_q->size() != 0, ...) 1.216 @@ -5367,11 +5389,12 @@ 1.217 // Merge the per-thread plab arrays into the global survivor chunk 1.218 // array which will provide the partitioning of the survivor space 1.219 // for CMS rescan. 1.220 -void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) { 1.221 +void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv, 1.222 + int no_of_gc_threads) { 1.223 assert(_survivor_plab_array != NULL, "Error"); 1.224 assert(_survivor_chunk_array != NULL, "Error"); 1.225 assert(_collectorState == FinalMarking, "Error"); 1.226 - for (uint j = 0; j < ParallelGCThreads; j++) { 1.227 + for (int j = 0; j < no_of_gc_threads; j++) { 1.228 _cursor[j] = 0; 1.229 } 1.230 HeapWord* top = surv->top(); 1.231 @@ -5379,7 +5402,7 @@ 1.232 for (i = 0; i < _survivor_chunk_capacity; i++) { // all sca entries 1.233 HeapWord* min_val = top; // Higher than any PLAB address 1.234 uint min_tid = 0; // position of min_val this round 1.235 - for (uint j = 0; j < ParallelGCThreads; j++) { 1.236 + for (int j = 0; j < no_of_gc_threads; j++) { 1.237 ChunkArray* cur_sca = &_survivor_plab_array[j]; 1.238 if (_cursor[j] == cur_sca->end()) { 1.239 continue; 1.240 @@ -5413,7 +5436,7 @@ 1.241 // Verify that we used up all the recorded entries 1.242 #ifdef ASSERT 1.243 size_t total = 0; 1.244 - for (uint j = 0; j < ParallelGCThreads; j++) { 1.245 + for (int j = 0; j < no_of_gc_threads; j++) { 1.246 assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant"); 1.247 total += _cursor[j]; 1.248 } 1.249 @@ -5448,13 +5471,15 @@ 1.250 // Each valid entry in [0, _eden_chunk_index) represents a task. 1.251 size_t n_tasks = _eden_chunk_index + 1; 1.252 assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error"); 1.253 - pst->set_par_threads(n_threads); 1.254 + // Sets the condition for completion of the subtask (how many threads 1.255 + // need to finish in order to be done). 1.256 + pst->set_n_threads(n_threads); 1.257 pst->set_n_tasks((int)n_tasks); 1.258 } 1.259 1.260 // Merge the survivor plab arrays into _survivor_chunk_array 1.261 if (_survivor_plab_array != NULL) { 1.262 - merge_survivor_plab_arrays(dng->from()); 1.263 + merge_survivor_plab_arrays(dng->from(), n_threads); 1.264 } else { 1.265 assert(_survivor_chunk_index == 0, "Error"); 1.266 } 1.267 @@ -5463,7 +5488,9 @@ 1.268 { 1.269 SequentialSubTasksDone* pst = dng->to()->par_seq_tasks(); 1.270 assert(!pst->valid(), "Clobbering existing data?"); 1.271 - pst->set_par_threads(n_threads); 1.272 + // Sets the condition for completion of the subtask (how many threads 1.273 + // need to finish in order to be done). 1.274 + pst->set_n_threads(n_threads); 1.275 pst->set_n_tasks(1); 1.276 assert(pst->valid(), "Error"); 1.277 } 1.278 @@ -5474,7 +5501,9 @@ 1.279 assert(!pst->valid(), "Clobbering existing data?"); 1.280 size_t n_tasks = _survivor_chunk_index + 1; 1.281 assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error"); 1.282 - pst->set_par_threads(n_threads); 1.283 + // Sets the condition for completion of the subtask (how many threads 1.284 + // need to finish in order to be done). 1.285 + pst->set_n_threads(n_threads); 1.286 pst->set_n_tasks((int)n_tasks); 1.287 assert(pst->valid(), "Error"); 1.288 } 1.289 @@ -5483,7 +5512,7 @@ 1.290 // Parallel version of remark 1.291 void CMSCollector::do_remark_parallel() { 1.292 GenCollectedHeap* gch = GenCollectedHeap::heap(); 1.293 - WorkGang* workers = gch->workers(); 1.294 + FlexibleWorkGang* workers = gch->workers(); 1.295 assert(workers != NULL, "Need parallel worker threads."); 1.296 int n_workers = workers->total_workers(); 1.297 CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace(); 1.298 @@ -5636,13 +5665,11 @@ 1.299 //////////////////////////////////////////////////////// 1.300 // Parallel Reference Processing Task Proxy Class 1.301 //////////////////////////////////////////////////////// 1.302 -class CMSRefProcTaskProxy: public AbstractGangTask { 1.303 +class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues { 1.304 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 1.305 CMSCollector* _collector; 1.306 CMSBitMap* _mark_bit_map; 1.307 const MemRegion _span; 1.308 - OopTaskQueueSet* _task_queues; 1.309 - ParallelTaskTerminator _term; 1.310 ProcessTask& _task; 1.311 1.312 public: 1.313 @@ -5650,24 +5677,21 @@ 1.314 CMSCollector* collector, 1.315 const MemRegion& span, 1.316 CMSBitMap* mark_bit_map, 1.317 - int total_workers, 1.318 + AbstractWorkGang* workers, 1.319 OopTaskQueueSet* task_queues): 1.320 - AbstractGangTask("Process referents by policy in parallel"), 1.321 + AbstractGangTaskWOopQueues("Process referents by policy in parallel", 1.322 + task_queues), 1.323 _task(task), 1.324 - _collector(collector), _span(span), _mark_bit_map(mark_bit_map), 1.325 - _task_queues(task_queues), 1.326 - _term(total_workers, task_queues) 1.327 + _collector(collector), _span(span), _mark_bit_map(mark_bit_map) 1.328 { 1.329 assert(_collector->_span.equals(_span) && !_span.is_empty(), 1.330 "Inconsistency in _span"); 1.331 } 1.332 1.333 - OopTaskQueueSet* task_queues() { return _task_queues; } 1.334 + OopTaskQueueSet* task_queues() { return queues(); } 1.335 1.336 OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); } 1.337 1.338 - ParallelTaskTerminator* terminator() { return &_term; } 1.339 - 1.340 void do_work_steal(int i, 1.341 CMSParDrainMarkingStackClosure* drain, 1.342 CMSParKeepAliveClosure* keep_alive, 1.343 @@ -5739,8 +5763,13 @@ 1.344 size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, 1.345 (size_t)ParGCDesiredObjsFromOverflowList); 1.346 // Now check if there's any work in the overflow list 1.347 + // Passing ParallelGCThreads as the third parameter, no_of_gc_threads, 1.348 + // only affects the number of attempts made to get work from the 1.349 + // overflow list and does not affect the number of workers. Just 1.350 + // pass ParallelGCThreads so this behavior is unchanged. 1.351 if (_collector->par_take_from_overflow_list(num_from_overflow_list, 1.352 - work_q)) { 1.353 + work_q, 1.354 + ParallelGCThreads)) { 1.355 // Found something in global overflow list; 1.356 // not yet ready to go stealing work from others. 1.357 // We'd like to assert(work_q->size() != 0, ...) 1.358 @@ -5773,13 +5802,12 @@ 1.359 void CMSRefProcTaskExecutor::execute(ProcessTask& task) 1.360 { 1.361 GenCollectedHeap* gch = GenCollectedHeap::heap(); 1.362 - WorkGang* workers = gch->workers(); 1.363 + FlexibleWorkGang* workers = gch->workers(); 1.364 assert(workers != NULL, "Need parallel worker threads."); 1.365 - int n_workers = workers->total_workers(); 1.366 CMSRefProcTaskProxy rp_task(task, &_collector, 1.367 _collector.ref_processor()->span(), 1.368 _collector.markBitMap(), 1.369 - n_workers, _collector.task_queues()); 1.370 + workers, _collector.task_queues()); 1.371 workers->run_task(&rp_task); 1.372 } 1.373 1.374 @@ -5787,7 +5815,7 @@ 1.375 { 1.376 1.377 GenCollectedHeap* gch = GenCollectedHeap::heap(); 1.378 - WorkGang* workers = gch->workers(); 1.379 + FlexibleWorkGang* workers = gch->workers(); 1.380 assert(workers != NULL, "Need parallel worker threads."); 1.381 CMSRefEnqueueTaskProxy enq_task(task); 1.382 workers->run_task(&enq_task); 1.383 @@ -5814,6 +5842,14 @@ 1.384 { 1.385 TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty); 1.386 if (rp->processing_is_mt()) { 1.387 + // Set the degree of MT here. If the discovery is done MT, there 1.388 + // may have been a different number of threads doing the discovery 1.389 + // and a different number of discovered lists may have Ref objects. 1.390 + // That is OK as long as the Reference lists are balanced (see 1.391 + // balance_all_queues() and balance_queues()). 1.392 + 1.393 + 1.394 + rp->set_mt_degree(ParallelGCThreads); 1.395 CMSRefProcTaskExecutor task_executor(*this); 1.396 rp->process_discovered_references(&_is_alive_closure, 1.397 &cmsKeepAliveClosure, 1.398 @@ -5874,6 +5910,7 @@ 1.399 1.400 rp->set_enqueuing_is_done(true); 1.401 if (rp->processing_is_mt()) { 1.402 + rp->balance_all_queues(); 1.403 CMSRefProcTaskExecutor task_executor(*this); 1.404 rp->enqueue_discovered_references(&task_executor); 1.405 } else { 1.406 @@ -8708,7 +8745,8 @@ 1.407 // similar changes might be needed. 1.408 // CR 6797058 has been filed to consolidate the common code. 1.409 bool CMSCollector::par_take_from_overflow_list(size_t num, 1.410 - OopTaskQueue* work_q) { 1.411 + OopTaskQueue* work_q, 1.412 + int no_of_gc_threads) { 1.413 assert(work_q->size() == 0, "First empty local work queue"); 1.414 assert(num < work_q->max_elems(), "Can't bite more than we can chew"); 1.415 if (_overflow_list == NULL) { 1.416 @@ -8717,7 +8755,9 @@ 1.417 // Grab the entire list; we'll put back a suffix 1.418 oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list); 1.419 Thread* tid = Thread::current(); 1.420 - size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads; 1.421 + // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was 1.422 + // set to ParallelGCThreads. 1.423 + size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads; 1.424 size_t sleep_time_millis = MAX2((size_t)1, num/100); 1.425 // If the list is busy, we spin for a short while, 1.426 // sleeping between attempts to get the list. 1.427 @@ -9256,4 +9296,3 @@ 1.428 true /* recordGCEndTime */, 1.429 true /* countCollection */ ); 1.430 } 1.431 -