diff -r 97fbf5beff7b -r 8b10f48633dc src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Sep 16 13:45:55 2010 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Sep 20 14:38:38 2010 -0700
@@ -195,7 +195,7 @@
             "Offset of FreeChunk::_prev within FreeChunk must match"
             " that of OopDesc::_klass within OopDesc");
   )
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     typedef CMSParGCThreadState* CMSParGCThreadStatePtr;
     _par_gc_thread_states =
       NEW_C_HEAP_ARRAY(CMSParGCThreadStatePtr, ParallelGCThreads);
@@ -616,7 +616,7 @@
   }
 
   // Support for multi-threaded concurrent phases
-  if (ParallelGCThreads > 0 && CMSConcurrentMTEnabled) {
+  if (CollectedHeap::use_parallel_gc_threads() && CMSConcurrentMTEnabled) {
     if (FLAG_IS_DEFAULT(ConcGCThreads)) {
       // just for now
       FLAG_SET_DEFAULT(ConcGCThreads, (ParallelGCThreads + 3)/4);
@@ -628,6 +628,8 @@
       warning("GC/CMS: _conc_workers allocation failure: "
               "forcing -CMSConcurrentMTEnabled");
       CMSConcurrentMTEnabled = false;
+    } else {
+      _conc_workers->initialize_workers();
     }
   } else {
     CMSConcurrentMTEnabled = false;
@@ -936,7 +938,7 @@
   // along with all the other pointers into the heap but
   // compaction is expected to be a rare event with
   // a heap using cms so don't do it without seeing the need.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
       _par_gc_thread_states[i]->promo.reset();
     }
@@ -2630,7 +2632,8 @@
   // Should call gc_prologue_work() for all cms gens we are responsible for
   bool registerClosure =    _collectorState >= Marking
                          && _collectorState < Sweeping;
-  ModUnionClosure* muc = ParallelGCThreads > 0 ? &_modUnionClosurePar
+  ModUnionClosure* muc = CollectedHeap::use_parallel_gc_threads() ?
+                                                 &_modUnionClosurePar
                                                  : &_modUnionClosure;
   _cmsGen->gc_prologue_work(full, registerClosure, muc);
   _permGen->gc_prologue_work(full, registerClosure, muc);
@@ -2731,7 +2734,7 @@
   collector()->gc_epilogue(full);
 
   // Also reset promotion tracking in par gc thread states.
-  if (ParallelGCThreads > 0) {
+  if (CollectedHeap::use_parallel_gc_threads()) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
       _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
     }
@@ -3731,7 +3734,6 @@
 // MT Concurrent Marking Task
 class CMSConcMarkingTask: public YieldingFlexibleGangTask {
   CMSCollector* _collector;
-  YieldingFlexibleWorkGang* _workers; // the whole gang
   int           _n_workers;                  // requested/desired # workers
   bool          _asynch;
   bool          _result;
@@ -3751,21 +3753,19 @@
   CMSConcMarkingTask(CMSCollector* collector,
                  CompactibleFreeListSpace* cms_space,
                  CompactibleFreeListSpace* perm_space,
-                 bool asynch, int n_workers,
+                 bool asynch,
                  YieldingFlexibleWorkGang* workers,
                  OopTaskQueueSet* task_queues):
     YieldingFlexibleGangTask("Concurrent marking done multi-threaded"),
     _collector(collector),
     _cms_space(cms_space),
     _perm_space(perm_space),
-    _asynch(asynch), _n_workers(n_workers), _result(true),
-    _workers(workers), _task_queues(task_queues),
-    _term(n_workers, task_queues, _collector, asynch),
+    _asynch(asynch), _n_workers(0), _result(true),
+    _task_queues(task_queues),
+    _term(_n_workers, task_queues, _collector, asynch),
     _bit_map_lock(collector->bitMapLock())
   {
-    assert(n_workers <= workers->total_workers(),
-           "Else termination won't work correctly today"); // XXX FIX ME!
-    _requested_size = n_workers;
+    _requested_size = _n_workers;
     _term.set_task(this);
     assert(_cms_space->bottom() < _perm_space->bottom(),
            "Finger incorrectly initialized below");
@@ -3781,6 +3781,10 @@
 
   CMSConcMarkingTerminator* terminator() { return &_term; }
 
+  virtual void set_for_termination(int active_workers) {
+    terminator()->reset_for_reuse(active_workers);
+  }
+
   void work(int i);
 
   virtual void coordinator_yield(); // stuff done by coordinator
@@ -4220,9 +4224,12 @@
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
   CompactibleFreeListSpace* perm_space = _permGen->cmsSpace();
 
-  CMSConcMarkingTask tsk(this, cms_space, perm_space,
-                         asynch, num_workers /* number requested XXX */,
-                         conc_workers(), task_queues());
+  CMSConcMarkingTask tsk(this,
+                         cms_space,
+                         perm_space,
+                         asynch,
+                         conc_workers(),
+                         task_queues());
 
   // Since the actual number of workers we get may be different
   // from the number we requested above, do we need to do anything different
@@ -4326,6 +4333,10 @@
   verify_overflow_empty();
   _abort_preclean = false;
   if (CMSPrecleaningEnabled) {
+    // Precleaning is currently not MT but the reference processor
+    // may be set for MT.  Disable it temporarily here.
+    ReferenceProcessor* rp = ref_processor();
+    ReferenceProcessorMTProcMutator z(rp, false);
     _eden_chunk_index = 0;
     size_t used = get_eden_used();
     size_t capacity = get_eden_capacity();
@@ -4918,7 +4929,7 @@
   // dirtied since the first checkpoint in this GC cycle and prior to
   // the most recent young generation GC, minus those cleaned up by the
   // concurrent precleaning.
-  if (CMSParallelRemarkEnabled && ParallelGCThreads > 0) {
+  if (CMSParallelRemarkEnabled && CollectedHeap::use_parallel_gc_threads()) {
     TraceTime t("Rescan (parallel) ", PrintGCDetails, false, gclog_or_tty);
     do_remark_parallel();
   } else {
@@ -5012,7 +5023,6 @@
 // Parallel remark task
 class CMSParRemarkTask: public AbstractGangTask {
   CMSCollector* _collector;
-  WorkGang*     _workers;
   int           _n_workers;
   CompactibleFreeListSpace* _cms_space;
   CompactibleFreeListSpace* _perm_space;
@@ -5025,21 +5035,21 @@
   CMSParRemarkTask(CMSCollector* collector,
                    CompactibleFreeListSpace* cms_space,
                    CompactibleFreeListSpace* perm_space,
-                   int n_workers, WorkGang* workers,
+                   int n_workers, FlexibleWorkGang* workers,
                    OopTaskQueueSet* task_queues):
     AbstractGangTask("Rescan roots and grey objects in parallel"),
     _collector(collector),
     _cms_space(cms_space),
     _perm_space(perm_space),
     _n_workers(n_workers),
-    _workers(workers),
     _task_queues(task_queues),
-    _term(workers->total_workers(), task_queues) { }
+    _term(n_workers, task_queues) { }
 
   OopTaskQueueSet* task_queues() { return _task_queues; }
 
   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
 
   ParallelTaskTerminator* terminator() { return &_term; }
+  int n_workers() { return _n_workers; }
 
   void work(int i);
 
@@ -5057,6 +5067,11 @@
   void do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, int* seed);
 };
 
+// work_queue(i) is passed to the closure
+// Par_MarkRefsIntoAndScanClosure.  The "i" parameter
+// also is passed to do_dirty_card_rescan_tasks() and to
+// do_work_steal() to select the i-th task_queue.
+
 void CMSParRemarkTask::work(int i) {
   elapsedTimer _timer;
   ResourceMark rm;
@@ -5128,6 +5143,7 @@
 
   // Do the rescan tasks for each of the two spaces
   // (cms_space and perm_space) in turn.
+  // "i" is passed to select the "i-th" task_queue
   do_dirty_card_rescan_tasks(_cms_space, i, &par_mrias_cl);
   do_dirty_card_rescan_tasks(_perm_space, i, &par_mrias_cl);
   _timer.stop();
@@ -5150,6 +5166,7 @@
   }
 }
 
+// Note that parameter "i" is not used.
 void
 CMSParRemarkTask::do_young_space_rescan(int i,
   Par_MarkRefsIntoAndScanClosure* cl, ContiguousSpace* space,
@@ -5309,8 +5326,13 @@
     size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                          (size_t)ParGCDesiredObjsFromOverflowList);
     // Now check if there's any work in the overflow list
+    // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+    // only affects the number of attempts made to get work from the
+    // overflow list and does not affect the number of workers.  Just
+    // pass ParallelGCThreads so this behavior is unchanged.
     if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                                work_q)) {
+                                                work_q,
+                                                ParallelGCThreads)) {
       // found something in global overflow list;
       // not yet ready to go stealing work from others.
       // We'd like to assert(work_q->size() != 0, ...)
@@ -5367,11 +5389,12 @@
 // Merge the per-thread plab arrays into the global survivor chunk
 // array which will provide the partitioning of the survivor space
 // for CMS rescan.
-void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv) {
+void CMSCollector::merge_survivor_plab_arrays(ContiguousSpace* surv,
+                                              int no_of_gc_threads) {
   assert(_survivor_plab_array  != NULL, "Error");
   assert(_survivor_chunk_array != NULL, "Error");
   assert(_collectorState == FinalMarking, "Error");
-  for (uint j = 0; j < ParallelGCThreads; j++) {
+  for (int j = 0; j < no_of_gc_threads; j++) {
     _cursor[j] = 0;
   }
   HeapWord* top = surv->top();
@@ -5379,7 +5402,7 @@
   for (i = 0; i < _survivor_chunk_capacity; i++) {  // all sca entries
     HeapWord* min_val = top;          // Higher than any PLAB address
     uint      min_tid = 0;            // position of min_val this round
-    for (uint j = 0; j < ParallelGCThreads; j++) {
+    for (int j = 0; j < no_of_gc_threads; j++) {
       ChunkArray* cur_sca = &_survivor_plab_array[j];
       if (_cursor[j] == cur_sca->end()) {
         continue;
       }
@@ -5413,7 +5436,7 @@
   // Verify that we used up all the recorded entries
   #ifdef ASSERT
   size_t total = 0;
-  for (uint j = 0; j < ParallelGCThreads; j++) {
+  for (int j = 0; j < no_of_gc_threads; j++) {
     assert(_cursor[j] == _survivor_plab_array[j].end(), "Ctl pt invariant");
     total += _cursor[j];
   }
@@ -5448,13 +5471,15 @@
     // Each valid entry in [0, _eden_chunk_index) represents a task.
     size_t n_tasks = _eden_chunk_index + 1;
     assert(n_tasks == 1 || _eden_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks((int)n_tasks);
   }
 
   // Merge the survivor plab arrays into _survivor_chunk_array
   if (_survivor_plab_array != NULL) {
-    merge_survivor_plab_arrays(dng->from());
+    merge_survivor_plab_arrays(dng->from(), n_threads);
   } else {
     assert(_survivor_chunk_index == 0, "Error");
   }
@@ -5463,7 +5488,9 @@
   {
     SequentialSubTasksDone* pst = dng->to()->par_seq_tasks();
     assert(!pst->valid(), "Clobbering existing data?");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks(1);
     assert(pst->valid(), "Error");
   }
@@ -5474,7 +5501,9 @@
     assert(!pst->valid(), "Clobbering existing data?");
     size_t n_tasks = _survivor_chunk_index + 1;
     assert(n_tasks == 1 || _survivor_chunk_array != NULL, "Error");
-    pst->set_par_threads(n_threads);
+    // Sets the condition for completion of the subtask (how many threads
+    // need to finish in order to be done).
+    pst->set_n_threads(n_threads);
     pst->set_n_tasks((int)n_tasks);
     assert(pst->valid(), "Error");
   }
@@ -5483,7 +5512,7 @@
 // Parallel version of remark
 void CMSCollector::do_remark_parallel() {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   int n_workers = workers->total_workers();
   CompactibleFreeListSpace* cms_space = _cmsGen->cmsSpace();
@@ -5636,13 +5665,11 @@
 ////////////////////////////////////////////////////////
 // Parallel Reference Processing Task Proxy Class
 ////////////////////////////////////////////////////////
-class CMSRefProcTaskProxy: public AbstractGangTask {
+class CMSRefProcTaskProxy: public AbstractGangTaskWOopQueues {
   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
   CMSCollector*          _collector;
   CMSBitMap*             _mark_bit_map;
   const MemRegion        _span;
-  OopTaskQueueSet*       _task_queues;
-  ParallelTaskTerminator _term;
   ProcessTask&           _task;
 
 public:
@@ -5650,24 +5677,21 @@
               CMSCollector* collector,
               const MemRegion& span,
               CMSBitMap* mark_bit_map,
-              int total_workers,
+              AbstractWorkGang* workers,
               OopTaskQueueSet* task_queues):
-    AbstractGangTask("Process referents by policy in parallel"),
+    AbstractGangTaskWOopQueues("Process referents by policy in parallel",
+      task_queues),
     _task(task),
-    _collector(collector), _span(span), _mark_bit_map(mark_bit_map),
-    _task_queues(task_queues),
-    _term(total_workers, task_queues)
+    _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
   {
     assert(_collector->_span.equals(_span) && !_span.is_empty(),
            "Inconsistency in _span");
   }
 
-  OopTaskQueueSet* task_queues() { return _task_queues; }
+  OopTaskQueueSet* task_queues() { return queues(); }
 
   OopTaskQueue* work_queue(int i) { return task_queues()->queue(i); }
 
-  ParallelTaskTerminator* terminator() { return &_term; }
-
   void do_work_steal(int i,
                      CMSParDrainMarkingStackClosure* drain,
                      CMSParKeepAliveClosure* keep_alive,
@@ -5739,8 +5763,13 @@
     size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4,
                                          (size_t)ParGCDesiredObjsFromOverflowList);
     // Now check if there's any work in the overflow list
+    // Passing ParallelGCThreads as the third parameter, no_of_gc_threads,
+    // only affects the number of attempts made to get work from the
+    // overflow list and does not affect the number of workers.  Just
+    // pass ParallelGCThreads so this behavior is unchanged.
     if (_collector->par_take_from_overflow_list(num_from_overflow_list,
-                                                work_q)) {
+                                                work_q,
+                                                ParallelGCThreads)) {
       // Found something in global overflow list;
       // not yet ready to go stealing work from others.
       // We'd like to assert(work_q->size() != 0, ...)
@@ -5773,13 +5802,12 @@
 
 void CMSRefProcTaskExecutor::execute(ProcessTask& task)
 {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
-  int n_workers = workers->total_workers();
   CMSRefProcTaskProxy rp_task(task, &_collector,
                               _collector.ref_processor()->span(),
                               _collector.markBitMap(),
-                              n_workers, _collector.task_queues());
+                              workers, _collector.task_queues());
   workers->run_task(&rp_task);
 }
@@ -5787,7 +5815,7 @@
 
 {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  WorkGang* workers = gch->workers();
+  FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   CMSRefEnqueueTaskProxy enq_task(task);
   workers->run_task(&enq_task);
@@ -5814,6 +5842,14 @@
   {
     TraceTime t("weak refs processing", PrintGCDetails, false, gclog_or_tty);
     if (rp->processing_is_mt()) {
+      // Set the degree of MT here.  If the discovery is done MT, there
+      // may have been a different number of threads doing the discovery
+      // and a different number of discovered lists may have Ref objects.
+      // That is OK as long as the Reference lists are balanced (see
+      // balance_all_queues() and balance_queues()).
+
+
+      rp->set_mt_degree(ParallelGCThreads);
       CMSRefProcTaskExecutor task_executor(*this);
       rp->process_discovered_references(&_is_alive_closure,
                                         &cmsKeepAliveClosure,
@@ -5874,6 +5910,7 @@
 
   rp->set_enqueuing_is_done(true);
   if (rp->processing_is_mt()) {
+    rp->balance_all_queues();
     CMSRefProcTaskExecutor task_executor(*this);
     rp->enqueue_discovered_references(&task_executor);
   } else {
@@ -8708,7 +8745,8 @@
 // similar changes might be needed.
 // CR 6797058 has been filed to consolidate the common code.
 bool CMSCollector::par_take_from_overflow_list(size_t num,
-                                               OopTaskQueue* work_q) {
+                                               OopTaskQueue* work_q,
+                                               int no_of_gc_threads) {
   assert(work_q->size() == 0, "First empty local work queue");
   assert(num < work_q->max_elems(), "Can't bite more than we can chew");
   if (_overflow_list == NULL) {
@@ -8717,7 +8755,9 @@
   // Grab the entire list; we'll put back a suffix
   oop prefix = (oop)Atomic::xchg_ptr(BUSY, &_overflow_list);
   Thread* tid = Thread::current();
-  size_t CMSOverflowSpinCount = (size_t)ParallelGCThreads;
+  // Before "no_of_gc_threads" was introduced CMSOverflowSpinCount was
+  // set to ParallelGCThreads.
+  size_t CMSOverflowSpinCount = (size_t) no_of_gc_threads; // was ParallelGCThreads;
   size_t sleep_time_millis = MAX2((size_t)1, num/100);
   // If the list is busy, we spin for a short while,
   // sleeping between attempts to get the list.
@@ -9256,4 +9296,3 @@
                          true   /* recordGCEndTime */,
                          true   /* countCollection */  );
 }
-
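For readers unfamiliar with the work-gang pattern this patch builds on, the following standalone C++11 sketch illustrates the idea behind set_for_termination()/reset_for_reuse(): the number of active workers is chosen per task at dispatch time, and the termination barrier is re-armed for exactly that count before the workers run. This is a hedged analogy only, not HotSpot code; the names SimpleGang, Terminator, GangTask, and DemoTask are hypothetical and the real YieldingFlexibleWorkGang/ParallelTaskTerminator machinery is considerably more involved.

// Illustrative sketch only (not HotSpot code): a tiny "flexible work gang".
// The gang is sized once, but each run_task() call may use fewer active
// workers, and the task's terminator is reset for that count before running,
// mirroring set_for_termination()/reset_for_reuse() in the patch above.
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <thread>
#include <vector>

class Terminator {                     // counts workers that have finished
  std::mutex              _mu;
  std::condition_variable _cv;
  int                     _active;     // how many workers must check in
  int                     _done;
 public:
  Terminator() : _active(0), _done(0) {}
  void reset_for_reuse(int active_workers) {   // re-arm for the next task
    std::lock_guard<std::mutex> l(_mu);
    _active = active_workers;
    _done   = 0;
  }
  void offer_termination() {                   // a worker has no more work
    std::unique_lock<std::mutex> l(_mu);
    if (++_done == _active) _cv.notify_all();
    else _cv.wait(l, [this] { return _done == _active; });
  }
};

struct GangTask {
  virtual ~GangTask() {}
  virtual void set_for_termination(int active_workers) = 0;
  virtual void work(int worker_id) = 0;
};

class SimpleGang {
  int _total_workers;
 public:
  explicit SimpleGang(int total) : _total_workers(total) {}
  // Choose the active worker count at dispatch time, arm the task's
  // terminator for exactly that count, then fork and join the workers.
  void run_task(GangTask* task, int active_workers) {
    if (active_workers > _total_workers) active_workers = _total_workers;
    task->set_for_termination(active_workers);
    std::vector<std::thread> threads;
    for (int i = 0; i < active_workers; i++) {
      threads.emplace_back([task, i] { task->work(i); });
    }
    for (std::thread& t : threads) t.join();
  }
};

struct DemoTask : GangTask {
  Terminator _term;
  void set_for_termination(int active_workers) {
    _term.reset_for_reuse(active_workers);
  }
  void work(int worker_id) {
    std::printf("worker %d scanning its share\n", worker_id);
    _term.offer_termination();   // block until all active workers are done
  }
};

int main() {
  SimpleGang gang(8);            // gang sized once, like ParallelGCThreads
  DemoTask task;
  gang.run_task(&task, 3);       // only 3 workers are active for this task
  gang.run_task(&task, 5);       // terminator was re-armed, so reuse is safe
  return 0;
}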