Wed, 10 Jun 2009 14:57:21 -0700
Merge
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Wed Jun 10 14:57:21 2009 -0700
@@ -4454,43 +4454,26 @@
     delayed()->nop();
   }

-  // Now we decide how to generate the card table write. If we're
-  // enqueueing, we call out to a generated function. Otherwise, we do it
-  // inline here.
-
-  if (G1RSBarrierUseQueue) {
-    // If the "store_addr" register is an "in" or "local" register, move it to
-    // a scratch reg so we can pass it as an argument.
-    bool use_scr = !(store_addr->is_global() || store_addr->is_out());
-    // Pick a scratch register different from "tmp".
-    Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
-    // Make sure we use up the delay slot!
-    if (use_scr) {
-      post_filter_masm->mov(store_addr, scr);
-    } else {
-      post_filter_masm->nop();
-    }
-    generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
-    save_frame(0);
-    call(dirty_card_log_enqueue);
-    if (use_scr) {
-      delayed()->mov(scr, O0);
-    } else {
-      delayed()->mov(store_addr->after_save(), O0);
-    }
-    restore();
-
+  // If the "store_addr" register is an "in" or "local" register, move it to
+  // a scratch reg so we can pass it as an argument.
+  bool use_scr = !(store_addr->is_global() || store_addr->is_out());
+  // Pick a scratch register different from "tmp".
+  Register scr = (tmp == G1_scratch ? G3_scratch : G1_scratch);
+  // Make sure we use up the delay slot!
+  if (use_scr) {
+    post_filter_masm->mov(store_addr, scr);
   } else {
-
-#ifdef _LP64
-    post_filter_masm->srlx(store_addr, CardTableModRefBS::card_shift, store_addr);
-#else
-    post_filter_masm->srl(store_addr, CardTableModRefBS::card_shift, store_addr);
-#endif
-    assert(tmp != store_addr, "need separate temp reg");
-    set(bs->byte_map_base, tmp);
-    stb(G0, tmp, store_addr);
+    post_filter_masm->nop();
   }
+  generate_dirty_card_log_enqueue_if_necessary(bs->byte_map_base);
+  save_frame(0);
+  call(dirty_card_log_enqueue);
+  if (use_scr) {
+    delayed()->mov(scr, O0);
+  } else {
+    delayed()->mov(store_addr->after_save(), O0);
+  }
+  restore();

   bind(filtered);
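
With G1RSBarrierUseQueue removed, the generated post-barrier no longer has an inline card-table-write variant: every unfiltered store goes through the dirty-card log enqueue stub. A rough C++ sketch of the logic on the stub path (illustrative only; `enqueue_dirty_card` is a placeholder for the generated `dirty_card_log_enqueue` code, which also filters already-dirty cards):

    // Sketch, not HotSpot source: what the emitted barrier now always does.
    void g1_post_barrier_slow(void* store_addr, jbyte* byte_map_base) {
      jbyte* card = byte_map_base +
                    ((uintptr_t)store_addr >> CardTableModRefBS::card_shift);
      enqueue_dirty_card(card);  // replaces the old inline "stb(G0, ...)" write
    }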
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Jun 10 14:57:21 2009 -0700
@@ -555,6 +555,7 @@
   _collector_policy(cp),
   _should_unload_classes(false),
   _concurrent_cycles_since_last_unload(0),
+  _roots_scanning_options(0),
   _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
 {
   if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -545,6 +545,11 @@
   bool unloaded_classes_last_cycle() const {
     return concurrent_cycles_since_last_unload() == 0;
   }
+  // Root scanning options for perm gen
+  int _roots_scanning_options;
+  int roots_scanning_options() const { return _roots_scanning_options; }
+  void add_root_scanning_option(int o) { _roots_scanning_options |= o; }
+  void remove_root_scanning_option(int o) { _roots_scanning_options &= ~o; }

   // Verification support
   CMSBitMap _verification_mark_bm;
@@ -719,11 +724,6 @@
   NOT_PRODUCT(bool simulate_overflow();)     // sequential
   NOT_PRODUCT(bool par_simulate_overflow();) // MT version

-  int _roots_scanning_options;
-  int roots_scanning_options() const { return _roots_scanning_options; }
-  void add_root_scanning_option(int o) { _roots_scanning_options |= o; }
-  void remove_root_scanning_option(int o) { _roots_scanning_options &= ~o; }
-
   // CMS work methods
   void checkpointRootsInitialWork(bool asynch); // initial checkpoint work
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp	Wed Jun 10 14:57:21 2009 -0700
@@ -25,26 +25,37 @@
 #include "incls/_precompiled.incl"
 #include "incls/_concurrentG1Refine.cpp.incl"

-bool ConcurrentG1Refine::_enabled = false;
-
 ConcurrentG1Refine::ConcurrentG1Refine() :
-  _pya(PYA_continue), _last_pya(PYA_continue),
-  _last_cards_during(), _first_traversal(false),
   _card_counts(NULL), _cur_card_count_histo(NULL), _cum_card_count_histo(NULL),
   _hot_cache(NULL),
   _def_use_cache(false), _use_cache(false),
-  _n_periods(0), _total_cards(0), _total_travs(0)
+  _n_periods(0), _total_cards(0), _total_travs(0),
+  _threads(NULL), _n_threads(0)
 {
   if (G1ConcRefine) {
-    _cg1rThread = new ConcurrentG1RefineThread(this);
-    assert(cg1rThread() != NULL, "Conc refine should have been created");
-    assert(cg1rThread()->cg1r() == this,
-           "Conc refine thread should refer to this");
-  } else {
-    _cg1rThread = NULL;
+    _n_threads = (int)thread_num();
+    if (_n_threads > 0) {
+      _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads);
+      int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids();
+      ConcurrentG1RefineThread *next = NULL;
+      for (int i = _n_threads - 1; i >= 0; i--) {
+        ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i);
+        assert(t != NULL, "Conc refine should have been created");
+        assert(t->cg1r() == this, "Conc refine thread should refer to this");
+        _threads[i] = t;
+        next = t;
+      }
+    }
   }
 }

+size_t ConcurrentG1Refine::thread_num() {
+  if (G1ConcRefine) {
+    return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads;
+  }
+  return 0;
+}
+
 void ConcurrentG1Refine::init() {
   if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
     G1CollectedHeap* g1h = G1CollectedHeap::heap();
@@ -75,6 +86,14 @@
   }
 }

+void ConcurrentG1Refine::stop() {
+  if (_threads != NULL) {
+    for (int i = 0; i < _n_threads; i++) {
+      _threads[i]->stop();
+    }
+  }
+}
+
 ConcurrentG1Refine::~ConcurrentG1Refine() {
   if (G1ConcRSLogCacheSize > 0 || G1ConcRSCountTraversals) {
     assert(_card_counts != NULL, "Logic");
@@ -88,104 +107,22 @@
     assert(_hot_cache != NULL, "Logic");
     FREE_C_HEAP_ARRAY(jbyte*, _hot_cache);
   }
-}
-
-bool ConcurrentG1Refine::refine() {
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  unsigned cards_before = g1h->g1_rem_set()->conc_refine_cards();
-  clear_hot_cache();  // Any previous values in this are now invalid.
-  g1h->g1_rem_set()->concurrentRefinementPass(this);
-  _traversals++;
-  unsigned cards_after = g1h->g1_rem_set()->conc_refine_cards();
-  unsigned cards_during = cards_after-cards_before;
-  // If this is the first traversal in the current enabling
-  // and we did some cards, or if the number of cards found is decreasing
-  // sufficiently quickly, then keep going.  Otherwise, sleep a while.
-  bool res =
-    (_first_traversal && cards_during > 0)
-    ||
-    (!_first_traversal && cards_during * 3 < _last_cards_during * 2);
-  _last_cards_during = cards_during;
-  _first_traversal = false;
-  return res;
-}
-
-void ConcurrentG1Refine::enable() {
-  MutexLocker x(G1ConcRefine_mon);
-  if (!_enabled) {
-    _enabled = true;
-    _first_traversal = true; _last_cards_during = 0;
-    G1ConcRefine_mon->notify_all();
+  if (_threads != NULL) {
+    for (int i = 0; i < _n_threads; i++) {
+      delete _threads[i];
+    }
+    FREE_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _threads);
   }
 }

-unsigned ConcurrentG1Refine::disable() {
-  MutexLocker x(G1ConcRefine_mon);
-  if (_enabled) {
-    _enabled = false;
-    return _traversals;
-  } else {
-    return 0;
+void ConcurrentG1Refine::threads_do(ThreadClosure *tc) {
+  if (_threads != NULL) {
+    for (int i = 0; i < _n_threads; i++) {
+      tc->do_thread(_threads[i]);
+    }
   }
 }

-void ConcurrentG1Refine::wait_for_ConcurrentG1Refine_enabled() {
-  G1ConcRefine_mon->lock();
-  while (!_enabled) {
-    G1ConcRefine_mon->wait(Mutex::_no_safepoint_check_flag);
-  }
-  G1ConcRefine_mon->unlock();
-  _traversals = 0;
-};
-
-void ConcurrentG1Refine::set_pya_restart() {
-  // If we're using the log-based RS barrier, the above will cause
-  // in-progress traversals of completed log buffers to quit early; we will
-  // also abandon all other buffers.
-  if (G1RSBarrierUseQueue) {
-    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-    dcqs.abandon_logs();
-    // Reset the post-yield actions.
-    _pya = PYA_continue;
-    _last_pya = PYA_continue;
-  } else {
-    _pya = PYA_restart;
-  }
-}
-
-void ConcurrentG1Refine::set_pya_cancel() {
-  _pya = PYA_cancel;
-}
-
-PostYieldAction ConcurrentG1Refine::get_pya() {
-  if (_pya != PYA_continue) {
-    jint val = _pya;
-    while (true) {
-      jint val_read = Atomic::cmpxchg(PYA_continue, &_pya, val);
-      if (val_read == val) {
-        PostYieldAction res = (PostYieldAction)val;
-        assert(res != PYA_continue, "Only the refine thread should reset.");
-        _last_pya = res;
-        return res;
-      } else {
-        val = val_read;
-      }
-    }
-  }
-  // QQQ WELL WHAT DO WE RETURN HERE???
-  // make up something!
-  return PYA_continue;
-}
-
-PostYieldAction ConcurrentG1Refine::get_last_pya() {
-  PostYieldAction res = _last_pya;
-  _last_pya = PYA_continue;
-  return res;
-}
-
-bool ConcurrentG1Refine::do_traversal() {
-  return _cg1rThread->do_traversal();
-}

 int ConcurrentG1Refine::add_card_count(jbyte* card_ptr) {
   size_t card_num = (card_ptr - _ct_bot);
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -26,26 +26,9 @@
 class ConcurrentG1RefineThread;
 class G1RemSet;

-// What to do after a yield:
-enum PostYieldAction {
-  PYA_continue,  // Continue the traversal
-  PYA_restart,   // Restart
-  PYA_cancel     // It's been completed by somebody else: cancel.
-};
-
 class ConcurrentG1Refine: public CHeapObj {
-  ConcurrentG1RefineThread* _cg1rThread;
-
-  volatile jint _pya;
-  PostYieldAction _last_pya;
-
-  static bool _enabled;  // Protected by G1ConcRefine_mon.
-  unsigned _traversals;
-
-  // Number of cards processed during last refinement traversal.
-  unsigned _first_traversal;
-  unsigned _last_cards_during;
-
+  ConcurrentG1RefineThread** _threads;
+  int _n_threads;
   // The cache for card refinement.
   bool _use_cache;
   bool _def_use_cache;
@@ -74,37 +57,10 @@
   ~ConcurrentG1Refine();

   void init(); // Accomplish some initialization that has to wait.
+  void stop();

-  // Enabled Conc refinement, waking up thread if necessary.
-  void enable();
-
-  // Returns the number of traversals performed since this refiner was enabled.
-  unsigned disable();
-
-  // Requires G1ConcRefine_mon to be held.
-  bool enabled() { return _enabled; }
-
-  // Returns only when G1 concurrent refinement has been enabled.
-  void wait_for_ConcurrentG1Refine_enabled();
-
-  // Do one concurrent refinement pass over the card table.  Returns "true"
-  // if heuristics determine that another pass should be done immediately.
-  bool refine();
-
-  // Indicate that an in-progress refinement pass should start over.
-  void set_pya_restart();
-  // Indicate that an in-progress refinement pass should quit.
-  void set_pya_cancel();
-
-  // Get the appropriate post-yield action.  Also sets last_pya.
-  PostYieldAction get_pya();
-
-  // The last PYA read by "get_pya".
-  PostYieldAction get_last_pya();
-
-  bool do_traversal();
-
-  ConcurrentG1RefineThread* cg1rThread() { return _cg1rThread; }
+  // Iterate over the conc refine threads
+  void threads_do(ThreadClosure *tc);

   // If this is the first entry for the slot, writes into the cache and
   // returns NULL.  If it causes an eviction, returns the evicted pointer.
@@ -129,4 +85,6 @@

   void clear_and_record_card_counts();
   void print_final_card_counts();
+
+  static size_t thread_num();
 };
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp	Wed Jun 10 14:57:21 2009 -0700
@@ -30,12 +30,14 @@
 // The CM thread is created when the G1 garbage collector is used

 ConcurrentG1RefineThread::
-ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r) :
+ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
+                         int worker_id_offset, int worker_id) :
   ConcurrentGCThread(),
+  _worker_id_offset(worker_id_offset),
+  _worker_id(worker_id),
+  _active(false),
+  _next(next),
   _cg1r(cg1r),
-  _started(false),
-  _in_progress(false),
-  _do_traversal(false),
   _vtime_accum(0.0),
   _co_tracker(G1CRGroup),
   _interval_ms(5.0)
@@ -43,112 +45,6 @@
   create_and_start();
 }

-const long timeout = 200; // ms.
-
-void ConcurrentG1RefineThread::traversalBasedRefinement() {
-  _cg1r->wait_for_ConcurrentG1Refine_enabled();
-  MutexLocker x(G1ConcRefine_mon);
-  while (_cg1r->enabled()) {
-    MutexUnlocker ux(G1ConcRefine_mon);
-    ResourceMark rm;
-    HandleMark hm;
-
-    if (G1TraceConcurrentRefinement) {
-      gclog_or_tty->print_cr("G1-Refine starting pass");
-    }
-    _sts.join();
-    bool no_sleep = _cg1r->refine();
-    _sts.leave();
-    if (!no_sleep) {
-      MutexLockerEx x(CGC_lock, Mutex::_no_safepoint_check_flag);
-      // We do this only for the timeout; we don't expect this to be signalled.
-      CGC_lock->wait(Mutex::_no_safepoint_check_flag, timeout);
-    }
-  }
-}
-
-void ConcurrentG1RefineThread::queueBasedRefinement() {
-  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  // Wait for completed log buffers to exist.
-  {
-    MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
-    while (!_do_traversal && !dcqs.process_completed_buffers() &&
-           !_should_terminate) {
-      DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
-    }
-  }
-
-  if (_should_terminate) {
-    return;
-  }
-
-  // Now we take them off (this doesn't hold locks while it applies
-  // closures.)  (If we did a full collection, then we'll do a full
-  // traversal.
-  _sts.join();
-  if (_do_traversal) {
-    (void)_cg1r->refine();
-    switch (_cg1r->get_last_pya()) {
-    case PYA_cancel: case PYA_continue:
-      // Continue was caught and handled inside "refine".  If it's still
-      // "continue" when we get here, we're done.
-      _do_traversal = false;
-      break;
-    case PYA_restart:
-      assert(_do_traversal, "Because of Full GC.");
-      break;
-    }
-  } else {
-    int n_logs = 0;
-    int lower_limit = 0;
-    double start_vtime_sec; // only used when G1SmoothConcRefine is on
-    int prev_buffer_num; // only used when G1SmoothConcRefine is on
-
-    if (G1SmoothConcRefine) {
-      lower_limit = 0;
-      start_vtime_sec = os::elapsedVTime();
-      prev_buffer_num = (int) dcqs.completed_buffers_num();
-    } else {
-      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
-    }
-    while (dcqs.apply_closure_to_completed_buffer(0, lower_limit)) {
-      double end_vtime_sec;
-      double elapsed_vtime_sec;
-      int elapsed_vtime_ms;
-      int curr_buffer_num;
-
-      if (G1SmoothConcRefine) {
-        end_vtime_sec = os::elapsedVTime();
-        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
-        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
-        curr_buffer_num = (int) dcqs.completed_buffers_num();
-
-        if (curr_buffer_num > prev_buffer_num ||
-            curr_buffer_num > DCQBarrierProcessCompletedThreshold) {
-          decreaseInterval(elapsed_vtime_ms);
-        } else if (curr_buffer_num < prev_buffer_num) {
-          increaseInterval(elapsed_vtime_ms);
-        }
-      }
-
-      sample_young_list_rs_lengths();
-      _co_tracker.update(false);
-
-      if (G1SmoothConcRefine) {
-        prev_buffer_num = curr_buffer_num;
-        _sts.leave();
-        os::sleep(Thread::current(), (jlong) _interval_ms, false);
-        _sts.join();
-        start_vtime_sec = os::elapsedVTime();
-      }
-      n_logs++;
-    }
-    // Make sure we harvest the PYA, if any.
-    (void)_cg1r->get_pya();
-  }
-  _sts.leave();
-}
-
 void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
@@ -184,15 +80,97 @@
   _co_tracker.start();

   while (!_should_terminate) {
-    // wait until started is set.
-    if (G1RSBarrierUseQueue) {
-      queueBasedRefinement();
+    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+    // Wait for completed log buffers to exist.
+    {
+      MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+      while (((_worker_id == 0 && !dcqs.process_completed_buffers()) ||
+              (_worker_id > 0 && !is_active())) &&
+             !_should_terminate) {
+        DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
+      }
+    }
+
+    if (_should_terminate) {
+      return;
+    }
+
+    // Now we take them off (this doesn't hold locks while it applies
+    // closures.)  (If we did a full collection, then we'll do a full
+    // traversal.
+    _sts.join();
+    int n_logs = 0;
+    int lower_limit = 0;
+    double start_vtime_sec; // only used when G1SmoothConcRefine is on
+    int prev_buffer_num;    // only used when G1SmoothConcRefine is on
+    // This thread's activation threshold
+    int threshold = DCQBarrierProcessCompletedThreshold * _worker_id;
+    // Next thread's activation threshold
+    int next_threshold = threshold + DCQBarrierProcessCompletedThreshold;
+    int deactivation_threshold = MAX2<int>(threshold - DCQBarrierProcessCompletedThreshold / 2, 0);
+
+    if (G1SmoothConcRefine) {
+      lower_limit = 0;
+      start_vtime_sec = os::elapsedVTime();
+      prev_buffer_num = (int) dcqs.completed_buffers_num();
     } else {
-      traversalBasedRefinement();
+      lower_limit = DCQBarrierProcessCompletedThreshold / 4; // For now.
     }
-    _sts.join();
-    _co_tracker.update();
+    while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) {
+      double end_vtime_sec;
+      double elapsed_vtime_sec;
+      int elapsed_vtime_ms;
+      int curr_buffer_num = (int) dcqs.completed_buffers_num();
+
+      if (G1SmoothConcRefine) {
+        end_vtime_sec = os::elapsedVTime();
+        elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
+        elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
+
+        if (curr_buffer_num > prev_buffer_num ||
+            curr_buffer_num > next_threshold) {
+          decreaseInterval(elapsed_vtime_ms);
+        } else if (curr_buffer_num < prev_buffer_num) {
+          increaseInterval(elapsed_vtime_ms);
+        }
+      }
+      if (_worker_id == 0) {
+        sample_young_list_rs_lengths();
+      } else if (curr_buffer_num < deactivation_threshold) {
+        // If the number of buffers has fallen below our threshold
+        // we should deactivate. The predecessor will reactivate this
+        // thread should the number of buffers cross the threshold again.
+        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+        deactivate();
+        if (G1TraceConcurrentRefinement) {
+          gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id);
+        }
+        break;
+      }
+      _co_tracker.update(false);
+
+      // Check if we need to activate the next thread.
+      if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) {
+        MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
+        _next->activate();
+        DirtyCardQ_CBL_mon->notify_all();
+        if (G1TraceConcurrentRefinement) {
+          gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id);
+        }
+      }
+
+      if (G1SmoothConcRefine) {
+        prev_buffer_num = curr_buffer_num;
+        _sts.leave();
+        os::sleep(Thread::current(), (jlong) _interval_ms, false);
+        _sts.join();
+        start_vtime_sec = os::elapsedVTime();
+      }
+      n_logs++;
+    }
+    _co_tracker.update(false);
     _sts.leave();
+
     if (os::supports_vtime()) {
       _vtime_accum = (os::elapsedVTime() - _vtime_start);
     } else {
@@ -240,7 +218,3 @@
   Thread::print();
   gclog_or_tty->cr();
 }
-
-void ConcurrentG1RefineThread::set_do_traversal(bool b) {
-  _do_traversal = b;
-}
--- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -33,21 +33,27 @@

   double _vtime_start;  // Initial virtual time.
   double _vtime_accum;  // Initial virtual time.
+  int _worker_id;
+  int _worker_id_offset;

+  // The refinement threads collection is a linked list. A predecessor can
+  // activate a successor when the number of rset update buffers crosses a
+  // certain threshold. A successor self-deactivates when the number of
+  // buffers falls below the threshold.
+  bool _active;
+  ConcurrentG1RefineThread* _next;
 public:
   virtual void run();

+  bool is_active()  { return _active; }
+  void activate()   { _active = true; }
+  void deactivate() { _active = false; }
+
 private:
   ConcurrentG1Refine* _cg1r;
-  bool _started;
-  bool _in_progress;
-  volatile bool _restart;

   COTracker _co_tracker;
   double _interval_ms;

-  bool _do_traversal;
-
   void decreaseInterval(int processing_time_ms) {
     double min_interval_ms = (double) processing_time_ms;
     _interval_ms = 0.8 * _interval_ms;
@@ -63,16 +69,13 @@

   void sleepBeforeNextCycle();

-  void traversalBasedRefinement();
-
-  void queueBasedRefinement();
-
   // For use by G1CollectedHeap, which is a friend.
   static SuspendibleThreadSet* sts() { return &_sts; }

 public:
   // Constructor
-  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r);
+  ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next,
+                           int worker_id_offset, int worker_id);

   // Printing
   void print();
@@ -82,23 +85,11 @@

   ConcurrentG1Refine* cg1r() { return _cg1r; }

-
-  void set_started()       { _started = true;  }
-  void clear_started()     { _started = false; }
-  bool started()           { return _started;  }
-
-  void set_in_progress()   { _in_progress = true;  }
-  void clear_in_progress() { _in_progress = false; }
-  bool in_progress()       { return _in_progress;  }
-
-  void set_do_traversal(bool b);
-  bool do_traversal() { return _do_traversal; }
-
   void sample_young_list_rs_lengths();

   // Yield for GC
   void yield();

   // shutdown
-  static void stop();
+  void stop();
 };
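
The thresholds computed in the run() loop in concurrentG1RefineThread.cpp above space the workers evenly by completed-buffer count, so refinement capacity ramps up and down with the mutator's dirty-card rate. A worked example of that arithmetic (the expressions are the ones in the patch; the value of DCQBarrierProcessCompletedThreshold is assumed to be 8 for illustration):

    // For worker_id == 2 and DCQBarrierProcessCompletedThreshold == 8:
    int threshold              = 8 * 2;            // 16: worker 2 runs above this
    int next_threshold         = threshold + 8;    // 24: wake worker 3 above this
    int deactivation_threshold =
        MAX2<int>(threshold - 8 / 2, 0);           // 12: park itself below this

Worker 0 is special: it is woken by the queue set itself and also performs the young-list RSet length sampling.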
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -80,5 +80,5 @@
   void yield();

   // shutdown
-  static void stop();
+  void stop();
 };
--- a/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentZFThread.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -73,7 +73,7 @@
   // while holding the ZF_needed_mon lock.

   // shutdown
-  static void stop();
+  void stop();

   // Stats
   static void note_region_alloc() {_region_allocs++; }
--- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp	Wed Jun 10 14:57:21 2009 -0700
@@ -71,11 +71,11 @@
   _all_active = true;
 }

+// Determines how many mutator threads can process the buffers in parallel.
 size_t DirtyCardQueueSet::num_par_ids() {
-  return MAX2(ParallelGCThreads, (size_t)2);
+  return os::processor_count();
 }

-
 void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                    int max_completed_queue,
                                    Mutex* lock, PtrQueueSet* fl_owner) {
@@ -85,8 +85,6 @@

   _shared_dirty_card_queue.set_lock(lock);
   _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon);
-  bool b = _free_ids->claim_perm_id(0);
-  guarantee(b, "Must reserve id zero for concurrent refinement thread.");
 }

 void DirtyCardQueueSet::handle_zero_index_for_thread(JavaThread* t) {
@@ -234,7 +232,7 @@
     nd = get_completed_buffer_lock(stop_at);
   }
   bool res = apply_closure_to_completed_buffer_helper(worker_i, nd);
-  if (res) _processed_buffers_rs_thread++;
+  if (res) Atomic::inc(&_processed_buffers_rs_thread);
   return res;
 }
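
The switch from `_processed_buffers_rs_thread++` to `Atomic::inc` is needed because several refinement threads can now complete buffers concurrently; a plain increment is a non-atomic read-modify-write and can lose counts. Minimal illustration (a sketch, not HotSpot source):

    volatile jint processed = 0;
    // processed++;            // racy: two threads may both read N and write N+1
    Atomic::inc(&processed);   // safe: a single atomic read-modify-write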
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Jun 10 14:57:21 2009 -0700
@@ -446,8 +446,61 @@
   gclog_or_tty->print_cr("");
 }

+void G1CollectedHeap::push_dirty_cards_region(HeapRegion* hr)
+{
+  // Claim the right to put the region on the dirty cards region list
+  // by installing a self pointer.
+  HeapRegion* next = hr->get_next_dirty_cards_region();
+  if (next == NULL) {
+    HeapRegion* res = (HeapRegion*)
+      Atomic::cmpxchg_ptr(hr, hr->next_dirty_cards_region_addr(),
+                          NULL);
+    if (res == NULL) {
+      HeapRegion* head;
+      do {
+        // Put the region to the dirty cards region list.
+        head = _dirty_cards_region_list;
+        next = (HeapRegion*)
+          Atomic::cmpxchg_ptr(hr, &_dirty_cards_region_list, head);
+        if (next == head) {
+          assert(hr->get_next_dirty_cards_region() == hr,
+                 "hr->get_next_dirty_cards_region() != hr");
+          if (next == NULL) {
+            // The last region in the list points to itself.
+            hr->set_next_dirty_cards_region(hr);
+          } else {
+            hr->set_next_dirty_cards_region(next);
+          }
+        }
+      } while (next != head);
+    }
+  }
+}
+
+HeapRegion* G1CollectedHeap::pop_dirty_cards_region()
+{
+  HeapRegion* head;
+  HeapRegion* hr;
+  do {
+    head = _dirty_cards_region_list;
+    if (head == NULL) {
+      return NULL;
+    }
+    HeapRegion* new_head = head->get_next_dirty_cards_region();
+    if (head == new_head) {
+      // The last region.
+      new_head = NULL;
+    }
+    hr = (HeapRegion*)Atomic::cmpxchg_ptr(new_head, &_dirty_cards_region_list,
+                                          head);
+  } while (hr != head);
+  assert(hr != NULL, "invariant");
+  hr->set_next_dirty_cards_region(NULL);
+  return hr;
+}
+
 void G1CollectedHeap::stop_conc_gc_threads() {
-  _cg1r->cg1rThread()->stop();
+  _cg1r->stop();
   _czft->stop();
   _cmThread->stop();
 }
@@ -1001,12 +1054,8 @@

   gc_epilogue(true);

-  // Abandon concurrent refinement.  This must happen last: in the
-  // dirty-card logging system, some cards may be dirty by weak-ref
-  // processing, and may be enqueued.  But the whole card table is
-  // dirtied, so this should abandon those logs, and set "do_traversal"
-  // to true.
-  concurrent_g1_refine()->set_pya_restart();
+  // Discard all rset updates
+  JavaThread::dirty_card_queue_set().abandon_logs();
   assert(!G1DeferredRSUpdate
          || (G1DeferredRSUpdate && (dirty_card_queue_set().completed_buffers_num() == 0)), "Should not be any");
   assert(regions_accounted_for(), "Region leakage!");
@@ -1333,7 +1382,8 @@
   _gc_time_stamp(0),
   _surviving_young_words(NULL),
   _in_cset_fast_test(NULL),
-  _in_cset_fast_test_base(NULL) {
+  _in_cset_fast_test_base(NULL),
+  _dirty_cards_region_list(NULL) {
   _g1h = this; // To catch bugs.
   if (_process_strong_tasks == NULL || !_process_strong_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
@@ -1521,12 +1571,12 @@
                                                SATB_Q_FL_lock,
                                                0,
                                                Shared_SATB_Q_lock);
-  if (G1RSBarrierUseQueue) {
-    JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
-                                                  DirtyCardQ_FL_lock,
-                                                  G1DirtyCardQueueMax,
-                                                  Shared_DirtyCardQ_lock);
-  }
+
+  JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
+                                                DirtyCardQ_FL_lock,
+                                                G1DirtyCardQueueMax,
+                                                Shared_DirtyCardQ_lock);
+
   if (G1DeferredRSUpdate) {
     dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon,
                                       DirtyCardQ_FL_lock,
@@ -2249,6 +2299,15 @@
   _hrs->iterate(&blk);
 }

+class PrintOnThreadsClosure : public ThreadClosure {
+  outputStream* _st;
+public:
+  PrintOnThreadsClosure(outputStream* st) : _st(st) { }
+  virtual void do_thread(Thread *t) {
+    t->print_on(_st);
+  }
+};
+
 void G1CollectedHeap::print_gc_threads_on(outputStream* st) const {
   if (ParallelGCThreads > 0) {
     workers()->print_worker_threads();
@@ -2256,8 +2315,9 @@
   st->print("\"G1 concurrent mark GC Thread\" ");
   _cmThread->print();
   st->cr();
-  st->print("\"G1 concurrent refinement GC Thread\" ");
-  _cg1r->cg1rThread()->print_on(st);
+  st->print("\"G1 concurrent refinement GC Threads\" ");
+  PrintOnThreadsClosure p(st);
+  _cg1r->threads_do(&p);
   st->cr();
   st->print("\"G1 zero-fill GC Thread\" ");
   _czft->print_on(st);
@@ -2269,7 +2329,7 @@
     workers()->threads_do(tc);
   }
   tc->do_thread(_cmThread);
-  tc->do_thread(_cg1r->cg1rThread());
+  _cg1r->threads_do(tc);
   tc->do_thread(_czft);
 }

@@ -4685,15 +4745,58 @@
   }
 }

+
+class G1ParCleanupCTTask : public AbstractGangTask {
+  CardTableModRefBS* _ct_bs;
+  G1CollectedHeap* _g1h;
+public:
+  G1ParCleanupCTTask(CardTableModRefBS* ct_bs,
+                     G1CollectedHeap* g1h) :
+    AbstractGangTask("G1 Par Cleanup CT Task"),
+    _ct_bs(ct_bs),
+    _g1h(g1h)
+  { }
+
+  void work(int i) {
+    HeapRegion* r;
+    while (r = _g1h->pop_dirty_cards_region()) {
+      clear_cards(r);
+    }
+  }
+  void clear_cards(HeapRegion* r) {
+    // Cards for Survivor and Scan-Only regions will be dirtied later.
+    if (!r->is_scan_only() && !r->is_survivor()) {
+      _ct_bs->clear(MemRegion(r->bottom(), r->end()));
+    }
+  }
+};
+
+
 void G1CollectedHeap::cleanUpCardTable() {
   CardTableModRefBS* ct_bs = (CardTableModRefBS*) (barrier_set());
   double start = os::elapsedTime();

-  ct_bs->clear(_g1_committed);
-
+  // Iterate over the dirty cards region list.
+  G1ParCleanupCTTask cleanup_task(ct_bs, this);
+  if (ParallelGCThreads > 0) {
+    set_par_threads(workers()->total_workers());
+    workers()->run_task(&cleanup_task);
+    set_par_threads(0);
+  } else {
+    while (_dirty_cards_region_list) {
+      HeapRegion* r = _dirty_cards_region_list;
+      cleanup_task.clear_cards(r);
+      _dirty_cards_region_list = r->get_next_dirty_cards_region();
+      if (_dirty_cards_region_list == r) {
+        // The last region.
+        _dirty_cards_region_list = NULL;
+      }
+      r->set_next_dirty_cards_region(NULL);
+    }
+  }
   // now, redirty the cards of the scan-only and survivor regions
   // (it seemed faster to do it this way, instead of iterating over
-  // all regions and then clearing / dirtying as approprite)
+  // all regions and then clearing / dirtying as appropriate)
   dirtyCardsForYoungRegions(ct_bs, _young_list->first_scan_only_region());
   dirtyCardsForYoungRegions(ct_bs, _young_list->first_survivor_region());
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -158,6 +158,7 @@
   friend class RegionSorter;
   friend class CountRCClosure;
   friend class EvacPopObjClosure;
+  friend class G1ParCleanupCTTask;

   // Other related classes.
   friend class G1MarkSweep;
@@ -1191,6 +1192,16 @@
   ConcurrentMark* concurrent_mark() const { return _cm; }
   ConcurrentG1Refine* concurrent_g1_refine() const { return _cg1r; }

+  // The dirty cards region list is used to record a subset of regions
+  // whose cards need clearing. The list is populated during the
+  // remembered set scanning and drained during the card table
+  // cleanup. Although the methods are reentrant, population/draining
+  // phases must not overlap. For synchronization purposes the last
+  // element on the list points to itself.
+  HeapRegion* _dirty_cards_region_list;
+  void push_dirty_cards_region(HeapRegion* hr);
+  HeapRegion* pop_dirty_cards_region();
+
 public:
   void stop_conc_gc_threads();
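
A self-contained sketch of the list discipline described in the comment above, with C++11 atomics standing in for Atomic::cmpxchg_ptr (illustrative only, not the HotSpot code). A NULL link means "not on the list", which is why the tail must point to itself to stay distinguishable:

    #include <atomic>

    struct Region { std::atomic<Region*> next{nullptr}; };
    std::atomic<Region*> dirty_list{nullptr};

    void push(Region* r) {
      Region* expected = nullptr;
      // Claim the region exactly once by installing a self-pointer;
      // a thread that loses this CAS knows the region is already listed.
      if (!r->next.compare_exchange_strong(expected, r)) return;
      Region* head = dirty_list.load();
      do {
        // The last element points to itself, so NULL still means "not listed".
        r->next.store(head == nullptr ? r : head);
      } while (!dirty_list.compare_exchange_weak(head, r));
    }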
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Jun 10 14:57:21 2009 -0700
@@ -167,11 +167,6 @@

   _all_full_gc_times_ms(new NumberSeq()),

-  _conc_refine_enabled(0),
-  _conc_refine_zero_traversals(0),
-  _conc_refine_max_traversals(0),
-  _conc_refine_current_delta(G1ConcRefineInitialDelta),
-
   // G1PausesBtwnConcMark defaults to -1
   // so the hack is to do the cast QQQ FIXME
   _pauses_btwn_concurrent_mark((size_t)G1PausesBtwnConcMark),
@@ -1634,9 +1629,8 @@
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false);
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
-      if (G1RSBarrierUseQueue)
-        print_par_buffers(3, "Processed Buffers",
-                          _par_last_update_rs_processed_buffers, true);
+      print_par_buffers(3, "Processed Buffers",
+                        _par_last_update_rs_processed_buffers, true);
       print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
       print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
       print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms);
@@ -1649,9 +1643,8 @@
       print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
     } else {
       print_stats(1, "Update RS", update_rs_time);
-      if (G1RSBarrierUseQueue)
-        print_stats(2, "Processed Buffers",
-                    (int)update_rs_processed_buffers);
+      print_stats(2, "Processed Buffers",
+                  (int)update_rs_processed_buffers);
       print_stats(1, "Ext Root Scanning", ext_root_scan_time);
       print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
       print_stats(1, "Scan-Only Scanning", scan_only_time);
@@ -2467,18 +2460,6 @@
                            (double) _region_num_young / (double) all_region_num * 100.0,
                            _region_num_tenured,
                            (double) _region_num_tenured / (double) all_region_num * 100.0);
-
-    if (!G1RSBarrierUseQueue) {
-      gclog_or_tty->print_cr("Of %d times conc refinement was enabled, %d (%7.2f%%) "
-                             "did zero traversals.",
-                             _conc_refine_enabled, _conc_refine_zero_traversals,
-                             _conc_refine_enabled > 0 ?
-                             100.0 * (float)_conc_refine_zero_traversals/
-                             (float)_conc_refine_enabled : 0.0);
-      gclog_or_tty->print_cr(" Max # of traversals = %d.",
-                             _conc_refine_max_traversals);
-      gclog_or_tty->print_cr("");
-    }
   }
   if (TraceGen1Time) {
     if (_all_full_gc_times_ms->num() > 0) {
@@ -2500,38 +2481,6 @@
 #endif // PRODUCT
 }

-void G1CollectorPolicy::update_conc_refine_data() {
-  unsigned traversals = _g1->concurrent_g1_refine()->disable();
-  if (traversals == 0) _conc_refine_zero_traversals++;
-  _conc_refine_max_traversals = MAX2(_conc_refine_max_traversals,
-                                     (size_t)traversals);
-
-  if (G1PolicyVerbose > 1)
-    gclog_or_tty->print_cr("Did a CR traversal series: %d traversals.", traversals);
-  double multiplier = 1.0;
-  if (traversals == 0) {
-    multiplier = 4.0;
-  } else if (traversals > (size_t)G1ConcRefineTargTraversals) {
-    multiplier = 1.0/1.5;
-  } else if (traversals < (size_t)G1ConcRefineTargTraversals) {
-    multiplier = 1.5;
-  }
-  if (G1PolicyVerbose > 1) {
-    gclog_or_tty->print_cr(" Multiplier = %7.2f.", multiplier);
-    gclog_or_tty->print(" Delta went from %d regions to ",
-                        _conc_refine_current_delta);
-  }
-  _conc_refine_current_delta =
-    MIN2(_g1->n_regions(),
-         (size_t)(_conc_refine_current_delta * multiplier));
-  _conc_refine_current_delta =
-    MAX2(_conc_refine_current_delta, (size_t)1);
-  if (G1PolicyVerbose > 1) {
-    gclog_or_tty->print_cr("%d regions.", _conc_refine_current_delta);
-  }
-  _conc_refine_enabled++;
-}
-
 bool
 G1CollectorPolicy::should_add_next_region_to_young_list() {
   assert(in_young_gc_mode(), "should be in young GC mode");
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -637,18 +637,6 @@
   // The number of collection pauses at the end of the last mark.
   size_t _n_pauses_at_mark_end;

-  // ==== This section is for stats related to starting Conc Refinement on time.
-  size_t _conc_refine_enabled;
-  size_t _conc_refine_zero_traversals;
-  size_t _conc_refine_max_traversals;
-  // In # of heap regions.
-  size_t _conc_refine_current_delta;
-
-  // At the beginning of a collection pause, update the variables above,
-  // especially the "delta".
-  void update_conc_refine_data();
-  // ====
-
   // Stash a pointer to the g1 heap.
   G1CollectedHeap* _g1;
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Jun 10 14:57:21 2009 -0700
@@ -105,28 +105,6 @@
   _g1->heap_region_iterate(&rc);
 }

-class UpdateRSOutOfRegionClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  ModRefBarrierSet* _mr_bs;
-  UpdateRSOopClosure _cl;
-  int _worker_i;
-public:
-  UpdateRSOutOfRegionClosure(G1CollectedHeap* g1, int worker_i = 0) :
-    _cl(g1->g1_rem_set()->as_HRInto_G1RemSet(), worker_i),
-    _mr_bs(g1->mr_bs()),
-    _worker_i(worker_i),
-    _g1h(g1)
-  {}
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->in_collection_set() && !r->continuesHumongous()) {
-      _cl.set_from(r);
-      r->set_next_filter_kind(HeapRegionDCTOC::OutOfRegionFilterKind);
-      _mr_bs->mod_oop_in_space_iterate(r, &_cl, true, true);
-    }
-    return false;
-  }
-};
-
 class VerifyRSCleanCardOopClosure: public OopClosure {
   G1CollectedHeap* _g1;
 public:
@@ -241,6 +219,7 @@
   HeapRegionRemSet* hrrs = r->rem_set();
   if (hrrs->iter_is_complete()) return false; // All done.
   if (!_try_claimed && !hrrs->claim_iter()) return false;
+  _g1h->push_dirty_cards_region(r);
   // If we didn't return above, then
   //   _try_claimed || r->claim_iter()
   // is true: either we're supposed to work on claimed-but-not-complete
@@ -264,6 +243,10 @@
     assert(card_region != NULL, "Yielding cards not in the heap?");
     _cards++;

+    if (!card_region->is_on_dirty_cards_region_list()) {
+      _g1h->push_dirty_cards_region(card_region);
+    }
+
     // If the card is dirty, then we will scan it during updateRS.
     if (!card_region->in_collection_set() && !_ct_bs->is_card_dirty(card_index)) {
       if (!_ct_bs->is_card_claimed(card_index) && _ct_bs->claim_card(card_index)) {
@@ -350,30 +333,17 @@
   double start = os::elapsedTime();
   _g1p->record_update_rs_start_time(worker_i, start * 1000.0);

-  if (G1RSBarrierUseQueue && !cg1r->do_traversal()) {
-    // Apply the appropriate closure to all remaining log entries.
-    _g1->iterate_dirty_card_closure(false, worker_i);
-    // Now there should be no dirty cards.
-    if (G1RSLogCheckCardTable) {
-      CountNonCleanMemRegionClosure cl(_g1);
-      _ct_bs->mod_card_iterate(&cl);
-      // XXX This isn't true any more: keeping cards of young regions
-      // marked dirty broke it.  Need some reasonable fix.
-      guarantee(cl.n() == 0, "Card table should be clean.");
-    }
-  } else {
-    UpdateRSOutOfRegionClosure update_rs(_g1, worker_i);
-    _g1->heap_region_iterate(&update_rs);
-    // We did a traversal; no further one is necessary.
-    if (G1RSBarrierUseQueue) {
-      assert(cg1r->do_traversal(), "Or we shouldn't have gotten here.");
-      cg1r->set_pya_cancel();
-    }
-    if (_cg1r->use_cache()) {
-      _cg1r->clear_and_record_card_counts();
-      _cg1r->clear_hot_cache();
-    }
+  // Apply the appropriate closure to all remaining log entries.
+  _g1->iterate_dirty_card_closure(false, worker_i);
+  // Now there should be no dirty cards.
+  if (G1RSLogCheckCardTable) {
+    CountNonCleanMemRegionClosure cl(_g1);
+    _ct_bs->mod_card_iterate(&cl);
+    // XXX This isn't true any more: keeping cards of young regions
+    // marked dirty broke it.  Need some reasonable fix.
+    guarantee(cl.n() == 0, "Card table should be clean.");
   }
+
   _g1p->record_update_rs_time(worker_i, (os::elapsedTime() - start) * 1000.0);
 }

@@ -486,11 +456,6 @@
                                                 * 1000.0);
 }

-void HRInto_G1RemSet::set_par_traversal(bool b) {
-  _par_traversal_in_progress = b;
-  HeapRegionRemSet::set_par_traversal(b);
-}
-
 void HRInto_G1RemSet::cleanupHRRS() {
   HeapRegionRemSet::cleanup();
 }
@@ -527,7 +492,7 @@
     updateRS(worker_i);
     scanNewRefsRS(oc, worker_i);
   } else {
-    _g1p->record_update_rs_start_time(worker_i, os::elapsedTime());
+    _g1p->record_update_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
     _g1p->record_update_rs_processed_buffers(worker_i, 0.0);
     _g1p->record_update_rs_time(worker_i, 0.0);
     _g1p->record_scan_new_refs_time(worker_i, 0.0);
@@ -535,7 +500,7 @@
   if (G1ParallelRSetScanningEnabled || (worker_i == 0)) {
     scanRS(oc, worker_i);
   } else {
-    _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime());
+    _g1p->record_scan_rs_start_time(worker_i, os::elapsedTime() * 1000.0);
     _g1p->record_scan_rs_time(worker_i, 0.0);
   }
 } else {
@@ -562,11 +527,6 @@
   if (ParallelGCThreads > 0) {
     set_par_traversal(true);
     _seq_task->set_par_threads((int)n_workers());
-    if (cg1r->do_traversal()) {
-      updateRS(0);
-      // Have to do this again after updaters
-      cleanupHRRS();
-    }
   }
   guarantee( _cards_scanned == NULL, "invariant" );
   _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers());
@@ -647,11 +607,8 @@
   _g1->collection_set_iterate(&iterClosure);
   // Set all cards back to clean.
   _g1->cleanUpCardTable();
+
   if (ParallelGCThreads > 0) {
-    ConcurrentG1Refine* cg1r = _g1->concurrent_g1_refine();
-    if (cg1r->do_traversal()) {
-      cg1r->cg1rThread()->set_do_traversal(false);
-    }
     set_par_traversal(false);
   }

@@ -721,139 +678,8 @@
 }


-class ConcRefineRegionClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  CardTableModRefBS* _ctbs;
-  ConcurrentGCThread* _cgc_thrd;
-  ConcurrentG1Refine* _cg1r;
-  unsigned _cards_processed;
-  UpdateRSOopClosure _update_rs_oop_cl;
-public:
-  ConcRefineRegionClosure(CardTableModRefBS* ctbs,
-                          ConcurrentG1Refine* cg1r,
-                          HRInto_G1RemSet* g1rs) :
-    _ctbs(ctbs), _cg1r(cg1r), _cgc_thrd(cg1r->cg1rThread()),
-    _update_rs_oop_cl(g1rs), _cards_processed(0),
-    _g1h(G1CollectedHeap::heap())
-  {}
-
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->in_collection_set() &&
-        !r->continuesHumongous() &&
-        !r->is_young()) {
-      _update_rs_oop_cl.set_from(r);
-      UpdateRSObjectClosure update_rs_obj_cl(&_update_rs_oop_cl);
-
-      // For each run of dirty card in the region:
-      //   1) Clear the cards.
-      //   2) Process the range corresponding to the run, adding any
-      //      necessary RS entries.
-      // 1 must precede 2, so that a concurrent modification redirties the
-      // card.  If a processing attempt does not succeed, because it runs
-      // into an unparseable region, we will do binary search to find the
-      // beginning of the next parseable region.
-      HeapWord* startAddr = r->bottom();
-      HeapWord* endAddr = r->used_region().end();
-      HeapWord* lastAddr;
-      HeapWord* nextAddr;
-
-      for (nextAddr = lastAddr = startAddr;
-           nextAddr < endAddr;
-           nextAddr = lastAddr) {
-        MemRegion dirtyRegion;
-
-        // Get and clear dirty region from card table
-        MemRegion next_mr(nextAddr, endAddr);
-        dirtyRegion =
-          _ctbs->dirty_card_range_after_reset(
-            next_mr,
-            true, CardTableModRefBS::clean_card_val());
-        assert(dirtyRegion.start() >= nextAddr,
-               "returned region inconsistent?");
-
-        if (!dirtyRegion.is_empty()) {
-          HeapWord* stop_point =
-            r->object_iterate_mem_careful(dirtyRegion,
-                                          &update_rs_obj_cl);
-          if (stop_point == NULL) {
-            lastAddr = dirtyRegion.end();
-            _cards_processed +=
-              (int) (dirtyRegion.word_size() / CardTableModRefBS::card_size_in_words);
-          } else {
-            // We're going to skip one or more cards that we can't parse.
-            HeapWord* next_parseable_card =
-              r->next_block_start_careful(stop_point);
-            // Round this up to a card boundary.
-            next_parseable_card =
-              _ctbs->addr_for(_ctbs->byte_after_const(next_parseable_card));
-            // Now we invalidate the intervening cards so we'll see them
-            // again.
-            MemRegion remaining_dirty =
-              MemRegion(stop_point, dirtyRegion.end());
-            MemRegion skipped =
-              MemRegion(stop_point, next_parseable_card);
-            _ctbs->invalidate(skipped.intersection(remaining_dirty));
-
-            // Now start up again where we can parse.
-            lastAddr = next_parseable_card;
-
-            // Count how many we did completely.
-            _cards_processed +=
-              (stop_point - dirtyRegion.start()) /
-              CardTableModRefBS::card_size_in_words;
-          }
-          // Allow interruption at regular intervals.
-          // (Might need to make them more regular, if we get big
-          // dirty regions.)
-          if (_cgc_thrd != NULL) {
-            if (_cgc_thrd->should_yield()) {
-              _cgc_thrd->yield();
-              switch (_cg1r->get_pya()) {
-              case PYA_continue:
-                // This may have changed: re-read.
-                endAddr = r->used_region().end();
-                continue;
-              case PYA_restart: case PYA_cancel:
-                return true;
-              }
-            }
-          }
-        } else {
-          break;
-        }
-      }
-    }
-    // A good yield opportunity.
-    if (_cgc_thrd != NULL) {
-      if (_cgc_thrd->should_yield()) {
-        _cgc_thrd->yield();
-        switch (_cg1r->get_pya()) {
-        case PYA_restart: case PYA_cancel:
-          return true;
-        default:
-          break;
-        }
-
-      }
-    }
-    return false;
-  }
-
-  unsigned cards_processed() { return _cards_processed; }
-};
-
-
-void HRInto_G1RemSet::concurrentRefinementPass(ConcurrentG1Refine* cg1r) {
-  ConcRefineRegionClosure cr_cl(ct_bs(), cg1r, this);
-  _g1->heap_region_iterate(&cr_cl);
-  _conc_refine_traversals++;
-  _conc_refine_cards += cr_cl.cards_processed();
-}
-
 static IntHistogram out_of_histo(50, 50);

-
-
 void HRInto_G1RemSet::concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {
   // If the card is no longer dirty, nothing to do.
   if (*card_ptr != CardTableModRefBS::dirty_card_val()) return;
@@ -983,10 +809,16 @@
   HeapRegion* max_mem_sz_region() { return _max_mem_sz_region; }
 };

+class PrintRSThreadVTimeClosure : public ThreadClosure {
+public:
+  virtual void do_thread(Thread *t) {
+    ConcurrentG1RefineThread* crt = (ConcurrentG1RefineThread*) t;
+    gclog_or_tty->print(" %5.2f", crt->vtime_accum());
+  }
+};
+
 void HRInto_G1RemSet::print_summary_info() {
   G1CollectedHeap* g1 = G1CollectedHeap::heap();
-  ConcurrentG1RefineThread* cg1r_thrd =
-    g1->concurrent_g1_refine()->cg1rThread();

 #if CARD_REPEAT_HISTO
   gclog_or_tty->print_cr("\nG1 card_repeat count histogram: ");
@@ -999,15 +831,13 @@
     gclog_or_tty->print_cr(" # of CS ptrs --> # of cards with that number.");
     out_of_histo.print_on(gclog_or_tty);
   }
-  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards in "
-                         "%5.2fs.",
-                         _conc_refine_cards, cg1r_thrd->vtime_accum());
-
+  gclog_or_tty->print_cr("\n Concurrent RS processed %d cards",
+                         _conc_refine_cards);
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   jint tot_processed_buffers =
     dcqs.processed_buffers_mut() + dcqs.processed_buffers_rs_thread();
   gclog_or_tty->print_cr(" Of %d completed buffers:", tot_processed_buffers);
-  gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS thread.",
+  gclog_or_tty->print_cr(" %8d (%5.1f%%) by conc RS threads.",
                          dcqs.processed_buffers_rs_thread(),
                          100.0*(float)dcqs.processed_buffers_rs_thread()/
                          (float)tot_processed_buffers);
@@ -1015,15 +845,12 @@
                          dcqs.processed_buffers_mut(),
                          100.0*(float)dcqs.processed_buffers_mut()/
                          (float)tot_processed_buffers);
-  gclog_or_tty->print_cr(" Did %d concurrent refinement traversals.",
-                         _conc_refine_traversals);
-  if (!G1RSBarrierUseQueue) {
-    gclog_or_tty->print_cr(" Scanned %8.2f cards/traversal.",
-                           _conc_refine_traversals > 0 ?
-                           (float)_conc_refine_cards/(float)_conc_refine_traversals :
-                           0);
-  }
+  gclog_or_tty->print_cr(" Conc RS threads times(s)");
+  PrintRSThreadVTimeClosure p;
+  gclog_or_tty->print(" ");
+  g1->concurrent_g1_refine()->threads_do(&p);
   gclog_or_tty->print_cr("");
+
   if (G1UseHRIntoRS) {
     HRRSStatsIter blk;
     g1->heap_region_iterate(&blk);
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -33,15 +33,12 @@
 class G1RemSet: public CHeapObj {
 protected:
   G1CollectedHeap* _g1;
-
-  unsigned _conc_refine_traversals;
   unsigned _conc_refine_cards;
-
   size_t n_workers();

 public:
   G1RemSet(G1CollectedHeap* g1) :
-    _g1(g1), _conc_refine_traversals(0), _conc_refine_cards(0)
+    _g1(g1), _conc_refine_cards(0)
   {}

   // Invoke "blk->do_oop" on all pointers into the CS in object in regions
@@ -81,19 +78,11 @@
   virtual void scrub_par(BitMap* region_bm, BitMap* card_bm,
                          int worker_num, int claim_val) = 0;

-  // Do any "refinement" activity that might be appropriate to the given
-  // G1RemSet.  If "refinement" has iterateive "passes", do one pass.
-  // If "t" is non-NULL, it is the thread performing the refinement.
-  // Default implementation does nothing.
-  virtual void concurrentRefinementPass(ConcurrentG1Refine* cg1r) {}
-
   // Refine the card corresponding to "card_ptr".  If "sts" is non-NULL,
   // join and leave around parts that must be atomic wrt GC.  (NULL means
   // being done at a safepoint.)
   virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i) {}

-  unsigned conc_refine_cards() { return _conc_refine_cards; }
-
   // Print any relevant summary info.
   virtual void print_summary_info() {}

@@ -153,7 +142,7 @@
   // progress.  If so, then cards added to remembered sets should also have
   // their references into the collection summarized in "_new_refs".
   bool _par_traversal_in_progress;
-  void set_par_traversal(bool b);
+  void set_par_traversal(bool b) { _par_traversal_in_progress = b; }
   GrowableArray<oop*>** _new_refs;
   void new_refs_iterate(OopClosure* cl);

@@ -194,7 +183,6 @@
   void scrub_par(BitMap* region_bm, BitMap* card_bm,
                  int worker_num, int claim_val);

-  virtual void concurrentRefinementPass(ConcurrentG1Refine* t);
   virtual void concurrentRefineOneCard(jbyte* card_ptr, int worker_i);

   virtual void print_summary_info();
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Jun 05 10:25:39 2009 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Jun 10 14:57:21 2009 -0700
@@ -147,9 +147,6 @@
   develop(bool, G1PrintCTFilterStats, false,                                \
           "If true, print stats on RS filtering effectiveness")             \
                                                                             \
-  develop(bool, G1RSBarrierUseQueue, true,                                  \
-          "If true, use queueing RS barrier")                               \
-                                                                            \
   develop(bool, G1DeferredRSUpdate, true,                                   \
           "If true, use deferred RS updates")                               \
                                                                             \
@@ -253,6 +250,10 @@
                                                                             \
   experimental(bool, G1ParallelRSetScanningEnabled, false,                  \
           "Enables the parallelization of remembered set scanning "        \
-          "during evacuation pauses")
+          "during evacuation pauses")                                       \
+                                                                            \
+  product(uintx, G1ParallelRSetThreads, 0,                                  \
+          "If non-0 is the number of parallel rem set update threads, "    \
+          "otherwise the value is determined ergonomically.")

 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
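
Example usage of the new flag (command line is illustrative; on builds of this vintage G1 itself still has to be selected explicitly):

    java -XX:+UnlockExperimentalVMOptions -XX:+UseG1GC \
         -XX:G1ParallelRSetThreads=4 MyApp

With the default of 0, ConcurrentG1Refine::thread_num() falls back to ParallelGCThreads, per concurrentG1Refine.cpp above.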
18.1 --- a/src/share/vm/gc_implementation/g1/heapRegion.cpp Fri Jun 05 10:25:39 2009 -0700
18.2 +++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp Wed Jun 10 14:57:21 2009 -0700
18.3 @@ -351,6 +351,7 @@
18.4     _claimed(InitialClaimValue), _evacuation_failed(false),
18.5     _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1),
18.6     _young_type(NotYoung), _next_young_region(NULL),
18.7 +   _next_dirty_cards_region(NULL),
18.8     _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1),
18.9     _rem_set(NULL), _zfs(NotZeroFilled)
18.10  {
19.1 --- a/src/share/vm/gc_implementation/g1/heapRegion.hpp Fri Jun 05 10:25:39 2009 -0700
19.2 +++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp Wed Jun 10 14:57:21 2009 -0700
19.3 @@ -227,6 +227,9 @@
19.4    // next region in the young "generation" region set
19.5    HeapRegion* _next_young_region;
19.6 
19.7 +  // Next region whose cards need cleaning
19.8 +  HeapRegion* _next_dirty_cards_region;
19.9 +
19.10   // For parallel heapRegion traversal.
19.11   jint _claimed;
19.12 
19.13 @@ -468,6 +471,11 @@
19.14     _next_young_region = hr;
19.15   }
19.16 
19.17 +  HeapRegion* get_next_dirty_cards_region() const { return _next_dirty_cards_region; }
19.18 +  HeapRegion** next_dirty_cards_region_addr() { return &_next_dirty_cards_region; }
19.19 +  void set_next_dirty_cards_region(HeapRegion* hr) { _next_dirty_cards_region = hr; }
19.20 +  bool is_on_dirty_cards_region_list() const { return get_next_dirty_cards_region() != NULL; }
19.21 +
19.22   // Allows logical separation between objects allocated before and after.
19.23   void save_marks();
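Since is_on_dirty_cards_region_list() tests the next-link against NULL, the tail of the list presumably links to itself (or a sentinel) rather than to NULL while on the list. A hedged sketch of how such an intrusive list would be drained; the head variable name is an assumption for illustration, not taken from this changeset:

    // Sketch under assumptions: drain an intrusive dirty-cards region list
    // whose tail self-links so that a non-NULL next-link means "on the list".
    HeapRegion* cur = dirty_cards_region_list_head;  // assumed head pointer
    while (cur != NULL) {
      HeapRegion* next = cur->get_next_dirty_cards_region();
      // ... clean this region's dirty cards ...
      cur->set_next_dirty_cards_region(NULL);        // take it off the list
      cur = (next == cur) ? NULL : next;             // self-link marks the tail
    }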
20.1 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Fri Jun 05 10:25:39 2009 -0700
20.2 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp Wed Jun 10 14:57:21 2009 -0700
20.3 @@ -1052,18 +1052,11 @@
20.4 
20.5  }
20.6 
20.7 -
20.8 -bool HeapRegionRemSet::_par_traversal = false;
20.9 -
20.10 -void HeapRegionRemSet::set_par_traversal(bool b) {
20.11 -  assert(_par_traversal != b, "Proper alternation...");
20.12 -  _par_traversal = b;
20.13 -}
20.14 -
20.15 +// Determines how many threads can add records to an rset in parallel.
20.16 +// This can be done by either mutator threads together with the
20.17 +// concurrent refinement threads or GC threads.
20.18  int HeapRegionRemSet::num_par_rem_sets() {
20.19 -  // We always have at least two, so that a mutator thread can claim an
20.20 -  // id and add to a rem set.
20.21 -  return (int) MAX2(ParallelGCThreads, (size_t)2);
20.22 +  return (int)MAX2(DirtyCardQueueSet::num_par_ids() + ConcurrentG1Refine::thread_num(), ParallelGCThreads);
20.23 }
20.24 
20.25 HeapRegionRemSet::HeapRegionRemSet(G1BlockOffsetSharedArray* bosa,
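The new sizing counts every thread that can add rem set entries concurrently (mutator threads via the dirty card queue par ids plus the refinement workers) against the GC worker count used inside a pause. A worked example with made-up numbers, not values from this changeset:

    // Illustration only; the three inputs below are assumed values.
    size_t num_par_ids    = 5;  // stand-in for DirtyCardQueueSet::num_par_ids()
    size_t refine_threads = 2;  // stand-in for ConcurrentG1Refine::thread_num()
    size_t gc_threads     = 4;  // stand-in for ParallelGCThreads
    int n = (int) MAX2(num_par_ids + refine_threads, gc_threads);  // == 7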
21.1 --- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Fri Jun 05 10:25:39 2009 -0700
21.2 +++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp Wed Jun 10 14:57:21 2009 -0700
21.3 @@ -177,8 +177,6 @@
21.4    G1BlockOffsetSharedArray* _bosa;
21.5    G1BlockOffsetSharedArray* bosa() const { return _bosa; }
21.6 
21.7 -  static bool _par_traversal;
21.8 -
21.9    OtherRegionsTable _other_regions;
21.10 
21.11   // One set bit for every region that has an entry for this one.
21.12 @@ -211,8 +209,6 @@
21.13                     HeapRegion* hr);
21.14 
21.15   static int num_par_rem_sets();
21.16 -  static bool par_traversal() { return _par_traversal; }
21.17 -  static void set_par_traversal(bool b);
21.18 
21.19   HeapRegion* hr() const {
21.20     return _other_regions.hr();
22.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Fri Jun 05 10:25:39 2009 -0700
22.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Wed Jun 10 14:57:21 2009 -0700
22.3 @@ -172,7 +172,7 @@
22.4    _n_completed_buffers++;
22.5 
22.6    if (!_process_completed &&
22.7 -      _n_completed_buffers == _process_completed_threshold) {
22.8 +      _n_completed_buffers >= _process_completed_threshold) {
22.9      _process_completed = true;
22.10     if (_notify_when_complete)
22.11       _cbl_mon->notify_all();
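The switch from == to >= makes the notification robust when the completed-buffer count can step past the threshold, for instance when buffers keep arriving while _process_completed is still set and the flag is only cleared once the count is already above the threshold. A hedged illustration of the hazard, with assumed numbers:

    // Assumed scenario: _process_completed_threshold == 4. Processing
    // finishes and _process_completed is cleared while 6 buffers are
    // already queued. The next enqueue sees _n_completed_buffers == 7:
    //   with "== threshold" the wake-up condition is never true again;
    //   with ">= threshold" it fires and processing resumes.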
23.1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1 Fri Jun 05 10:25:39 2009 -0700
23.2 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Wed Jun 10 14:57:21 2009 -0700
23.3 @@ -49,6 +49,8 @@
23.4 
23.5  concurrentG1Refine.hpp globalDefinitions.hpp
23.6  concurrentG1Refine.hpp allocation.hpp
23.7 +concurrentG1Refine.hpp thread.hpp
23.8 +
23.9 
23.10 concurrentG1RefineThread.cpp concurrentG1Refine.hpp
23.11 concurrentG1RefineThread.cpp concurrentG1RefineThread.hpp
23.12 @@ -280,6 +282,7 @@
23.13 
23.14 heapRegionRemSet.cpp allocation.hpp
23.15 heapRegionRemSet.cpp bitMap.inline.hpp
23.16 +heapRegionRemSet.cpp concurrentG1Refine.hpp
23.17 heapRegionRemSet.cpp g1BlockOffsetTable.inline.hpp
23.18 heapRegionRemSet.cpp g1CollectedHeap.inline.hpp
23.19 heapRegionRemSet.cpp heapRegionRemSet.hpp
24.1 --- a/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Fri Jun 05 10:25:39 2009 -0700
24.2 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.cpp Wed Jun 10 14:57:21 2009 -0700
24.3 @@ -27,13 +27,12 @@
24.4  # include "incls/_precompiled.incl"
24.5  # include "incls/_concurrentGCThread.cpp.incl"
24.6 
24.7 -bool ConcurrentGCThread::_should_terminate = false;
24.8 -bool ConcurrentGCThread::_has_terminated = false;
24.9  int ConcurrentGCThread::_CGC_flag = CGC_nil;
24.10 
24.11  SuspendibleThreadSet ConcurrentGCThread::_sts;
24.12 
24.13 -ConcurrentGCThread::ConcurrentGCThread() {
24.14 +ConcurrentGCThread::ConcurrentGCThread() :
24.15 +  _should_terminate(false), _has_terminated(false) {
24.16    _sts.initialize();
24.17  };
25.1 --- a/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp Fri Jun 05 10:25:39 2009 -0700
25.2 +++ b/src/share/vm/gc_implementation/shared/concurrentGCThread.hpp Wed Jun 10 14:57:21 2009 -0700
25.3 @@ -72,8 +72,8 @@
25.4    friend class VMStructs;
25.5 
25.6  protected:
25.7 -  static bool _should_terminate;
25.8 -  static bool _has_terminated;
25.9 +  bool _should_terminate;
25.10 +  bool _has_terminated;
25.11 
25.12   enum CGC_flag_type {
25.13     CGC_nil = 0x0,
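Making the termination flags per-instance matters now that several ConcurrentGCThreads (for example, multiple refinement workers) can run at once: each can be asked to stop independently, where the old static flags would have signalled all of them together. A standalone sketch of the distinction, not HotSpot code:

    // Minimal illustration: per-instance vs. static termination flags.
    struct Worker {
      bool _should_terminate;               // per-instance: stops only this worker
      Worker() : _should_terminate(false) {}
    };
    // With "static bool _should_terminate", setting the flag on one worker
    // would have requested termination of every worker sharing the class.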
26.1 --- a/src/share/vm/memory/cardTableRS.cpp Fri Jun 05 10:25:39 2009 -0700
26.2 +++ b/src/share/vm/memory/cardTableRS.cpp Wed Jun 10 14:57:21 2009 -0700
26.3 @@ -33,12 +33,8 @@
26.4  {
26.5 #ifndef SERIALGC
26.6    if (UseG1GC) {
26.7 -    if (G1RSBarrierUseQueue) {
26.8      _ct_bs = new G1SATBCardTableLoggingModRefBS(whole_heap,
26.9                                                  max_covered_regions);
26.10 -    } else {
26.11 -      _ct_bs = new G1SATBCardTableModRefBS(whole_heap, max_covered_regions);
26.12 -    }
26.13   } else {
26.14     _ct_bs = new CardTableModRefBSForCTRS(whole_heap, max_covered_regions);
26.15   }
27.1 --- a/src/share/vm/runtime/mutexLocker.cpp Fri Jun 05 10:25:39 2009 -0700
27.2 +++ b/src/share/vm/runtime/mutexLocker.cpp Wed Jun 10 14:57:21 2009 -0700
27.3 @@ -70,7 +70,6 @@
27.4  Monitor* CMark_lock = NULL;
27.5  Monitor* ZF_mon = NULL;
27.6  Monitor* Cleanup_mon = NULL;
27.7 -Monitor* G1ConcRefine_mon = NULL;
27.8  Mutex*   SATB_Q_FL_lock = NULL;
27.9  Monitor* SATB_Q_CBL_mon = NULL;
27.10 Mutex*   Shared_SATB_Q_lock = NULL;
27.11 @@ -168,7 +167,6 @@
27.12   def(CMark_lock          , Monitor, nonleaf, true ); // coordinate concurrent mark thread
27.13   def(ZF_mon              , Monitor, leaf,    true );
27.14   def(Cleanup_mon         , Monitor, nonleaf, true );
27.15 -  def(G1ConcRefine_mon    , Monitor, nonleaf, true );
27.16   def(SATB_Q_FL_lock      , Mutex  , special, true );
27.17   def(SATB_Q_CBL_mon      , Monitor, nonleaf, true );
27.18   def(Shared_SATB_Q_lock  , Mutex,   nonleaf, true );
28.1 --- a/src/share/vm/runtime/mutexLocker.hpp Fri Jun 05 10:25:39 2009 -0700
28.2 +++ b/src/share/vm/runtime/mutexLocker.hpp Wed Jun 10 14:57:21 2009 -0700
28.3 @@ -63,9 +63,6 @@
28.4  extern Monitor* CMark_lock;        // used for concurrent mark thread coordination
28.5  extern Monitor* ZF_mon;            // used for G1 conc zero-fill.
28.6  extern Monitor* Cleanup_mon;       // used for G1 conc cleanup.
28.7 -extern Monitor* G1ConcRefine_mon;  // used for G1 conc-refine
28.8 -                                   // coordination.
28.9 -
28.10 extern Mutex*   SATB_Q_FL_lock;    // Protects SATB Q
28.11                                    // buffer free list.
28.12 extern Monitor* SATB_Q_CBL_mon;    // Protects SATB Q