Tue, 22 Dec 2009 22:35:08 -0800
Merge
1.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Thu Dec 17 07:02:39 2009 -0800 1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.cpp Tue Dec 22 22:35:08 2009 -0800 1.3 @@ -42,28 +42,49 @@ 1.4 _n_periods(0), 1.5 _threads(NULL), _n_threads(0) 1.6 { 1.7 - if (G1ConcRefine) { 1.8 - _n_threads = (int)thread_num(); 1.9 - if (_n_threads > 0) { 1.10 - _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); 1.11 - int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); 1.12 - ConcurrentG1RefineThread *next = NULL; 1.13 - for (int i = _n_threads - 1; i >= 0; i--) { 1.14 - ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); 1.15 - assert(t != NULL, "Conc refine should have been created"); 1.16 - assert(t->cg1r() == this, "Conc refine thread should refer to this"); 1.17 - _threads[i] = t; 1.18 - next = t; 1.19 - } 1.20 - } 1.21 + 1.22 + // Ergonomically select initial concurrent refinement parameters 1.23 + if (FLAG_IS_DEFAULT(G1ConcRefineGreenZone)) { 1.24 + FLAG_SET_DEFAULT(G1ConcRefineGreenZone, MAX2<int>(ParallelGCThreads, 1)); 1.25 + } 1.26 + set_green_zone(G1ConcRefineGreenZone); 1.27 + 1.28 + if (FLAG_IS_DEFAULT(G1ConcRefineYellowZone)) { 1.29 + FLAG_SET_DEFAULT(G1ConcRefineYellowZone, green_zone() * 3); 1.30 + } 1.31 + set_yellow_zone(MAX2<int>(G1ConcRefineYellowZone, green_zone())); 1.32 + 1.33 + if (FLAG_IS_DEFAULT(G1ConcRefineRedZone)) { 1.34 + FLAG_SET_DEFAULT(G1ConcRefineRedZone, yellow_zone() * 2); 1.35 + } 1.36 + set_red_zone(MAX2<int>(G1ConcRefineRedZone, yellow_zone())); 1.37 + _n_worker_threads = thread_num(); 1.38 + // We need one extra thread to do the young gen rset size sampling. 
1.39 + _n_threads = _n_worker_threads + 1; 1.40 + reset_threshold_step(); 1.41 + 1.42 + _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads); 1.43 + int worker_id_offset = (int)DirtyCardQueueSet::num_par_ids(); 1.44 + ConcurrentG1RefineThread *next = NULL; 1.45 + for (int i = _n_threads - 1; i >= 0; i--) { 1.46 + ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, worker_id_offset, i); 1.47 + assert(t != NULL, "Conc refine should have been created"); 1.48 + assert(t->cg1r() == this, "Conc refine thread should refer to this"); 1.49 + _threads[i] = t; 1.50 + next = t; 1.51 } 1.52 } 1.53 1.54 -size_t ConcurrentG1Refine::thread_num() { 1.55 - if (G1ConcRefine) { 1.56 - return (G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads; 1.57 +void ConcurrentG1Refine::reset_threshold_step() { 1.58 + if (FLAG_IS_DEFAULT(G1ConcRefineThresholdStep)) { 1.59 + _thread_threshold_step = (yellow_zone() - green_zone()) / (worker_thread_num() + 1); 1.60 + } else { 1.61 + _thread_threshold_step = G1ConcRefineThresholdStep; 1.62 } 1.63 - return 0; 1.64 +} 1.65 + 1.66 +int ConcurrentG1Refine::thread_num() { 1.67 + return MAX2<int>((G1ParallelRSetThreads > 0) ? G1ParallelRSetThreads : ParallelGCThreads, 1); 1.68 } 1.69 1.70 void ConcurrentG1Refine::init() { 1.71 @@ -123,6 +144,15 @@ 1.72 } 1.73 } 1.74 1.75 +void ConcurrentG1Refine::reinitialize_threads() { 1.76 + reset_threshold_step(); 1.77 + if (_threads != NULL) { 1.78 + for (int i = 0; i < _n_threads; i++) { 1.79 + _threads[i]->initialize(); 1.80 + } 1.81 + } 1.82 +} 1.83 + 1.84 ConcurrentG1Refine::~ConcurrentG1Refine() { 1.85 if (G1ConcRSLogCacheSize > 0) { 1.86 assert(_card_counts != NULL, "Logic"); 1.87 @@ -384,4 +414,3 @@ 1.88 st->cr(); 1.89 } 1.90 } 1.91 -
2.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Thu Dec 17 07:02:39 2009 -0800 2.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp Tue Dec 22 22:35:08 2009 -0800 2.3 @@ -29,6 +29,31 @@ 2.4 class ConcurrentG1Refine: public CHeapObj { 2.5 ConcurrentG1RefineThread** _threads; 2.6 int _n_threads; 2.7 + int _n_worker_threads; 2.8 + /* 2.9 + * The value of the update buffer queue length falls into one of 3 zones: 2.10 + * green, yellow, red. If the value is in [0, green) nothing is 2.11 + * done, the buffers are left unprocessed to enable the caching effect of the 2.12 + * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement 2.13 + * threads are gradually activated. In [yellow, red) all threads are 2.14 + * running. If the length becomes red (max queue length) the mutators start 2.15 + * processing the buffers. 2.16 + * 2.17 + * There are some interesting cases (with G1AdaptiveConcRefine turned off): 2.18 + * 1) green = yellow = red = 0. In this case the mutator will process all 2.19 + * buffers. Except for those that are created by the deferred updates 2.20 + * machinery during a collection. 2.21 + * 2) green = 0. Means no caching. Can be a good way to minimize the 2.22 + * amount of time spent updating rsets during a collection. 2.23 + */ 2.24 + int _green_zone; 2.25 + int _yellow_zone; 2.26 + int _red_zone; 2.27 + 2.28 + int _thread_threshold_step; 2.29 + 2.30 + // Reset the threshold step value based on the current zone boundaries. 2.31 + void reset_threshold_step(); 2.32 2.33 // The cache for card refinement. 2.34 bool _use_cache; 2.35 @@ -147,6 +172,8 @@ 2.36 void init(); // Accomplish some initialization that has to wait. 
2.37 void stop(); 2.38 2.39 + void reinitialize_threads(); 2.40 + 2.41 // Iterate over the conc refine threads 2.42 void threads_do(ThreadClosure *tc); 2.43 2.44 @@ -178,7 +205,20 @@ 2.45 2.46 void clear_and_record_card_counts(); 2.47 2.48 - static size_t thread_num(); 2.49 + static int thread_num(); 2.50 2.51 void print_worker_threads_on(outputStream* st) const; 2.52 + 2.53 + void set_green_zone(int x) { _green_zone = x; } 2.54 + void set_yellow_zone(int x) { _yellow_zone = x; } 2.55 + void set_red_zone(int x) { _red_zone = x; } 2.56 + 2.57 + int green_zone() const { return _green_zone; } 2.58 + int yellow_zone() const { return _yellow_zone; } 2.59 + int red_zone() const { return _red_zone; } 2.60 + 2.61 + int total_thread_num() const { return _n_threads; } 2.62 + int worker_thread_num() const { return _n_worker_threads; } 2.63 + 2.64 + int thread_threshold_step() const { return _thread_threshold_step; } 2.65 };
3.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Thu Dec 17 07:02:39 2009 -0800 3.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.cpp Tue Dec 22 22:35:08 2009 -0800 3.3 @@ -25,10 +25,6 @@ 3.4 #include "incls/_precompiled.incl" 3.5 #include "incls/_concurrentG1RefineThread.cpp.incl" 3.6 3.7 -// ======= Concurrent Mark Thread ======== 3.8 - 3.9 -// The CM thread is created when the G1 garbage collector is used 3.10 - 3.11 ConcurrentG1RefineThread:: 3.12 ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next, 3.13 int worker_id_offset, int worker_id) : 3.14 @@ -37,19 +33,42 @@ 3.15 _worker_id(worker_id), 3.16 _active(false), 3.17 _next(next), 3.18 + _monitor(NULL), 3.19 _cg1r(cg1r), 3.20 - _vtime_accum(0.0), 3.21 - _interval_ms(5.0) 3.22 + _vtime_accum(0.0) 3.23 { 3.24 + 3.25 + // Each thread has its own monitor. The i-th thread is responsible for signalling 3.26 + // to thread i+1 if the number of buffers in the queue exceeds a threshold for this 3.27 + // thread. Monitors are also used to wake up the threads during termination. 3.28 + // The 0th worker is notified by mutator threads and has a special monitor. 3.29 + // The last worker is used for young gen rset size sampling. 
+ if (worker_id > 0) { 3.31 + _monitor = new Monitor(Mutex::nonleaf, "Refinement monitor", true); 3.32 + } else { 3.33 + _monitor = DirtyCardQ_CBL_mon; 3.34 + } 3.35 + initialize(); 3.36 + create_and_start(); 3.37 + } 3.38 3.39 +void ConcurrentG1RefineThread::initialize() { 3.40 + if (_worker_id < cg1r()->worker_thread_num()) { 3.41 + // Current thread activation threshold 3.42 + _threshold = MIN2<int>(cg1r()->thread_threshold_step() * (_worker_id + 1) + cg1r()->green_zone(), 3.43 + cg1r()->yellow_zone()); 3.44 + // A thread deactivates once the number of buffers reaches a deactivation threshold 3.45 + _deactivation_threshold = MAX2<int>(_threshold - cg1r()->thread_threshold_step(), cg1r()->green_zone()); 3.46 + } else { 3.47 + set_active(true); 3.48 + } 3.49 +} 3.50 + 3.51 void ConcurrentG1RefineThread::sample_young_list_rs_lengths() { 3.52 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 3.53 G1CollectorPolicy* g1p = g1h->g1_policy(); 3.54 if (g1p->adaptive_young_list_length()) { 3.55 int regions_visited = 0; 3.56 - 3.57 g1h->young_list_rs_length_sampling_init(); 3.58 while (g1h->young_list_rs_length_sampling_more()) { 3.59 g1h->young_list_rs_length_sampling_next(); 3.60 @@ -70,99 +89,121 @@ 3.61 } 3.62 } 3.63 3.64 +void ConcurrentG1RefineThread::run_young_rs_sampling() { 3.65 + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 3.66 + _vtime_start = os::elapsedVTime(); 3.67 + while(!_should_terminate) { 3.68 + _sts.join(); 3.69 + sample_young_list_rs_lengths(); 3.70 + _sts.leave(); 3.71 + 3.72 + if (os::supports_vtime()) { 3.73 + _vtime_accum = (os::elapsedVTime() - _vtime_start); 3.74 + } else { 3.75 + _vtime_accum = 0.0; 3.76 + } 3.77 + 3.78 + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); 3.79 + if (_should_terminate) { 3.80 + break; 3.81 + } 3.82 + _monitor->wait(Mutex::_no_safepoint_check_flag, G1ConcRefineServiceInterval); 3.83 + } 3.84 +} 3.85 + 3.86 +void ConcurrentG1RefineThread::wait_for_completed_buffers() { 3.87 + 
DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 3.88 + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); 3.89 + while (!_should_terminate && !is_active()) { 3.90 + _monitor->wait(Mutex::_no_safepoint_check_flag); 3.91 + } 3.92 +} 3.93 + 3.94 +bool ConcurrentG1RefineThread::is_active() { 3.95 + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 3.96 + return _worker_id > 0 ? _active : dcqs.process_completed_buffers(); 3.97 +} 3.98 + 3.99 +void ConcurrentG1RefineThread::activate() { 3.100 + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); 3.101 + if (_worker_id > 0) { 3.102 + if (G1TraceConcurrentRefinement) { 3.103 + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 3.104 + gclog_or_tty->print_cr("G1-Refine-activated worker %d, on threshold %d, current %d", 3.105 + _worker_id, _threshold, (int)dcqs.completed_buffers_num()); 3.106 + } 3.107 + set_active(true); 3.108 + } else { 3.109 + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 3.110 + dcqs.set_process_completed(true); 3.111 + } 3.112 + _monitor->notify(); 3.113 +} 3.114 + 3.115 +void ConcurrentG1RefineThread::deactivate() { 3.116 + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); 3.117 + if (_worker_id > 0) { 3.118 + if (G1TraceConcurrentRefinement) { 3.119 + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 3.120 + gclog_or_tty->print_cr("G1-Refine-deactivated worker %d, off threshold %d, current %d", 3.121 + _worker_id, _deactivation_threshold, (int)dcqs.completed_buffers_num()); 3.122 + } 3.123 + set_active(false); 3.124 + } else { 3.125 + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 3.126 + dcqs.set_process_completed(false); 3.127 + } 3.128 +} 3.129 + 3.130 void ConcurrentG1RefineThread::run() { 3.131 initialize_in_thread(); 3.132 - _vtime_start = os::elapsedVTime(); 3.133 wait_for_universe_init(); 3.134 3.135 + if (_worker_id >= cg1r()->worker_thread_num()) { 3.136 + run_young_rs_sampling(); 3.137 
+ terminate(); 3.138 + } 3.139 + 3.140 + _vtime_start = os::elapsedVTime(); 3.141 while (!_should_terminate) { 3.142 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 3.143 - // Wait for completed log buffers to exist. 3.144 - { 3.145 - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); 3.146 - while (((_worker_id == 0 && !dcqs.process_completed_buffers()) || 3.147 - (_worker_id > 0 && !is_active())) && 3.148 - !_should_terminate) { 3.149 - DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag); 3.150 - } 3.151 + 3.152 + // Wait for work 3.153 + wait_for_completed_buffers(); 3.154 + 3.155 + if (_should_terminate) { 3.156 + break; 3.157 } 3.158 3.159 - if (_should_terminate) { 3.160 - return; 3.161 - } 3.162 + _sts.join(); 3.163 3.164 - // Now we take them off (this doesn't hold locks while it applies 3.165 - // closures.) (If we did a full collection, then we'll do a full 3.166 - // traversal. 3.167 - _sts.join(); 3.168 - int n_logs = 0; 3.169 - int lower_limit = 0; 3.170 - double start_vtime_sec; // only used when G1SmoothConcRefine is on 3.171 - int prev_buffer_num; // only used when G1SmoothConcRefine is on 3.172 - // This thread activation threshold 3.173 - int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id; 3.174 - // Next thread activation threshold 3.175 - int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold; 3.176 - int deactivation_threshold = MAX2<int>(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0); 3.177 + do { 3.178 + int curr_buffer_num = (int)dcqs.completed_buffers_num(); 3.179 + // If the number of the buffers falls down into the yellow zone, 3.180 + // that means that the transition period after the evacuation pause has ended. 
3.181 + if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) { 3.182 + dcqs.set_completed_queue_padding(0); 3.183 + } 3.184 3.185 - if (G1SmoothConcRefine) { 3.186 - lower_limit = 0; 3.187 - start_vtime_sec = os::elapsedVTime(); 3.188 - prev_buffer_num = (int) dcqs.completed_buffers_num(); 3.189 - } else { 3.190 - lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now. 3.191 - } 3.192 - while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) { 3.193 - double end_vtime_sec; 3.194 - double elapsed_vtime_sec; 3.195 - int elapsed_vtime_ms; 3.196 - int curr_buffer_num = (int) dcqs.completed_buffers_num(); 3.197 - 3.198 - if (G1SmoothConcRefine) { 3.199 - end_vtime_sec = os::elapsedVTime(); 3.200 - elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 3.201 - elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0); 3.202 - 3.203 - if (curr_buffer_num > prev_buffer_num || 3.204 - curr_buffer_num > next_threshold) { 3.205 - decreaseInterval(elapsed_vtime_ms); 3.206 - } else if (curr_buffer_num < prev_buffer_num) { 3.207 - increaseInterval(elapsed_vtime_ms); 3.208 - } 3.209 - } 3.210 - if (_worker_id == 0) { 3.211 - sample_young_list_rs_lengths(); 3.212 - } else if (curr_buffer_num < deactivation_threshold) { 3.213 + if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) { 3.214 // If the number of the buffer has fallen below our threshold 3.215 // we should deactivate. The predecessor will reactivate this 3.216 // thread should the number of the buffers cross the threshold again. 3.217 - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); 3.218 deactivate(); 3.219 - if (G1TraceConcurrentRefinement) { 3.220 - gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id); 3.221 - } 3.222 break; 3.223 } 3.224 3.225 // Check if we need to activate the next thread. 
3.226 - if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) { 3.227 - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); 3.228 + if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) { 3.229 _next->activate(); 3.230 - DirtyCardQ_CBL_mon->notify_all(); 3.231 - if (G1TraceConcurrentRefinement) { 3.232 - gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id); 3.233 - } 3.234 } 3.235 + } while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, cg1r()->green_zone())); 3.236 3.237 - if (G1SmoothConcRefine) { 3.238 - prev_buffer_num = curr_buffer_num; 3.239 - _sts.leave(); 3.240 - os::sleep(Thread::current(), (jlong) _interval_ms, false); 3.241 - _sts.join(); 3.242 - start_vtime_sec = os::elapsedVTime(); 3.243 - } 3.244 - n_logs++; 3.245 + // We can exit the loop above while being active if there was a yield request. 3.246 + if (is_active()) { 3.247 + deactivate(); 3.248 } 3.249 + 3.250 _sts.leave(); 3.251 3.252 if (os::supports_vtime()) { 3.253 @@ -172,7 +213,6 @@ 3.254 } 3.255 } 3.256 assert(_should_terminate, "just checking"); 3.257 - 3.258 terminate(); 3.259 } 3.260 3.261 @@ -191,8 +231,8 @@ 3.262 } 3.263 3.264 { 3.265 - MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag); 3.266 - DirtyCardQ_CBL_mon->notify_all(); 3.267 + MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag); 3.268 + _monitor->notify(); 3.269 } 3.270 3.271 {
4.1 --- a/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Thu Dec 17 07:02:39 2009 -0800 4.2 +++ b/src/share/vm/gc_implementation/g1/concurrentG1RefineThread.hpp Tue Dec 22 22:35:08 2009 -0800 4.3 @@ -40,42 +40,36 @@ 4.4 // when the number of the rset update buffer crosses a certain threshold. A successor 4.5 // would self-deactivate when the number of the buffers falls below the threshold. 4.6 bool _active; 4.7 - ConcurrentG1RefineThread * _next; 4.8 - public: 4.9 - virtual void run(); 4.10 + ConcurrentG1RefineThread* _next; 4.11 + Monitor* _monitor; 4.12 + ConcurrentG1Refine* _cg1r; 4.13 4.14 - bool is_active() { return _active; } 4.15 - void activate() { _active = true; } 4.16 - void deactivate() { _active = false; } 4.17 + int _thread_threshold_step; 4.18 + // This thread activation threshold 4.19 + int _threshold; 4.20 + // This thread deactivation threshold 4.21 + int _deactivation_threshold; 4.22 4.23 - private: 4.24 - ConcurrentG1Refine* _cg1r; 4.25 + void sample_young_list_rs_lengths(); 4.26 + void run_young_rs_sampling(); 4.27 + void wait_for_completed_buffers(); 4.28 4.29 - double _interval_ms; 4.30 - 4.31 - void decreaseInterval(int processing_time_ms) { 4.32 - double min_interval_ms = (double) processing_time_ms; 4.33 - _interval_ms = 0.8 * _interval_ms; 4.34 - if (_interval_ms < min_interval_ms) 4.35 - _interval_ms = min_interval_ms; 4.36 - } 4.37 - void increaseInterval(int processing_time_ms) { 4.38 - double max_interval_ms = 9.0 * (double) processing_time_ms; 4.39 - _interval_ms = 1.1 * _interval_ms; 4.40 - if (max_interval_ms > 0 && _interval_ms > max_interval_ms) 4.41 - _interval_ms = max_interval_ms; 4.42 - } 4.43 - 4.44 - void sleepBeforeNextCycle(); 4.45 + void set_active(bool x) { _active = x; } 4.46 + bool is_active(); 4.47 + void activate(); 4.48 + void deactivate(); 4.49 4.50 // For use by G1CollectedHeap, which is a friend. 
4.51 static SuspendibleThreadSet* sts() { return &_sts; } 4.52 4.53 - public: 4.54 +public: 4.55 + virtual void run(); 4.56 // Constructor 4.57 ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread* next, 4.58 int worker_id_offset, int worker_id); 4.59 4.60 + void initialize(); 4.61 + 4.62 // Printing 4.63 void print() const; 4.64 void print_on(outputStream* st) const; 4.65 @@ -83,13 +77,10 @@ 4.66 // Total virtual time so far. 4.67 double vtime_accum() { return _vtime_accum; } 4.68 4.69 - ConcurrentG1Refine* cg1r() { return _cg1r; } 4.70 - 4.71 - void sample_young_list_rs_lengths(); 4.72 + ConcurrentG1Refine* cg1r() { return _cg1r; } 4.73 4.74 // Yield for GC 4.75 - void yield(); 4.76 - 4.77 + void yield(); 4.78 // shutdown 4.79 void stop(); 4.80 };
5.1 --- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Dec 17 07:02:39 2009 -0800 5.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Tue Dec 22 22:35:08 2009 -0800 5.3 @@ -760,7 +760,6 @@ 5.4 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle 5.5 5.6 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 5.7 - satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); 5.8 satb_mq_set.set_active_all_threads(true); 5.9 5.10 // update_g1_committed() will be called at the end of an evac pause
6.1 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Thu Dec 17 07:02:39 2009 -0800 6.2 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.cpp Tue Dec 22 22:35:08 2009 -0800 6.3 @@ -61,8 +61,8 @@ 6.4 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list 6.5 #endif // _MSC_VER 6.6 6.7 -DirtyCardQueueSet::DirtyCardQueueSet() : 6.8 - PtrQueueSet(true /*notify_when_complete*/), 6.9 +DirtyCardQueueSet::DirtyCardQueueSet(bool notify_when_complete) : 6.10 + PtrQueueSet(notify_when_complete), 6.11 _closure(NULL), 6.12 _shared_dirty_card_queue(this, true /*perm*/), 6.13 _free_ids(NULL), 6.14 @@ -77,12 +77,12 @@ 6.15 } 6.16 6.17 void DirtyCardQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, 6.18 + int process_completed_threshold, 6.19 int max_completed_queue, 6.20 Mutex* lock, PtrQueueSet* fl_owner) { 6.21 - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue, fl_owner); 6.22 + PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, 6.23 + max_completed_queue, fl_owner); 6.24 set_buffer_size(G1UpdateBufferSize); 6.25 - set_process_completed_threshold(G1UpdateBufferQueueProcessingThreshold); 6.26 - 6.27 _shared_dirty_card_queue.set_lock(lock); 6.28 _free_ids = new FreeIdSet((int) num_par_ids(), _cbl_mon); 6.29 } 6.30 @@ -154,9 +154,10 @@ 6.31 return b; 6.32 } 6.33 6.34 -DirtyCardQueueSet::CompletedBufferNode* 6.35 + 6.36 +BufferNode* 6.37 DirtyCardQueueSet::get_completed_buffer(int stop_at) { 6.38 - CompletedBufferNode* nd = NULL; 6.39 + BufferNode* nd = NULL; 6.40 MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 6.41 6.42 if ((int)_n_completed_buffers <= stop_at) { 6.43 @@ -166,10 +167,11 @@ 6.44 6.45 if (_completed_buffers_head != NULL) { 6.46 nd = _completed_buffers_head; 6.47 - _completed_buffers_head = nd->next; 6.48 + _completed_buffers_head = nd->next(); 6.49 if (_completed_buffers_head == NULL) 6.50 _completed_buffers_tail = NULL; 6.51 _n_completed_buffers--; 6.52 + 
assert(_n_completed_buffers >= 0, "Invariant"); 6.53 } 6.54 debug_only(assert_completed_buffer_list_len_correct_locked()); 6.55 return nd; 6.56 @@ -177,20 +179,19 @@ 6.57 6.58 bool DirtyCardQueueSet:: 6.59 apply_closure_to_completed_buffer_helper(int worker_i, 6.60 - CompletedBufferNode* nd) { 6.61 + BufferNode* nd) { 6.62 if (nd != NULL) { 6.63 + void **buf = BufferNode::make_buffer_from_node(nd); 6.64 + size_t index = nd->index(); 6.65 bool b = 6.66 - DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 6.67 - nd->index, _sz, 6.68 + DirtyCardQueue::apply_closure_to_buffer(_closure, buf, 6.69 + index, _sz, 6.70 true, worker_i); 6.71 - void** buf = nd->buf; 6.72 - size_t index = nd->index; 6.73 - delete nd; 6.74 if (b) { 6.75 deallocate_buffer(buf); 6.76 return true; // In normal case, go on to next buffer. 6.77 } else { 6.78 - enqueue_complete_buffer(buf, index, true); 6.79 + enqueue_complete_buffer(buf, index); 6.80 return false; 6.81 } 6.82 } else { 6.83 @@ -203,32 +204,33 @@ 6.84 bool during_pause) 6.85 { 6.86 assert(!during_pause || stop_at == 0, "Should not leave any completed buffers during a pause"); 6.87 - CompletedBufferNode* nd = get_completed_buffer(stop_at); 6.88 + BufferNode* nd = get_completed_buffer(stop_at); 6.89 bool res = apply_closure_to_completed_buffer_helper(worker_i, nd); 6.90 if (res) Atomic::inc(&_processed_buffers_rs_thread); 6.91 return res; 6.92 } 6.93 6.94 void DirtyCardQueueSet::apply_closure_to_all_completed_buffers() { 6.95 - CompletedBufferNode* nd = _completed_buffers_head; 6.96 + BufferNode* nd = _completed_buffers_head; 6.97 while (nd != NULL) { 6.98 bool b = 6.99 - DirtyCardQueue::apply_closure_to_buffer(_closure, nd->buf, 0, _sz, 6.100 - false); 6.101 + DirtyCardQueue::apply_closure_to_buffer(_closure, 6.102 + BufferNode::make_buffer_from_node(nd), 6.103 + 0, _sz, false); 6.104 guarantee(b, "Should not stop early."); 6.105 - nd = nd->next; 6.106 + nd = nd->next(); 6.107 } 6.108 } 6.109 6.110 void 
DirtyCardQueueSet::abandon_logs() { 6.111 assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); 6.112 - CompletedBufferNode* buffers_to_delete = NULL; 6.113 + BufferNode* buffers_to_delete = NULL; 6.114 { 6.115 MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 6.116 while (_completed_buffers_head != NULL) { 6.117 - CompletedBufferNode* nd = _completed_buffers_head; 6.118 - _completed_buffers_head = nd->next; 6.119 - nd->next = buffers_to_delete; 6.120 + BufferNode* nd = _completed_buffers_head; 6.121 + _completed_buffers_head = nd->next(); 6.122 + nd->set_next(buffers_to_delete); 6.123 buffers_to_delete = nd; 6.124 } 6.125 _n_completed_buffers = 0; 6.126 @@ -236,10 +238,9 @@ 6.127 debug_only(assert_completed_buffer_list_len_correct_locked()); 6.128 } 6.129 while (buffers_to_delete != NULL) { 6.130 - CompletedBufferNode* nd = buffers_to_delete; 6.131 - buffers_to_delete = nd->next; 6.132 - deallocate_buffer(nd->buf); 6.133 - delete nd; 6.134 + BufferNode* nd = buffers_to_delete; 6.135 + buffers_to_delete = nd->next(); 6.136 + deallocate_buffer(BufferNode::make_buffer_from_node(nd)); 6.137 } 6.138 // Since abandon is done only at safepoints, we can safely manipulate 6.139 // these queues.
7.1 --- a/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Thu Dec 17 07:02:39 2009 -0800 7.2 +++ b/src/share/vm/gc_implementation/g1/dirtyCardQueue.hpp Tue Dec 22 22:35:08 2009 -0800 7.3 @@ -84,11 +84,12 @@ 7.4 jint _processed_buffers_rs_thread; 7.5 7.6 public: 7.7 - DirtyCardQueueSet(); 7.8 + DirtyCardQueueSet(bool notify_when_complete = true); 7.9 7.10 void initialize(Monitor* cbl_mon, Mutex* fl_lock, 7.11 - int max_completed_queue = 0, 7.12 - Mutex* lock = NULL, PtrQueueSet* fl_owner = NULL); 7.13 + int process_completed_threshold, 7.14 + int max_completed_queue, 7.15 + Mutex* lock, PtrQueueSet* fl_owner = NULL); 7.16 7.17 // The number of parallel ids that can be claimed to allow collector or 7.18 // mutator threads to do card-processing work. 7.19 @@ -123,9 +124,9 @@ 7.20 bool during_pause = false); 7.21 7.22 bool apply_closure_to_completed_buffer_helper(int worker_i, 7.23 - CompletedBufferNode* nd); 7.24 + BufferNode* nd); 7.25 7.26 - CompletedBufferNode* get_completed_buffer(int stop_at); 7.27 + BufferNode* get_completed_buffer(int stop_at); 7.28 7.29 // Applies the current closure to all completed buffers, 7.30 // non-consumptively.
8.1 --- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Thu Dec 17 07:02:39 2009 -0800 8.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp Tue Dec 22 22:35:08 2009 -0800 8.3 @@ -1375,6 +1375,7 @@ 8.4 G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : 8.5 SharedHeap(policy_), 8.6 _g1_policy(policy_), 8.7 + _dirty_card_queue_set(false), 8.8 _ref_processor(NULL), 8.9 _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)), 8.10 _bot_shared(NULL), 8.11 @@ -1460,8 +1461,6 @@ 8.12 Universe::check_alignment(init_byte_size, HeapRegion::GrainBytes, "g1 heap"); 8.13 Universe::check_alignment(max_byte_size, HeapRegion::GrainBytes, "g1 heap"); 8.14 8.15 - // We allocate this in any case, but only do no work if the command line 8.16 - // param is off. 8.17 _cg1r = new ConcurrentG1Refine(); 8.18 8.19 // Reserve the maximum. 8.20 @@ -1594,18 +1593,20 @@ 8.21 8.22 JavaThread::satb_mark_queue_set().initialize(SATB_Q_CBL_mon, 8.23 SATB_Q_FL_lock, 8.24 - 0, 8.25 + G1SATBProcessCompletedThreshold, 8.26 Shared_SATB_Q_lock); 8.27 8.28 JavaThread::dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, 8.29 DirtyCardQ_FL_lock, 8.30 - G1UpdateBufferQueueMaxLength, 8.31 + concurrent_g1_refine()->yellow_zone(), 8.32 + concurrent_g1_refine()->red_zone(), 8.33 Shared_DirtyCardQ_lock); 8.34 8.35 if (G1DeferredRSUpdate) { 8.36 dirty_card_queue_set().initialize(DirtyCardQ_CBL_mon, 8.37 DirtyCardQ_FL_lock, 8.38 - 0, 8.39 + -1, // never trigger processing 8.40 + -1, // no limit on length 8.41 Shared_DirtyCardQ_lock, 8.42 &JavaThread::dirty_card_queue_set()); 8.43 } 8.44 @@ -4239,10 +4240,11 @@ 8.45 RedirtyLoggedCardTableEntryFastClosure redirty; 8.46 dirty_card_queue_set().set_closure(&redirty); 8.47 dirty_card_queue_set().apply_closure_to_all_completed_buffers(); 8.48 - JavaThread::dirty_card_queue_set().merge_bufferlists(&dirty_card_queue_set()); 8.49 + 8.50 + DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set(); 8.51 + 
dcq.merge_bufferlists(&dirty_card_queue_set()); 8.52 assert(dirty_card_queue_set().completed_buffers_num() == 0, "All should be consumed"); 8.53 } 8.54 - 8.55 COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); 8.56 } 8.57
9.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Thu Dec 17 07:02:39 2009 -0800 9.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp Tue Dec 22 22:35:08 2009 -0800 9.3 @@ -1914,6 +1914,10 @@ 9.4 calculate_young_list_min_length(); 9.5 calculate_young_list_target_config(); 9.6 9.7 + // Note that _mmu_tracker->max_gc_time() returns the time in seconds. 9.8 + double update_rs_time_goal_ms = _mmu_tracker->max_gc_time() * MILLIUNITS * G1RSUpdatePauseFractionPercent / 100.0; 9.9 + adjust_concurrent_refinement(update_rs_time, update_rs_processed_buffers, update_rs_time_goal_ms); 9.10 + 9.11 // </NEW PREDICTION> 9.12 9.13 _target_pause_time_ms = -1.0; 9.14 @@ -1921,6 +1925,47 @@ 9.15 9.16 // <NEW PREDICTION> 9.17 9.18 +void G1CollectorPolicy::adjust_concurrent_refinement(double update_rs_time, 9.19 + double update_rs_processed_buffers, 9.20 + double goal_ms) { 9.21 + DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set(); 9.22 + ConcurrentG1Refine *cg1r = G1CollectedHeap::heap()->concurrent_g1_refine(); 9.23 + 9.24 + if (G1AdaptiveConcRefine) { 9.25 + const int k_gy = 3, k_gr = 6; 9.26 + const double inc_k = 1.1, dec_k = 0.9; 9.27 + 9.28 + int g = cg1r->green_zone(); 9.29 + if (update_rs_time > goal_ms) { 9.30 + g = (int)(g * dec_k); // Can become 0, that's OK. That would mean a mutator-only processing. 
9.31 + } else { 9.32 + if (update_rs_time < goal_ms && update_rs_processed_buffers > g) { 9.33 + g = (int)MAX2(g * inc_k, g + 1.0); 9.34 + } 9.35 + } 9.36 + // Change the refinement threads params 9.37 + cg1r->set_green_zone(g); 9.38 + cg1r->set_yellow_zone(g * k_gy); 9.39 + cg1r->set_red_zone(g * k_gr); 9.40 + cg1r->reinitialize_threads(); 9.41 + 9.42 + int processing_threshold_delta = MAX2((int)(cg1r->green_zone() * sigma()), 1); 9.43 + int processing_threshold = MIN2(cg1r->green_zone() + processing_threshold_delta, 9.44 + cg1r->yellow_zone()); 9.45 + // Change the barrier params 9.46 + dcqs.set_process_completed_threshold(processing_threshold); 9.47 + dcqs.set_max_completed_queue(cg1r->red_zone()); 9.48 + } 9.49 + 9.50 + int curr_queue_size = dcqs.completed_buffers_num(); 9.51 + if (curr_queue_size >= cg1r->yellow_zone()) { 9.52 + dcqs.set_completed_queue_padding(curr_queue_size); 9.53 + } else { 9.54 + dcqs.set_completed_queue_padding(0); 9.55 + } 9.56 + dcqs.notify_if_necessary(); 9.57 +} 9.58 + 9.59 double 9.60 G1CollectorPolicy:: 9.61 predict_young_collection_elapsed_time_ms(size_t adjustment) {
10.1 --- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Thu Dec 17 07:02:39 2009 -0800 10.2 +++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp Tue Dec 22 22:35:08 2009 -0800 10.3 @@ -316,6 +316,10 @@ 10.4 bool verify_young_ages(HeapRegion* head, SurvRateGroup *surv_rate_group); 10.5 #endif // PRODUCT 10.6 10.7 + void adjust_concurrent_refinement(double update_rs_time, 10.8 + double update_rs_processed_buffers, 10.9 + double goal_ms); 10.10 + 10.11 protected: 10.12 double _pause_time_target_ms; 10.13 double _recorded_young_cset_choice_time_ms;
11.1 --- a/src/share/vm/gc_implementation/g1/g1_globals.hpp Thu Dec 17 07:02:39 2009 -0800 11.2 +++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp Tue Dec 22 22:35:08 2009 -0800 11.3 @@ -85,7 +85,7 @@ 11.4 diagnostic(bool, G1SummarizeZFStats, false, \ 11.5 "Summarize zero-filling info") \ 11.6 \ 11.7 - develop(bool, G1TraceConcurrentRefinement, false, \ 11.8 + diagnostic(bool, G1TraceConcurrentRefinement, false, \ 11.9 "Trace G1 concurrent refinement") \ 11.10 \ 11.11 product(intx, G1MarkStackSize, 2 * 1024 * 1024, \ 11.12 @@ -94,19 +94,6 @@ 11.13 product(intx, G1MarkRegionStackSize, 1024 * 1024, \ 11.14 "Size of the region stack for concurrent marking.") \ 11.15 \ 11.16 - develop(bool, G1ConcRefine, true, \ 11.17 - "If true, run concurrent rem set refinement for G1") \ 11.18 - \ 11.19 - develop(intx, G1ConcRefineTargTraversals, 4, \ 11.20 - "Number of concurrent refinement we try to achieve") \ 11.21 - \ 11.22 - develop(intx, G1ConcRefineInitialDelta, 4, \ 11.23 - "Number of heap regions of alloc ahead of starting collection " \ 11.24 - "pause to start concurrent refinement (initially)") \ 11.25 - \ 11.26 - develop(bool, G1SmoothConcRefine, true, \ 11.27 - "Attempts to smooth out the overhead of concurrent refinement") \ 11.28 - \ 11.29 develop(bool, G1ConcZeroFill, true, \ 11.30 "If true, run concurrent zero-filling thread") \ 11.31 \ 11.32 @@ -178,13 +165,38 @@ 11.33 product(intx, G1UpdateBufferSize, 256, \ 11.34 "Size of an update buffer") \ 11.35 \ 11.36 - product(intx, G1UpdateBufferQueueProcessingThreshold, 5, \ 11.37 + product(intx, G1ConcRefineYellowZone, 0, \ 11.38 "Number of enqueued update buffers that will " \ 11.39 - "trigger concurrent processing") \ 11.40 + "trigger concurrent processing. 
Will be selected ergonomically " \ 11.41 + "by default.") \ 11.42 \ 11.43 - product(intx, G1UpdateBufferQueueMaxLength, 30, \ 11.44 + product(intx, G1ConcRefineRedZone, 0, \ 11.45 "Maximum number of enqueued update buffers before mutator " \ 11.46 - "threads start processing new ones instead of enqueueing them") \ 11.47 + "threads start processing new ones instead of enqueueing them. " \ 11.48 + "Will be selected ergonomically by default. Zero will disable " \ 11.49 + "concurrent processing.") \ 11.50 + \ 11.51 + product(intx, G1ConcRefineGreenZone, 0, \ 11.52 + "The number of update buffers that are left in the queue by the " \ 11.53 + "concurrent processing threads. Will be selected ergonomically " \ 11.54 + "by default.") \ 11.55 + \ 11.56 + product(intx, G1ConcRefineServiceInterval, 300, \ 11.57 + "The last concurrent refinement thread wakes up every " \ 11.58 + "specified number of milliseconds to do miscellaneous work.") \ 11.59 + \ 11.60 + product(intx, G1ConcRefineThresholdStep, 0, \ 11.61 + "Each time the rset update queue increases by this amount " \ 11.62 + "activate the next refinement thread if available. " \ 11.63 + "Will be selected ergonomically by default.") \ 11.64 + \ 11.65 + product(intx, G1RSUpdatePauseFractionPercent, 10, \ 11.66 + "A target percentage of time that is allowed to be spent " \ 11.67 + "processing RS update buffers during the collection pause.") \ 11.68 + \ 11.69 + product(bool, G1AdaptiveConcRefine, true, \ 11.70 + "Select green, yellow and red zones adaptively to meet the " \ 11.71 + "pause requirements.") \ 11.72 \ 11.73 develop(intx, G1ConcRSLogCacheSize, 10, \ 11.74 "Log base 2 of the length of conc RS hot-card cache.") \
12.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.cpp Thu Dec 17 07:02:39 2009 -0800 12.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.cpp Tue Dec 22 22:35:08 2009 -0800 12.3 @@ -64,8 +64,8 @@ 12.4 while (_index == 0) { 12.5 handle_zero_index(); 12.6 } 12.7 + 12.8 assert(_index > 0, "postcondition"); 12.9 - 12.10 _index -= oopSize; 12.11 _buf[byte_index_to_index((int)_index)] = ptr; 12.12 assert(0 <= _index && _index <= _sz, "Invariant."); 12.13 @@ -99,95 +99,110 @@ 12.14 assert(_sz > 0, "Didn't set a buffer size."); 12.15 MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); 12.16 if (_fl_owner->_buf_free_list != NULL) { 12.17 - void** res = _fl_owner->_buf_free_list; 12.18 - _fl_owner->_buf_free_list = (void**)_fl_owner->_buf_free_list[0]; 12.19 + void** res = BufferNode::make_buffer_from_node(_fl_owner->_buf_free_list); 12.20 + _fl_owner->_buf_free_list = _fl_owner->_buf_free_list->next(); 12.21 _fl_owner->_buf_free_list_sz--; 12.22 - // Just override the next pointer with NULL, just in case we scan this part 12.23 - // of the buffer. 12.24 - res[0] = NULL; 12.25 return res; 12.26 } else { 12.27 - return (void**) NEW_C_HEAP_ARRAY(char, _sz); 12.28 + // Allocate space for the BufferNode in front of the buffer. 
12.29 + char *b = NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size()); 12.30 + return BufferNode::make_buffer_from_block(b); 12.31 } 12.32 } 12.33 12.34 void PtrQueueSet::deallocate_buffer(void** buf) { 12.35 assert(_sz > 0, "Didn't set a buffer size."); 12.36 MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag); 12.37 - buf[0] = (void*)_fl_owner->_buf_free_list; 12.38 - _fl_owner->_buf_free_list = buf; 12.39 + BufferNode *node = BufferNode::make_node_from_buffer(buf); 12.40 + node->set_next(_fl_owner->_buf_free_list); 12.41 + _fl_owner->_buf_free_list = node; 12.42 _fl_owner->_buf_free_list_sz++; 12.43 } 12.44 12.45 void PtrQueueSet::reduce_free_list() { 12.46 + assert(_fl_owner == this, "Free list reduction is allowed only for the owner"); 12.47 // For now we'll adopt the strategy of deleting half. 12.48 MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); 12.49 size_t n = _buf_free_list_sz / 2; 12.50 while (n > 0) { 12.51 assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong."); 12.52 - void** head = _buf_free_list; 12.53 - _buf_free_list = (void**)_buf_free_list[0]; 12.54 - FREE_C_HEAP_ARRAY(char, head); 12.55 + void* b = BufferNode::make_block_from_node(_buf_free_list); 12.56 + _buf_free_list = _buf_free_list->next(); 12.57 + FREE_C_HEAP_ARRAY(char, b); 12.58 _buf_free_list_sz --; 12.59 n--; 12.60 } 12.61 } 12.62 12.63 -void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index, bool ignore_max_completed) { 12.64 - // I use explicit locking here because there's a bailout in the middle. 12.65 - _cbl_mon->lock_without_safepoint_check(); 12.66 +void PtrQueue::handle_zero_index() { 12.67 + assert(0 == _index, "Precondition."); 12.68 + // This thread records the full buffer and allocates a new one (while 12.69 + // holding the lock if there is one). 
12.70 + if (_buf != NULL) { 12.71 + if (_lock) { 12.72 + locking_enqueue_completed_buffer(_buf); 12.73 + } else { 12.74 + if (qset()->process_or_enqueue_complete_buffer(_buf)) { 12.75 + // Recycle the buffer. No allocation. 12.76 + _sz = qset()->buffer_size(); 12.77 + _index = _sz; 12.78 + return; 12.79 + } 12.80 + } 12.81 + } 12.82 + // Reallocate the buffer 12.83 + _buf = qset()->allocate_buffer(); 12.84 + _sz = qset()->buffer_size(); 12.85 + _index = _sz; 12.86 + assert(0 <= _index && _index <= _sz, "Invariant."); 12.87 +} 12.88 12.89 - Thread* thread = Thread::current(); 12.90 - assert( ignore_max_completed || 12.91 - thread->is_Java_thread() || 12.92 - SafepointSynchronize::is_at_safepoint(), 12.93 - "invariant" ); 12.94 - ignore_max_completed = ignore_max_completed || !thread->is_Java_thread(); 12.95 +bool PtrQueueSet::process_or_enqueue_complete_buffer(void** buf) { 12.96 + if (Thread::current()->is_Java_thread()) { 12.97 + // We don't lock. It is fine to be epsilon-precise here. 12.98 + if (_max_completed_queue == 0 || _max_completed_queue > 0 && 12.99 + _n_completed_buffers >= _max_completed_queue + _completed_queue_padding) { 12.100 + bool b = mut_process_buffer(buf); 12.101 + if (b) { 12.102 + // True here means that the buffer hasn't been deallocated and the caller may reuse it. 12.103 + return true; 12.104 + } 12.105 + } 12.106 + } 12.107 + // The buffer will be enqueued. The caller will have to get a new one. 12.108 + enqueue_complete_buffer(buf); 12.109 + return false; 12.110 +} 12.111 12.112 - if (!ignore_max_completed && _max_completed_queue > 0 && 12.113 - _n_completed_buffers >= (size_t) _max_completed_queue) { 12.114 - _cbl_mon->unlock(); 12.115 - bool b = mut_process_buffer(buf); 12.116 - if (b) { 12.117 - deallocate_buffer(buf); 12.118 - return; 12.119 - } 12.120 - 12.121 - // Otherwise, go ahead and enqueue the buffer. Must reaquire the lock. 
12.122 - _cbl_mon->lock_without_safepoint_check(); 12.123 - } 12.124 - 12.125 - // Here we still hold the _cbl_mon. 12.126 - CompletedBufferNode* cbn = new CompletedBufferNode; 12.127 - cbn->buf = buf; 12.128 - cbn->next = NULL; 12.129 - cbn->index = index; 12.130 +void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index) { 12.131 + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 12.132 + BufferNode* cbn = BufferNode::new_from_buffer(buf); 12.133 + cbn->set_index(index); 12.134 if (_completed_buffers_tail == NULL) { 12.135 assert(_completed_buffers_head == NULL, "Well-formedness"); 12.136 _completed_buffers_head = cbn; 12.137 _completed_buffers_tail = cbn; 12.138 } else { 12.139 - _completed_buffers_tail->next = cbn; 12.140 + _completed_buffers_tail->set_next(cbn); 12.141 _completed_buffers_tail = cbn; 12.142 } 12.143 _n_completed_buffers++; 12.144 12.145 - if (!_process_completed && 12.146 + if (!_process_completed && _process_completed_threshold >= 0 && 12.147 _n_completed_buffers >= _process_completed_threshold) { 12.148 _process_completed = true; 12.149 if (_notify_when_complete) 12.150 - _cbl_mon->notify_all(); 12.151 + _cbl_mon->notify(); 12.152 } 12.153 debug_only(assert_completed_buffer_list_len_correct_locked()); 12.154 - _cbl_mon->unlock(); 12.155 } 12.156 12.157 int PtrQueueSet::completed_buffers_list_length() { 12.158 int n = 0; 12.159 - CompletedBufferNode* cbn = _completed_buffers_head; 12.160 + BufferNode* cbn = _completed_buffers_head; 12.161 while (cbn != NULL) { 12.162 n++; 12.163 - cbn = cbn->next; 12.164 + cbn = cbn->next(); 12.165 } 12.166 return n; 12.167 } 12.168 @@ -198,7 +213,7 @@ 12.169 } 12.170 12.171 void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() { 12.172 - guarantee((size_t)completed_buffers_list_length() == _n_completed_buffers, 12.173 + guarantee(completed_buffers_list_length() == _n_completed_buffers, 12.174 "Completed buffer length is wrong."); 12.175 } 12.176 12.177 @@ -207,12 +222,8 @@ 
12.178 _sz = sz * oopSize; 12.179 } 12.180 12.181 -void PtrQueueSet::set_process_completed_threshold(size_t sz) { 12.182 - _process_completed_threshold = sz; 12.183 -} 12.184 - 12.185 -// Merge lists of buffers. Notify waiting threads if the length of the list 12.186 -// exceeds threshold. The source queue is emptied as a result. The queues 12.187 +// Merge lists of buffers. Notify the processing threads. 12.188 +// The source queue is emptied as a result. The queues 12.189 // must share the monitor. 12.190 void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) { 12.191 assert(_cbl_mon == src->_cbl_mon, "Should share the same lock"); 12.192 @@ -224,7 +235,7 @@ 12.193 } else { 12.194 assert(_completed_buffers_head != NULL, "Well formedness"); 12.195 if (src->_completed_buffers_head != NULL) { 12.196 - _completed_buffers_tail->next = src->_completed_buffers_head; 12.197 + _completed_buffers_tail->set_next(src->_completed_buffers_head); 12.198 _completed_buffers_tail = src->_completed_buffers_tail; 12.199 } 12.200 } 12.201 @@ -237,31 +248,13 @@ 12.202 assert(_completed_buffers_head == NULL && _completed_buffers_tail == NULL || 12.203 _completed_buffers_head != NULL && _completed_buffers_tail != NULL, 12.204 "Sanity"); 12.205 +} 12.206 12.207 - if (!_process_completed && 12.208 - _n_completed_buffers >= _process_completed_threshold) { 12.209 +void PtrQueueSet::notify_if_necessary() { 12.210 + MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 12.211 + if (_n_completed_buffers >= _process_completed_threshold || _max_completed_queue == 0) { 12.212 _process_completed = true; 12.213 if (_notify_when_complete) 12.214 - _cbl_mon->notify_all(); 12.215 + _cbl_mon->notify(); 12.216 } 12.217 } 12.218 - 12.219 -// Merge free lists of the two queues. The free list of the source 12.220 -// queue is emptied as a result. The queues must share the same 12.221 -// mutex that guards free lists. 
12.222 -void PtrQueueSet::merge_freelists(PtrQueueSet* src) { 12.223 - assert(_fl_lock == src->_fl_lock, "Should share the same lock"); 12.224 - MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag); 12.225 - if (_buf_free_list != NULL) { 12.226 - void **p = _buf_free_list; 12.227 - while (*p != NULL) { 12.228 - p = (void**)*p; 12.229 - } 12.230 - *p = src->_buf_free_list; 12.231 - } else { 12.232 - _buf_free_list = src->_buf_free_list; 12.233 - } 12.234 - _buf_free_list_sz += src->_buf_free_list_sz; 12.235 - src->_buf_free_list = NULL; 12.236 - src->_buf_free_list_sz = 0; 12.237 -}
13.1 --- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp Thu Dec 17 07:02:39 2009 -0800 13.2 +++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp Tue Dec 22 22:35:08 2009 -0800 13.3 @@ -27,8 +27,10 @@ 13.4 // the addresses of modified old-generation objects. This type supports 13.5 // this operation. 13.6 13.7 +// The definition of placement operator new(size_t, void*) in the <new>. 13.8 +#include <new> 13.9 + 13.10 class PtrQueueSet; 13.11 - 13.12 class PtrQueue VALUE_OBJ_CLASS_SPEC { 13.13 13.14 protected: 13.15 @@ -77,7 +79,7 @@ 13.16 else enqueue_known_active(ptr); 13.17 } 13.18 13.19 - inline void handle_zero_index(); 13.20 + void handle_zero_index(); 13.21 void locking_enqueue_completed_buffer(void** buf); 13.22 13.23 void enqueue_known_active(void* ptr); 13.24 @@ -126,34 +128,65 @@ 13.25 13.26 }; 13.27 13.28 +class BufferNode { 13.29 + size_t _index; 13.30 + BufferNode* _next; 13.31 +public: 13.32 + BufferNode() : _index(0), _next(NULL) { } 13.33 + BufferNode* next() const { return _next; } 13.34 + void set_next(BufferNode* n) { _next = n; } 13.35 + size_t index() const { return _index; } 13.36 + void set_index(size_t i) { _index = i; } 13.37 + 13.38 + // Align the size of the structure to the size of the pointer 13.39 + static size_t aligned_size() { 13.40 + static const size_t alignment = round_to(sizeof(BufferNode), sizeof(void*)); 13.41 + return alignment; 13.42 + } 13.43 + 13.44 + // BufferNode is allocated before the buffer. 13.45 + // The chunk of memory that holds both of them is a block. 13.46 + 13.47 + // Produce a new BufferNode given a buffer. 
13.48 + static BufferNode* new_from_buffer(void** buf) { 13.49 + return new (make_block_from_buffer(buf)) BufferNode; 13.50 + } 13.51 + 13.52 + // The following are the required conversion routines: 13.53 + static BufferNode* make_node_from_buffer(void** buf) { 13.54 + return (BufferNode*)make_block_from_buffer(buf); 13.55 + } 13.56 + static void** make_buffer_from_node(BufferNode *node) { 13.57 + return make_buffer_from_block(node); 13.58 + } 13.59 + static void* make_block_from_node(BufferNode *node) { 13.60 + return (void*)node; 13.61 + } 13.62 + static void** make_buffer_from_block(void* p) { 13.63 + return (void**)((char*)p + aligned_size()); 13.64 + } 13.65 + static void* make_block_from_buffer(void** p) { 13.66 + return (void*)((char*)p - aligned_size()); 13.67 + } 13.68 +}; 13.69 + 13.70 // A PtrQueueSet represents resources common to a set of pointer queues. 13.71 // In particular, the individual queues allocate buffers from this shared 13.72 // set, and return completed buffers to the set. 13.73 // All these variables are are protected by the TLOQ_CBL_mon. XXX ??? 13.74 class PtrQueueSet VALUE_OBJ_CLASS_SPEC { 13.75 - 13.76 protected: 13.77 - 13.78 - class CompletedBufferNode: public CHeapObj { 13.79 - public: 13.80 - void** buf; 13.81 - size_t index; 13.82 - CompletedBufferNode* next; 13.83 - CompletedBufferNode() : buf(NULL), 13.84 - index(0), next(NULL){ } 13.85 - }; 13.86 - 13.87 Monitor* _cbl_mon; // Protects the fields below. 13.88 - CompletedBufferNode* _completed_buffers_head; 13.89 - CompletedBufferNode* _completed_buffers_tail; 13.90 - size_t _n_completed_buffers; 13.91 - size_t _process_completed_threshold; 13.92 + BufferNode* _completed_buffers_head; 13.93 + BufferNode* _completed_buffers_tail; 13.94 + int _n_completed_buffers; 13.95 + int _process_completed_threshold; 13.96 volatile bool _process_completed; 13.97 13.98 // This (and the interpretation of the first element as a "next" 13.99 // pointer) are protected by the TLOQ_FL_lock. 
13.100 Mutex* _fl_lock; 13.101 - void** _buf_free_list; 13.102 + BufferNode* _buf_free_list; 13.103 size_t _buf_free_list_sz; 13.104 // Queue set can share a freelist. The _fl_owner variable 13.105 // specifies the owner. It is set to "this" by default. 13.106 @@ -170,6 +203,7 @@ 13.107 // Maximum number of elements allowed on completed queue: after that, 13.108 // enqueuer does the work itself. Zero indicates no maximum. 13.109 int _max_completed_queue; 13.110 + int _completed_queue_padding; 13.111 13.112 int completed_buffers_list_length(); 13.113 void assert_completed_buffer_list_len_correct_locked(); 13.114 @@ -191,9 +225,12 @@ 13.115 // Because of init-order concerns, we can't pass these as constructor 13.116 // arguments. 13.117 void initialize(Monitor* cbl_mon, Mutex* fl_lock, 13.118 - int max_completed_queue = 0, 13.119 + int process_completed_threshold, 13.120 + int max_completed_queue, 13.121 PtrQueueSet *fl_owner = NULL) { 13.122 _max_completed_queue = max_completed_queue; 13.123 + _process_completed_threshold = process_completed_threshold; 13.124 + _completed_queue_padding = 0; 13.125 assert(cbl_mon != NULL && fl_lock != NULL, "Init order issue?"); 13.126 _cbl_mon = cbl_mon; 13.127 _fl_lock = fl_lock; 13.128 @@ -208,14 +245,17 @@ 13.129 void deallocate_buffer(void** buf); 13.130 13.131 // Declares that "buf" is a complete buffer. 13.132 - void enqueue_complete_buffer(void** buf, size_t index = 0, 13.133 - bool ignore_max_completed = false); 13.134 + void enqueue_complete_buffer(void** buf, size_t index = 0); 13.135 + 13.136 + // To be invoked by the mutator. 
13.137 + bool process_or_enqueue_complete_buffer(void** buf); 13.138 13.139 bool completed_buffers_exist_dirty() { 13.140 return _n_completed_buffers > 0; 13.141 } 13.142 13.143 bool process_completed_buffers() { return _process_completed; } 13.144 + void set_process_completed(bool x) { _process_completed = x; } 13.145 13.146 bool active() { return _all_active; } 13.147 13.148 @@ -226,15 +266,24 @@ 13.149 // Get the buffer size. 13.150 size_t buffer_size() { return _sz; } 13.151 13.152 - // Set the number of completed buffers that triggers log processing. 13.153 - void set_process_completed_threshold(size_t sz); 13.154 + // Get/Set the number of completed buffers that triggers log processing. 13.155 + void set_process_completed_threshold(int sz) { _process_completed_threshold = sz; } 13.156 + int process_completed_threshold() const { return _process_completed_threshold; } 13.157 13.158 // Must only be called at a safe point. Indicates that the buffer free 13.159 // list size may be reduced, if that is deemed desirable. 13.160 void reduce_free_list(); 13.161 13.162 - size_t completed_buffers_num() { return _n_completed_buffers; } 13.163 + int completed_buffers_num() { return _n_completed_buffers; } 13.164 13.165 void merge_bufferlists(PtrQueueSet* src); 13.166 - void merge_freelists(PtrQueueSet* src); 13.167 + 13.168 + void set_max_completed_queue(int m) { _max_completed_queue = m; } 13.169 + int max_completed_queue() { return _max_completed_queue; } 13.170 + 13.171 + void set_completed_queue_padding(int padding) { _completed_queue_padding = padding; } 13.172 + int completed_queue_padding() { return _completed_queue_padding; } 13.173 + 13.174 + // Notify the consumer if the number of buffers crossed the threshold 13.175 + void notify_if_necessary(); 13.176 };
14.1 --- a/src/share/vm/gc_implementation/g1/satbQueue.cpp Thu Dec 17 07:02:39 2009 -0800 14.2 +++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp Tue Dec 22 22:35:08 2009 -0800 14.3 @@ -67,9 +67,9 @@ 14.4 {} 14.5 14.6 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock, 14.7 - int max_completed_queue, 14.8 + int process_completed_threshold, 14.9 Mutex* lock) { 14.10 - PtrQueueSet::initialize(cbl_mon, fl_lock, max_completed_queue); 14.11 + PtrQueueSet::initialize(cbl_mon, fl_lock, process_completed_threshold, -1); 14.12 _shared_satb_queue.set_lock(lock); 14.13 if (ParallelGCThreads > 0) { 14.14 _par_closures = NEW_C_HEAP_ARRAY(ObjectClosure*, ParallelGCThreads); 14.15 @@ -122,12 +122,12 @@ 14.16 14.17 bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par, 14.18 int worker) { 14.19 - CompletedBufferNode* nd = NULL; 14.20 + BufferNode* nd = NULL; 14.21 { 14.22 MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 14.23 if (_completed_buffers_head != NULL) { 14.24 nd = _completed_buffers_head; 14.25 - _completed_buffers_head = nd->next; 14.26 + _completed_buffers_head = nd->next(); 14.27 if (_completed_buffers_head == NULL) _completed_buffers_tail = NULL; 14.28 _n_completed_buffers--; 14.29 if (_n_completed_buffers == 0) _process_completed = false; 14.30 @@ -135,9 +135,9 @@ 14.31 } 14.32 ObjectClosure* cl = (par ? 
_par_closures[worker] : _closure); 14.33 if (nd != NULL) { 14.34 - ObjPtrQueue::apply_closure_to_buffer(cl, nd->buf, 0, _sz); 14.35 - deallocate_buffer(nd->buf); 14.36 - delete nd; 14.37 + void **buf = BufferNode::make_buffer_from_node(nd); 14.38 + ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz); 14.39 + deallocate_buffer(buf); 14.40 return true; 14.41 } else { 14.42 return false; 14.43 @@ -145,13 +145,13 @@ 14.44 } 14.45 14.46 void SATBMarkQueueSet::abandon_partial_marking() { 14.47 - CompletedBufferNode* buffers_to_delete = NULL; 14.48 + BufferNode* buffers_to_delete = NULL; 14.49 { 14.50 MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag); 14.51 while (_completed_buffers_head != NULL) { 14.52 - CompletedBufferNode* nd = _completed_buffers_head; 14.53 - _completed_buffers_head = nd->next; 14.54 - nd->next = buffers_to_delete; 14.55 + BufferNode* nd = _completed_buffers_head; 14.56 + _completed_buffers_head = nd->next(); 14.57 + nd->set_next(buffers_to_delete); 14.58 buffers_to_delete = nd; 14.59 } 14.60 _completed_buffers_tail = NULL; 14.61 @@ -159,10 +159,9 @@ 14.62 DEBUG_ONLY(assert_completed_buffer_list_len_correct_locked()); 14.63 } 14.64 while (buffers_to_delete != NULL) { 14.65 - CompletedBufferNode* nd = buffers_to_delete; 14.66 - buffers_to_delete = nd->next; 14.67 - deallocate_buffer(nd->buf); 14.68 - delete nd; 14.69 + BufferNode* nd = buffers_to_delete; 14.70 + buffers_to_delete = nd->next(); 14.71 + deallocate_buffer(BufferNode::make_buffer_from_node(nd)); 14.72 } 14.73 assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint."); 14.74 // So we can safely manipulate these queues.
15.1 --- a/src/share/vm/gc_implementation/g1/satbQueue.hpp Thu Dec 17 07:02:39 2009 -0800 15.2 +++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp Tue Dec 22 22:35:08 2009 -0800 15.3 @@ -60,8 +60,8 @@ 15.4 SATBMarkQueueSet(); 15.5 15.6 void initialize(Monitor* cbl_mon, Mutex* fl_lock, 15.7 - int max_completed_queue = 0, 15.8 - Mutex* lock = NULL); 15.9 + int process_completed_threshold, 15.10 + Mutex* lock); 15.11 15.12 static void handle_zero_index_for_thread(JavaThread* t); 15.13
16.1 --- a/src/share/vm/gc_implementation/includeDB_gc_g1 Thu Dec 17 07:02:39 2009 -0800 16.2 +++ b/src/share/vm/gc_implementation/includeDB_gc_g1 Tue Dec 22 22:35:08 2009 -0800 16.3 @@ -109,7 +109,6 @@ 16.4 dirtyCardQueue.cpp dirtyCardQueue.hpp 16.5 dirtyCardQueue.cpp heapRegionRemSet.hpp 16.6 dirtyCardQueue.cpp mutexLocker.hpp 16.7 -dirtyCardQueue.cpp ptrQueue.inline.hpp 16.8 dirtyCardQueue.cpp safepoint.hpp 16.9 dirtyCardQueue.cpp thread.hpp 16.10 dirtyCardQueue.cpp thread_<os_family>.inline.hpp 16.11 @@ -319,7 +318,6 @@ 16.12 ptrQueue.cpp mutex.hpp 16.13 ptrQueue.cpp mutexLocker.hpp 16.14 ptrQueue.cpp ptrQueue.hpp 16.15 -ptrQueue.cpp ptrQueue.inline.hpp 16.16 ptrQueue.cpp thread_<os_family>.inline.hpp 16.17 16.18 ptrQueue.hpp allocation.hpp 16.19 @@ -329,7 +327,6 @@ 16.20 16.21 satbQueue.cpp allocation.inline.hpp 16.22 satbQueue.cpp mutexLocker.hpp 16.23 -satbQueue.cpp ptrQueue.inline.hpp 16.24 satbQueue.cpp satbQueue.hpp 16.25 satbQueue.cpp sharedHeap.hpp 16.26 satbQueue.cpp thread.hpp