Mon, 03 Aug 2009 12:59:30 -0700
6865703: G1: Parallelize hot card cache cleanup
Summary: Have the GC worker threads clear the hot card cache in parallel by having each worker thread claim a chunk of the card cache and process the cards in that chunk. The size of the chunks that each thread will claim is determined at VM initialization from the size of the card cache and the number of worker threads.
Reviewed-by: jmasa, tonyp
1 /*
2 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentG1RefineThread.cpp.incl"
28 // ======= Concurrent G1 Refinement Thread ========
30 // The concurrent refinement thread is created when the G1 garbage collector is used
32 ConcurrentG1RefineThread::
33 ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
34 int worker_id_offset, int worker_id) :
35 ConcurrentGCThread(),
36 _worker_id_offset(worker_id_offset),
37 _worker_id(worker_id),
38 _active(false),
39 _next(next),
40 _cg1r(cg1r),
41 _vtime_accum(0.0),
42 _co_tracker(G1CRGroup),
43 _interval_ms(5.0)
44 {
45 create_and_start();
46 }
48 void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
49 G1CollectedHeap* g1h = G1CollectedHeap::heap();
50 G1CollectorPolicy* g1p = g1h->g1_policy();
51 if (g1p->adaptive_young_list_length()) {
52 int regions_visited = 0;
54 g1h->young_list_rs_length_sampling_init();
55 while (g1h->young_list_rs_length_sampling_more()) {
56 g1h->young_list_rs_length_sampling_next();
57 ++regions_visited;
59 // we try to yield every time we visit 10 regions
60 if (regions_visited == 10) {
61 if (_sts.should_yield()) {
62 _sts.yield("G1 refine");
63 // we just abandon the iteration
64 break;
65 }
66 regions_visited = 0;
67 }
68 }
70 g1p->check_prediction_validity();
71 }
72 }
74 void ConcurrentG1RefineThread::run() {
75 initialize_in_thread();
76 _vtime_start = os::elapsedVTime();
77 wait_for_universe_init();
79 _co_tracker.enable();
80 _co_tracker.start();
82 while (!_should_terminate) {
83 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
84 // Wait for completed log buffers to exist.
85 {
86 MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
87 while (((_worker_id == 0 && !dcqs.process_completed_buffers()) ||
88 (_worker_id > 0 && !is_active())) &&
89 !_should_terminate) {
90 DirtyCardQ_CBL_mon->wait(Mutex::_no_safepoint_check_flag);
91 }
92 }
94 if (_should_terminate) {
95 return;
96 }
98 // Now we take them off (this doesn't hold locks while it applies
99 // closures.) (If we did a full collection, then we'll do a full
100 // traversal.
101 _sts.join();
102 int n_logs = 0;
103 int lower_limit = 0;
104 double start_vtime_sec; // only used when G1SmoothConcRefine is on
105 int prev_buffer_num; // only used when G1SmoothConcRefine is on
106 // This thread activation threshold
107 int threshold = G1UpdateBufferQueueProcessingThreshold * _worker_id;
108 // Next thread activation threshold
109 int next_threshold = threshold + G1UpdateBufferQueueProcessingThreshold;
110 int deactivation_threshold = MAX2<int>(threshold - G1UpdateBufferQueueProcessingThreshold / 2, 0);
112 if (G1SmoothConcRefine) {
113 lower_limit = 0;
114 start_vtime_sec = os::elapsedVTime();
115 prev_buffer_num = (int) dcqs.completed_buffers_num();
116 } else {
117 lower_limit = G1UpdateBufferQueueProcessingThreshold / 4; // For now.
118 }
119 while (dcqs.apply_closure_to_completed_buffer(_worker_id + _worker_id_offset, lower_limit)) {
120 double end_vtime_sec;
121 double elapsed_vtime_sec;
122 int elapsed_vtime_ms;
123 int curr_buffer_num = (int) dcqs.completed_buffers_num();
125 if (G1SmoothConcRefine) {
126 end_vtime_sec = os::elapsedVTime();
127 elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
128 elapsed_vtime_ms = (int) (elapsed_vtime_sec * 1000.0);
130 if (curr_buffer_num > prev_buffer_num ||
131 curr_buffer_num > next_threshold) {
132 decreaseInterval(elapsed_vtime_ms);
133 } else if (curr_buffer_num < prev_buffer_num) {
134 increaseInterval(elapsed_vtime_ms);
135 }
136 }
137 if (_worker_id == 0) {
138 sample_young_list_rs_lengths();
139 } else if (curr_buffer_num < deactivation_threshold) {
140 // If the number of the buffer has fallen below our threshold
141 // we should deactivate. The predecessor will reactivate this
142 // thread should the number of the buffers cross the threshold again.
143 MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
144 deactivate();
145 if (G1TraceConcurrentRefinement) {
146 gclog_or_tty->print_cr("G1-Refine-deactivated worker %d", _worker_id);
147 }
148 break;
149 }
150 _co_tracker.update(false);
152 // Check if we need to activate the next thread.
153 if (curr_buffer_num > next_threshold && _next != NULL && !_next->is_active()) {
154 MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
155 _next->activate();
156 DirtyCardQ_CBL_mon->notify_all();
157 if (G1TraceConcurrentRefinement) {
158 gclog_or_tty->print_cr("G1-Refine-activated worker %d", _next->_worker_id);
159 }
160 }
162 if (G1SmoothConcRefine) {
163 prev_buffer_num = curr_buffer_num;
164 _sts.leave();
165 os::sleep(Thread::current(), (jlong) _interval_ms, false);
166 _sts.join();
167 start_vtime_sec = os::elapsedVTime();
168 }
169 n_logs++;
170 }
171 _co_tracker.update(false);
172 _sts.leave();
174 if (os::supports_vtime()) {
175 _vtime_accum = (os::elapsedVTime() - _vtime_start);
176 } else {
177 _vtime_accum = 0.0;
178 }
179 }
180 _sts.join();
181 _co_tracker.update(true);
182 _sts.leave();
183 assert(_should_terminate, "just checking");
185 terminate();
186 }
189 void ConcurrentG1RefineThread::yield() {
190 if (G1TraceConcurrentRefinement) gclog_or_tty->print_cr("G1-Refine-yield");
191 _sts.yield("G1 refine");
192 if (G1TraceConcurrentRefinement) gclog_or_tty->print_cr("G1-Refine-yield-end");
193 }
195 void ConcurrentG1RefineThread::stop() {
196 // it is ok to take late safepoints here, if needed
197 {
198 MutexLockerEx mu(Terminator_lock);
199 _should_terminate = true;
200 }
202 {
203 MutexLockerEx x(DirtyCardQ_CBL_mon, Mutex::_no_safepoint_check_flag);
204 DirtyCardQ_CBL_mon->notify_all();
205 }
207 {
208 MutexLockerEx mu(Terminator_lock);
209 while (!_has_terminated) {
210 Terminator_lock->wait();
211 }
212 }
213 if (G1TraceConcurrentRefinement) gclog_or_tty->print_cr("G1-Refine-stop");
214 }
216 void ConcurrentG1RefineThread::print() {
217 gclog_or_tty->print("\"Concurrent G1 Refinement Thread\" ");
218 Thread::print();
219 gclog_or_tty->cr();
220 }