Fri, 10 Oct 2014 15:51:58 +0200
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks: those benchmarks never touch that memory, so without the explicit initialization the operating system would not actually commit these pages. The fix is to skip the initialization when the initialization value of the data structures matches the default value of just-committed memory (=0).
Reviewed-by: jwilhelm, brutisso
1 /*
2 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "gc_implementation/g1/concurrentG1Refine.hpp"
27 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
28 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
29 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
30 #include "memory/resourceArea.hpp"
31 #include "runtime/handles.inline.hpp"
32 #include "runtime/mutexLocker.hpp"
34 ConcurrentG1RefineThread::
35 ConcurrentG1RefineThread(ConcurrentG1Refine* cg1r, ConcurrentG1RefineThread *next,
36 CardTableEntryClosure* refine_closure,
37 uint worker_id_offset, uint worker_id) :
38 ConcurrentGCThread(),
39 _refine_closure(refine_closure),
40 _worker_id_offset(worker_id_offset),
41 _worker_id(worker_id),
42 _active(false),
43 _next(next),
44 _monitor(NULL),
45 _cg1r(cg1r),
46 _vtime_accum(0.0)
47 {
49 // Each thread has its own monitor. The i-th thread is responsible for signalling
50 // to thread i+1 if the number of buffers in the queue exceeds a threashold for this
51 // thread. Monitors are also used to wake up the threads during termination.
52 // The 0th worker in notified by mutator threads and has a special monitor.
53 // The last worker is used for young gen rset size sampling.
54 if (worker_id > 0) {
55 _monitor = new Monitor(Mutex::nonleaf, "Refinement monitor", true);
56 } else {
57 _monitor = DirtyCardQ_CBL_mon;
58 }
59 initialize();
60 create_and_start();
61 }
63 void ConcurrentG1RefineThread::initialize() {
64 if (_worker_id < cg1r()->worker_thread_num()) {
65 // Current thread activation threshold
66 _threshold = MIN2<int>(cg1r()->thread_threshold_step() * (_worker_id + 1) + cg1r()->green_zone(),
67 cg1r()->yellow_zone());
68 // A thread deactivates once the number of buffer reached a deactivation threshold
69 _deactivation_threshold = MAX2<int>(_threshold - cg1r()->thread_threshold_step(), cg1r()->green_zone());
70 } else {
71 set_active(true);
72 }
73 }
75 void ConcurrentG1RefineThread::sample_young_list_rs_lengths() {
76 SuspendibleThreadSetJoiner sts;
77 G1CollectedHeap* g1h = G1CollectedHeap::heap();
78 G1CollectorPolicy* g1p = g1h->g1_policy();
79 if (g1p->adaptive_young_list_length()) {
80 int regions_visited = 0;
81 g1h->young_list()->rs_length_sampling_init();
82 while (g1h->young_list()->rs_length_sampling_more()) {
83 g1h->young_list()->rs_length_sampling_next();
84 ++regions_visited;
86 // we try to yield every time we visit 10 regions
87 if (regions_visited == 10) {
88 if (sts.should_yield()) {
89 sts.yield();
90 // we just abandon the iteration
91 break;
92 }
93 regions_visited = 0;
94 }
95 }
97 g1p->revise_young_list_target_length_if_necessary();
98 }
99 }
101 void ConcurrentG1RefineThread::run_young_rs_sampling() {
102 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
103 _vtime_start = os::elapsedVTime();
104 while(!_should_terminate) {
105 sample_young_list_rs_lengths();
107 if (os::supports_vtime()) {
108 _vtime_accum = (os::elapsedVTime() - _vtime_start);
109 } else {
110 _vtime_accum = 0.0;
111 }
113 MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
114 if (_should_terminate) {
115 break;
116 }
117 _monitor->wait(Mutex::_no_safepoint_check_flag, G1ConcRefinementServiceIntervalMillis);
118 }
119 }
121 void ConcurrentG1RefineThread::wait_for_completed_buffers() {
122 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
123 MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
124 while (!_should_terminate && !is_active()) {
125 _monitor->wait(Mutex::_no_safepoint_check_flag);
126 }
127 }
129 bool ConcurrentG1RefineThread::is_active() {
130 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
131 return _worker_id > 0 ? _active : dcqs.process_completed_buffers();
132 }
134 void ConcurrentG1RefineThread::activate() {
135 MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
136 if (_worker_id > 0) {
137 if (G1TraceConcRefinement) {
138 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
139 gclog_or_tty->print_cr("G1-Refine-activated worker %d, on threshold %d, current %d",
140 _worker_id, _threshold, (int)dcqs.completed_buffers_num());
141 }
142 set_active(true);
143 } else {
144 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
145 dcqs.set_process_completed(true);
146 }
147 _monitor->notify();
148 }
150 void ConcurrentG1RefineThread::deactivate() {
151 MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
152 if (_worker_id > 0) {
153 if (G1TraceConcRefinement) {
154 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
155 gclog_or_tty->print_cr("G1-Refine-deactivated worker %d, off threshold %d, current %d",
156 _worker_id, _deactivation_threshold, (int)dcqs.completed_buffers_num());
157 }
158 set_active(false);
159 } else {
160 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
161 dcqs.set_process_completed(false);
162 }
163 }
165 void ConcurrentG1RefineThread::run() {
166 initialize_in_thread();
167 wait_for_universe_init();
169 if (_worker_id >= cg1r()->worker_thread_num()) {
170 run_young_rs_sampling();
171 terminate();
172 return;
173 }
175 _vtime_start = os::elapsedVTime();
176 while (!_should_terminate) {
177 DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
179 // Wait for work
180 wait_for_completed_buffers();
182 if (_should_terminate) {
183 break;
184 }
186 {
187 SuspendibleThreadSetJoiner sts;
189 do {
190 int curr_buffer_num = (int)dcqs.completed_buffers_num();
191 // If the number of the buffers falls down into the yellow zone,
192 // that means that the transition period after the evacuation pause has ended.
193 if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cg1r()->yellow_zone()) {
194 dcqs.set_completed_queue_padding(0);
195 }
197 if (_worker_id > 0 && curr_buffer_num <= _deactivation_threshold) {
198 // If the number of the buffer has fallen below our threshold
199 // we should deactivate. The predecessor will reactivate this
200 // thread should the number of the buffers cross the threshold again.
201 deactivate();
202 break;
203 }
205 // Check if we need to activate the next thread.
206 if (_next != NULL && !_next->is_active() && curr_buffer_num > _next->_threshold) {
207 _next->activate();
208 }
209 } while (dcqs.apply_closure_to_completed_buffer(_refine_closure, _worker_id + _worker_id_offset, cg1r()->green_zone()));
211 // We can exit the loop above while being active if there was a yield request.
212 if (is_active()) {
213 deactivate();
214 }
215 }
217 if (os::supports_vtime()) {
218 _vtime_accum = (os::elapsedVTime() - _vtime_start);
219 } else {
220 _vtime_accum = 0.0;
221 }
222 }
223 assert(_should_terminate, "just checking");
224 terminate();
225 }
227 void ConcurrentG1RefineThread::stop() {
228 // it is ok to take late safepoints here, if needed
229 {
230 MutexLockerEx mu(Terminator_lock);
231 _should_terminate = true;
232 }
234 {
235 MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
236 _monitor->notify();
237 }
239 {
240 MutexLockerEx mu(Terminator_lock);
241 while (!_has_terminated) {
242 Terminator_lock->wait();
243 }
244 }
245 if (G1TraceConcRefinement) {
246 gclog_or_tty->print_cr("G1-Refine-stop");
247 }
248 }
250 void ConcurrentG1RefineThread::print() const {
251 print_on(tty);
252 }
254 void ConcurrentG1RefineThread::print_on(outputStream* st) const {
255 st->print("\"G1 Concurrent Refinement Thread#%d\" ", _worker_id);
256 Thread::print_on(st);
257 st->cr();
258 }