Fri, 10 Oct 2014 15:51:58 +0200
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks, because these benchmarks never touch that memory, so the operating system never actually backs those pages with physical memory. The fix is to skip the initialization entirely when the initialization value of the data structure matches the default contents of freshly committed memory (zero).
Reviewed-by: jwilhelm, brutisso
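
A minimal C++ sketch of the idea described in the summary; the helper name
commit_and_initialize and its signature are hypothetical stand-ins for the
actual G1 virtual-space/mapper code touched by this change:

#include <stddef.h>  // size_t
#include <string.h>  // memset

// Hypothetical helper: initialize a freshly committed memory range.
// Newly committed pages read as zero on first touch, so explicitly
// storing zeroes would only force the OS to back them with physical
// memory. Skip the store when the requested initialization value is
// already the default contents of committed memory (zero).
static void commit_and_initialize(char* base, size_t bytes, char init_value) {
  if (init_value != 0) {
    memset(base, init_value, bytes);
  }
  // init_value == 0: do nothing; the pages already read as zero.
}
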
/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc_implementation/g1/ptrQueue.hpp"
#include "memory/allocation.hpp"
#include "memory/allocation.inline.hpp"
#include "runtime/mutex.hpp"
#include "runtime/mutexLocker.hpp"
#include "runtime/thread.inline.hpp"

PtrQueue::PtrQueue(PtrQueueSet* qset, bool perm, bool active) :
  _qset(qset), _buf(NULL), _index(0), _active(active),
  _perm(perm), _lock(NULL)
{}

void PtrQueue::flush() {
  if (!_perm && _buf != NULL) {
    if (_index == _sz) {
      // No work to do.
      qset()->deallocate_buffer(_buf);
    } else {
      // We must NULL out the unused entries, then enqueue.
      for (size_t i = 0; i < _index; i += oopSize) {
        _buf[byte_index_to_index((int)i)] = NULL;
      }
      qset()->enqueue_complete_buffer(_buf);
    }
    _buf = NULL;
    _index = 0;
  }
}

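// The buffer is filled from the top down: _index is a byte offset that
// starts at _sz (empty) and is decremented by oopSize for each enqueued
// entry, so _index == 0 means the buffer is full.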
void PtrQueue::enqueue_known_active(void* ptr) {
  assert(0 <= _index && _index <= _sz, "Invariant.");
  assert(_index == 0 || _buf != NULL, "invariant");

  while (_index == 0) {
    handle_zero_index();
  }

  assert(_index > 0, "postcondition");
  _index -= oopSize;
  _buf[byte_index_to_index((int)_index)] = ptr;
  assert(0 <= _index && _index <= _sz, "Invariant.");
}

void PtrQueue::locking_enqueue_completed_buffer(void** buf) {
  assert(_lock->owned_by_self(), "Required.");

  // We have to unlock _lock (which may be Shared_DirtyCardQ_lock) before
  // we acquire DirtyCardQ_CBL_mon inside enqueue_complete_buffer as they
  // have the same rank and we may get the "possible deadlock" message.
  _lock->unlock();

  qset()->enqueue_complete_buffer(buf);
  // We must relock only because the caller will unlock, for the normal
  // case.
  _lock->lock_without_safepoint_check();
}

PtrQueueSet::PtrQueueSet(bool notify_when_complete) :
  _max_completed_queue(0),
  _cbl_mon(NULL), _fl_lock(NULL),
  _notify_when_complete(notify_when_complete),
  _sz(0),
  _completed_buffers_head(NULL),
  _completed_buffers_tail(NULL),
  _n_completed_buffers(0),
  _process_completed_threshold(0), _process_completed(false),
  _buf_free_list(NULL), _buf_free_list_sz(0)
{
  _fl_owner = this;
}

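// Buffers are recycled through a per-owner free list guarded by _fl_lock;
// a new block (a BufferNode header followed by the buffer itself) is
// malloc'ed only when the free list is empty.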
void** PtrQueueSet::allocate_buffer() {
  assert(_sz > 0, "Didn't set a buffer size.");
  MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
  if (_fl_owner->_buf_free_list != NULL) {
    void** res = BufferNode::make_buffer_from_node(_fl_owner->_buf_free_list);
    _fl_owner->_buf_free_list = _fl_owner->_buf_free_list->next();
    _fl_owner->_buf_free_list_sz--;
    return res;
  } else {
    // Allocate space for the BufferNode in front of the buffer.
    char *b = NEW_C_HEAP_ARRAY(char, _sz + BufferNode::aligned_size(), mtGC);
    return BufferNode::make_buffer_from_block(b);
  }
}

void PtrQueueSet::deallocate_buffer(void** buf) {
  assert(_sz > 0, "Didn't set a buffer size.");
  MutexLockerEx x(_fl_owner->_fl_lock, Mutex::_no_safepoint_check_flag);
  BufferNode *node = BufferNode::make_node_from_buffer(buf);
  node->set_next(_fl_owner->_buf_free_list);
  _fl_owner->_buf_free_list = node;
  _fl_owner->_buf_free_list_sz++;
}

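// Trim the buffer free list to bound the native memory it retains.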
void PtrQueueSet::reduce_free_list() {
  assert(_fl_owner == this, "Free list reduction is allowed only for the owner");
  // For now we'll adopt the strategy of deleting half.
  MutexLockerEx x(_fl_lock, Mutex::_no_safepoint_check_flag);
  size_t n = _buf_free_list_sz / 2;
  while (n > 0) {
    assert(_buf_free_list != NULL, "_buf_free_list_sz must be wrong.");
    void* b = BufferNode::make_block_from_node(_buf_free_list);
    _buf_free_list = _buf_free_list->next();
    FREE_C_HEAP_ARRAY(char, b, mtGC);
    _buf_free_list_sz--;
    n--;
  }
}

void PtrQueue::handle_zero_index() {
  assert(_index == 0, "Precondition.");

  // This thread records the full buffer and allocates a new one (while
  // holding the lock if there is one).
  if (_buf != NULL) {
    if (!should_enqueue_buffer()) {
      assert(_index > 0, "the buffer can only be re-used if it's not full");
      return;
    }

    if (_lock) {
      assert(_lock->owned_by_self(), "Required.");

      // The current PtrQ may be the shared dirty card queue and
      // may be being manipulated by more than one worker thread
      // during a pause. Since the enqueuing of the completed
      // buffer unlocks the Shared_DirtyCardQ_lock more than one
      // worker thread can 'race' on reading the shared queue attributes
      // (_buf and _index) and multiple threads can call into this
      // routine for the same buffer. This will cause the completed
      // buffer to be added to the CBL multiple times.

      // We "claim" the current buffer by caching the value of _buf in
      // a local and clearing the field while holding _lock. When
      // _lock is released (while enqueueing the completed buffer)
      // the thread that acquires _lock will skip this code,
      // preventing the subsequent multiple enqueue, and
      // install a newly allocated buffer below.

      void** buf = _buf;   // local pointer to completed buffer
      _buf = NULL;         // clear shared _buf field

      locking_enqueue_completed_buffer(buf);  // enqueue completed buffer

      // While the current thread was enqueuing the buffer another thread
      // may have allocated a new buffer and inserted it into this pointer
      // queue. If that happens then we just return so that the current
      // thread doesn't overwrite the buffer allocated by the other thread
      // and potentially lose some dirtied cards.

      if (_buf != NULL) return;
    } else {
      if (qset()->process_or_enqueue_complete_buffer(_buf)) {
        // Recycle the buffer. No allocation.
        _sz = qset()->buffer_size();
        _index = _sz;
        return;
      }
    }
  }
  // Reallocate the buffer
  _buf = qset()->allocate_buffer();
  _sz = qset()->buffer_size();
  _index = _sz;
  assert(0 <= _index && _index <= _sz, "Invariant.");
}

bool PtrQueueSet::process_or_enqueue_complete_buffer(void** buf) {
  if (Thread::current()->is_Java_thread()) {
    // We don't lock. It is fine to be epsilon-precise here.
    if (_max_completed_queue == 0 ||
        (_max_completed_queue > 0 &&
         _n_completed_buffers >= _max_completed_queue + _completed_queue_padding)) {
      bool b = mut_process_buffer(buf);
      if (b) {
        // True here means that the buffer hasn't been deallocated and the caller may reuse it.
        return true;
      }
    }
  }
  // The buffer will be enqueued. The caller will have to get a new one.
  enqueue_complete_buffer(buf);
  return false;
}

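// Append the buffer to the completed buffer list (CBL) and, once the
// configured threshold is reached, notify a waiting processing thread.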
void PtrQueueSet::enqueue_complete_buffer(void** buf, size_t index) {
  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  BufferNode* cbn = BufferNode::new_from_buffer(buf);
  cbn->set_index(index);
  if (_completed_buffers_tail == NULL) {
    assert(_completed_buffers_head == NULL, "Well-formedness");
    _completed_buffers_head = cbn;
    _completed_buffers_tail = cbn;
  } else {
    _completed_buffers_tail->set_next(cbn);
    _completed_buffers_tail = cbn;
  }
  _n_completed_buffers++;

  if (!_process_completed && _process_completed_threshold >= 0 &&
      _n_completed_buffers >= _process_completed_threshold) {
    _process_completed = true;
    if (_notify_when_complete)
      _cbl_mon->notify();
  }
  debug_only(assert_completed_buffer_list_len_correct_locked());
}

int PtrQueueSet::completed_buffers_list_length() {
  int n = 0;
  BufferNode* cbn = _completed_buffers_head;
  while (cbn != NULL) {
    n++;
    cbn = cbn->next();
  }
  return n;
}

void PtrQueueSet::assert_completed_buffer_list_len_correct() {
  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  assert_completed_buffer_list_len_correct_locked();
}

void PtrQueueSet::assert_completed_buffer_list_len_correct_locked() {
  guarantee(completed_buffers_list_length() == _n_completed_buffers,
            "Completed buffer length is wrong.");
}

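// The argument is a count of oop-sized entries; _sz is kept in bytes.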
void PtrQueueSet::set_buffer_size(size_t sz) {
  assert(_sz == 0 && sz > 0, "Should be called only once.");
  _sz = sz * oopSize;
}

// Merge lists of buffers. Notify the processing threads.
// The source queue is emptied as a result. The queues
// must share the monitor.
void PtrQueueSet::merge_bufferlists(PtrQueueSet *src) {
  assert(_cbl_mon == src->_cbl_mon, "Should share the same lock");
  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  if (_completed_buffers_tail == NULL) {
    assert(_completed_buffers_head == NULL, "Well-formedness");
    _completed_buffers_head = src->_completed_buffers_head;
    _completed_buffers_tail = src->_completed_buffers_tail;
  } else {
    assert(_completed_buffers_head != NULL, "Well-formedness");
    if (src->_completed_buffers_head != NULL) {
      _completed_buffers_tail->set_next(src->_completed_buffers_head);
      _completed_buffers_tail = src->_completed_buffers_tail;
    }
  }
  _n_completed_buffers += src->_n_completed_buffers;

  src->_n_completed_buffers = 0;
  src->_completed_buffers_head = NULL;
  src->_completed_buffers_tail = NULL;

  assert((_completed_buffers_head == NULL && _completed_buffers_tail == NULL) ||
         (_completed_buffers_head != NULL && _completed_buffers_tail != NULL),
         "Sanity");
}

void PtrQueueSet::notify_if_necessary() {
  MutexLockerEx x(_cbl_mon, Mutex::_no_safepoint_check_flag);
  if (_n_completed_buffers >= _process_completed_threshold || _max_completed_queue == 0) {
    _process_completed = true;
    if (_notify_when_complete)
      _cbl_mon->notify();
  }
}