Fri, 10 Oct 2014 15:51:58 +0200
8059758: Footprint regressions with JDK-8038423
Summary: Changes in JDK-8038423 always initialize (zero out) virtual memory used for auxiliary data structures. This causes a footprint regression for G1 in startup benchmarks: such benchmarks never touch that memory, so the operating system never actually commits those pages. The fix is to skip the explicit initialization whenever the requested initialization value matches the default contents of freshly committed memory (zero).
Reviewed-by: jwilhelm, brutisso
1 /*
2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
28 #include "gc_implementation/g1/concurrentMark.hpp"
29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
// Utility routine to set an exclusive range of cards on the given
// card liveness bitmap. If is_par is true, concurrent writers to the
// same bitmap are tolerated (bits are set with atomic operations).
inline void ConcurrentMark::set_card_bitmap_range(BitMap* card_bm,
                                                  BitMap::idx_t start_idx,
                                                  BitMap::idx_t end_idx,
                                                  bool is_par) {

  // Set the exclusive bit range [start_idx, end_idx).
  assert((end_idx - start_idx) > 0, "at least one card");
  assert(end_idx <= card_bm->size(), "sanity");

  // Silently clip the end index; the assert above will already have
  // caught out-of-range callers in debug builds.
  end_idx = MIN2(end_idx, card_bm->size());

  // For small ranges use a simple loop; otherwise use set_range or
  // use par_at_put_range (if parallel). The range is made up of the
  // cards that are spanned by an object/mem region so 8 cards will
  // allow object sizes up to 4K to be handled using the loop.
  if ((end_idx - start_idx) <= 8) {
    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
      if (is_par) {
        // Atomic set; safe when other workers touch the same word.
        card_bm->par_set_bit(i);
      } else {
        card_bm->set_bit(i);
      }
    }
  } else {
    // Note BitMap::par_at_put_range() and BitMap::set_range() are exclusive.
    if (is_par) {
      card_bm->par_at_put_range(start_idx, end_idx, true);
    } else {
      card_bm->set_range(start_idx, end_idx);
    }
  }
}
67 // Returns the index in the liveness accounting card bitmap
68 // for the given address
69 inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
70 // Below, the term "card num" means the result of shifting an address
71 // by the card shift -- address 0 corresponds to card number 0. One
72 // must subtract the card num of the bottom of the heap to obtain a
73 // card table index.
74 intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
75 return card_num - heap_bottom_card_num();
76 }
// Counts the given memory region in the given task/worker
// counting data structures: adds the region's byte size to the
// per-region marked-bytes array (indexed by region index) and sets
// the bits for all cards spanned by the region in the task's card
// bitmap.
inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
                                         size_t* marked_bytes_array,
                                         BitMap* task_card_bm) {
  G1CollectedHeap* g1h = _g1h;
  CardTableModRefBS* ct_bs = g1h->g1_barrier_set();

  HeapWord* start = mr.start();
  HeapWord* end = mr.end();
  size_t region_size_bytes = mr.byte_size();
  uint index = hr->hrm_index();

  assert(!hr->continuesHumongous(), "should not be HC region");
  assert(hr == g1h->heap_region_containing(start), "sanity");
  assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
  assert(marked_bytes_array != NULL, "pre-condition");
  assert(task_card_bm != NULL, "pre-condition");

  // Add to the task local marked bytes for this region.
  marked_bytes_array[index] += region_size_bytes;

  BitMap::idx_t start_idx = card_bitmap_index_for(start);
  BitMap::idx_t end_idx = card_bitmap_index_for(end);

  // Note: if we're looking at the last region in heap - end
  // could be actually just beyond the end of the heap; end_idx
  // will then correspond to a (non-existent) card that is also
  // just beyond the heap.
  if (g1h->is_in_g1_reserved(end) && !ct_bs->is_card_aligned(end)) {
    // end of region is not card aligned - increment to cover
    // all the cards spanned by the region.
    end_idx += 1;
  }
  // The card bitmap is task/worker specific => no need to use
  // the 'par' BitMap routines.
  // Set bits in the exclusive bit range [start_idx, end_idx).
  set_card_bitmap_range(task_card_bm, start_idx, end_idx, false /* is_par */);
}
118 // Counts the given memory region in the task/worker counting
119 // data structures for the given worker id.
120 inline void ConcurrentMark::count_region(MemRegion mr,
121 HeapRegion* hr,
122 uint worker_id) {
123 size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
124 BitMap* task_card_bm = count_card_bitmap_for(worker_id);
125 count_region(mr, hr, marked_bytes_array, task_card_bm);
126 }
128 // Counts the given object in the given task/worker counting data structures.
129 inline void ConcurrentMark::count_object(oop obj,
130 HeapRegion* hr,
131 size_t* marked_bytes_array,
132 BitMap* task_card_bm) {
133 MemRegion mr((HeapWord*)obj, obj->size());
134 count_region(mr, hr, marked_bytes_array, task_card_bm);
135 }
137 // Attempts to mark the given object and, if successful, counts
138 // the object in the given task/worker counting structures.
139 inline bool ConcurrentMark::par_mark_and_count(oop obj,
140 HeapRegion* hr,
141 size_t* marked_bytes_array,
142 BitMap* task_card_bm) {
143 HeapWord* addr = (HeapWord*)obj;
144 if (_nextMarkBitMap->parMark(addr)) {
145 // Update the task specific count data for the object.
146 count_object(obj, hr, marked_bytes_array, task_card_bm);
147 return true;
148 }
149 return false;
150 }
152 // Attempts to mark the given object and, if successful, counts
153 // the object in the task/worker counting structures for the
154 // given worker id.
155 inline bool ConcurrentMark::par_mark_and_count(oop obj,
156 size_t word_size,
157 HeapRegion* hr,
158 uint worker_id) {
159 HeapWord* addr = (HeapWord*)obj;
160 if (_nextMarkBitMap->parMark(addr)) {
161 MemRegion mr(addr, word_size);
162 count_region(mr, hr, worker_id);
163 return true;
164 }
165 return false;
166 }
// Applies the given closure to every set bit of this bitmap that falls
// within 'mr'. Returns false if the closure aborted the iteration,
// true otherwise.
inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  // Clip the requested region to the address range this bitmap covers.
  HeapWord* start_addr = MAX2(startWord(), mr.start());
  HeapWord* end_addr = MIN2(endWord(), mr.end());

  if (end_addr > start_addr) {
    // Right-open interval [start-offset, end-offset).
    BitMap::idx_t start_offset = heapWordToOffset(start_addr);
    BitMap::idx_t end_offset = heapWordToOffset(end_addr);

    start_offset = _bm.get_next_one_offset(start_offset, end_offset);
    while (start_offset < end_offset) {
      if (!cl->do_bit(start_offset)) {
        // The closure requested that the iteration be aborted.
        return false;
      }
      // Advance past the current object (nextObject) before searching for
      // the next set bit, clamping to the end of the clipped range.
      HeapWord* next_addr = MIN2(nextObject(offsetToHeapWord(start_offset)), end_addr);
      BitMap::idx_t next_offset = heapWordToOffset(next_addr);
      start_offset = _bm.get_next_one_offset(next_offset, end_offset);
    }
  }
  // The clipped range was empty, or every set bit in it was visited.
  return true;
}
190 inline bool CMBitMapRO::iterate(BitMapClosure* cl) {
191 MemRegion mr(startWord(), sizeInWords());
192 return iterate(cl, mr);
193 }
// Sanity checks, used by the mark/clear helpers below, that 'addr' lies
// within this bitmap's underlying address space and within memory the
// heap has actually made available (is_in_exact). Kept as a macro; note
// that comments must not be added inside it, since line splicing of the
// '\' continuations happens before comment removal.
#define check_mark(addr) \
  assert(_bmStartWord <= (addr) && (addr) < (_bmStartWord + _bmWordSize), \
         "outside underlying space?"); \
  assert(G1CollectedHeap::heap()->is_in_exact(addr), \
         err_msg("Trying to access not available bitmap "PTR_FORMAT \
                 " corresponding to "PTR_FORMAT" (%u)", \
                 p2i(this), p2i(addr), G1CollectedHeap::heap()->addr_to_region(addr)));
203 inline void CMBitMap::mark(HeapWord* addr) {
204 check_mark(addr);
205 _bm.set_bit(heapWordToOffset(addr));
206 }
208 inline void CMBitMap::clear(HeapWord* addr) {
209 check_mark(addr);
210 _bm.clear_bit(heapWordToOffset(addr));
211 }
213 inline bool CMBitMap::parMark(HeapWord* addr) {
214 check_mark(addr);
215 return _bm.par_set_bit(heapWordToOffset(addr));
216 }
218 inline bool CMBitMap::parClear(HeapWord* addr) {
219 check_mark(addr);
220 return _bm.par_clear_bit(heapWordToOffset(addr));
221 }
223 #undef check_mark
225 inline void CMTask::push(oop obj) {
226 HeapWord* objAddr = (HeapWord*) obj;
227 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
228 assert(!_g1h->is_on_master_free_list(
229 _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
230 assert(!_g1h->is_obj_ill(obj), "invariant");
231 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
233 if (_cm->verbose_high()) {
234 gclog_or_tty->print_cr("[%u] pushing " PTR_FORMAT, _worker_id, p2i((void*) obj));
235 }
237 if (!_task_queue->push(obj)) {
238 // The local task queue looks full. We need to push some entries
239 // to the global stack.
241 if (_cm->verbose_medium()) {
242 gclog_or_tty->print_cr("[%u] task queue overflow, "
243 "moving entries to the global stack",
244 _worker_id);
245 }
246 move_entries_to_global_stack();
248 // this should succeed since, even if we overflow the global
249 // stack, we should have definitely removed some entries from the
250 // local queue. So, there must be space on it.
251 bool success = _task_queue->push(obj);
252 assert(success, "invariant");
253 }
255 statsOnly( int tmp_size = _task_queue->size();
256 if (tmp_size > _local_max_size) {
257 _local_max_size = tmp_size;
258 }
259 ++_local_pushes );
260 }
// This determines whether the method below will check both the local
// and global fingers when determining whether to push on the stack a
// gray object (value 1) or whether it will only check the global one
// (value 0). The tradeoffs are that the former will be a bit more
// accurate and possibly push less on the stack, but it might also be
// a little bit slower.

#define _CHECK_BOTH_FINGERS_ 1

// Processes one reference discovered during marking: if the object is in
// the G1 reserved space, not yet marked, and was not allocated since the
// current marking started, it is marked and counted, and - depending on
// its address relative to the local/global finger - pushed on the task's
// queue for later scanning.
inline void CMTask::deal_with_reference(oop obj) {
  if (_cm->verbose_high()) {
    gclog_or_tty->print_cr("[%u] we're dealing with reference = "PTR_FORMAT,
                           _worker_id, p2i((void*) obj));
  }

  // Count every reference reached, whether or not it needs marking.
  ++_refs_reached;

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "null check is implicit");
    if (!_nextMarkBitMap->isMarked(objAddr)) {
      // Only get the containing region if the object is not marked on the
      // bitmap (otherwise, it's a waste of time since we won't do
      // anything with it).
      HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
      if (!hr->obj_allocated_since_next_marking(obj)) {
        if (_cm->verbose_high()) {
          gclog_or_tty->print_cr("[%u] "PTR_FORMAT" is not considered marked",
                                 _worker_id, p2i((void*) obj));
        }

        // we need to mark it first
        if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
          // No OrderAccess::store_load() is needed. It is implicit in the
          // CAS done in CMBitMap::parMark() call in the routine above.
          HeapWord* global_finger = _cm->finger();

#if _CHECK_BOTH_FINGERS_
          // we will check both the local and global fingers

          if (_finger != NULL && objAddr < _finger) {
            if (_cm->verbose_high()) {
              gclog_or_tty->print_cr("[%u] below the local finger ("PTR_FORMAT"), "
                                     "pushing it", _worker_id, p2i(_finger));
            }
            push(obj);
          } else if (_curr_region != NULL && objAddr < _region_limit) {
            // do nothing
          } else if (objAddr < global_finger) {
            // Notice that the global finger might be moving forward
            // concurrently. This is not a problem. In the worst case, we
            // mark the object while it is above the global finger and, by
            // the time we read the global finger, it has moved forward
            // past this object. In this case, the object will probably
            // be visited when a task is scanning the region and will also
            // be pushed on the stack. So, some duplicate work, but no
            // correctness problems.

            if (_cm->verbose_high()) {
              gclog_or_tty->print_cr("[%u] below the global finger "
                                     "("PTR_FORMAT"), pushing it",
                                     _worker_id, p2i(global_finger));
            }
            push(obj);
          } else {
            // do nothing
          }
#else // _CHECK_BOTH_FINGERS_
          // we will only check the global finger

          if (objAddr < global_finger) {
            // see long comment above

            if (_cm->verbose_high()) {
              gclog_or_tty->print_cr("[%u] below the global finger "
                                     "("PTR_FORMAT"), pushing it",
                                     _worker_id, p2i(global_finger));
            }
            push(obj);
          }
#endif // _CHECK_BOTH_FINGERS_
        }
      }
    }
  }
}
350 inline void ConcurrentMark::markPrev(oop p) {
351 assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
352 // Note we are overriding the read-only view of the prev map here, via
353 // the cast.
354 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
355 }
// Marks (and counts) the given object as a root for concurrent marking
// by the given worker. 'hr' may be NULL, in which case the containing
// region is looked up here. Objects at or above the region's NTAMS, or
// already marked, are left alone.
inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
                                     uint worker_id, HeapRegion* hr) {
  assert(obj != NULL, "pre-condition");
  HeapWord* addr = (HeapWord*) obj;
  if (hr == NULL) {
    hr = _g1h->heap_region_containing_raw(addr);
  } else {
    assert(hr->is_in(addr), "pre-condition");
  }
  assert(hr != NULL, "sanity");
  // Given that we're looking for a region that contains an object
  // header it's impossible to get back a HC region.
  assert(!hr->continuesHumongous(), "sanity");

  // We cannot assert that word_size == obj->size() given that obj
  // might not be in a consistent state (another thread might be in
  // the process of copying it). So the best thing we can do is to
  // assert that word_size is under an upper bound which is its
  // containing region's capacity.
  assert(word_size * HeapWordSize <= hr->capacity(),
         err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
                 word_size * HeapWordSize, hr->capacity(),
                 HR_FORMAT_PARAMS(hr)));

  // Only objects below next_top_at_mark_start need an explicit mark;
  // objects at or above it were allocated during this marking cycle
  // (compare obj_allocated_since_next_marking in deal_with_reference).
  if (addr < hr->next_top_at_mark_start()) {
    if (!_nextMarkBitMap->isMarked(addr)) {
      par_mark_and_count(obj, word_size, hr, worker_id);
    }
  }
}
388 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP