Tue, 21 Aug 2012 14:10:39 -0700
7185699: G1: Prediction model discrepancies
Summary: Correct the result value of G1CollectedHeap::pending_card_num(). Change the code that calculates the GC efficiency of a non-young heap region to use historical data from mixed GCs and the actual number of live bytes when predicting how long it would take to collect the region. Changes were also reviewed by Thomas Schatzl.
Reviewed-by: azeemj, brutisso
1 /*
2 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
26 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
28 #include "gc_implementation/g1/concurrentMark.hpp"
29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
31 // Returns the index in the liveness accounting card bitmap
32 // for the given address
33 inline BitMap::idx_t ConcurrentMark::card_bitmap_index_for(HeapWord* addr) {
34 // Below, the term "card num" means the result of shifting an address
35 // by the card shift -- address 0 corresponds to card number 0. One
36 // must subtract the card num of the bottom of the heap to obtain a
37 // card table index.
39 intptr_t card_num = intptr_t(uintptr_t(addr) >> CardTableModRefBS::card_shift);
40 return card_num - heap_bottom_card_num();
41 }
43 // Counts the given memory region in the given task/worker
44 // counting data structures.
45 inline void ConcurrentMark::count_region(MemRegion mr, HeapRegion* hr,
46 size_t* marked_bytes_array,
47 BitMap* task_card_bm) {
48 G1CollectedHeap* g1h = _g1h;
49 HeapWord* start = mr.start();
50 HeapWord* last = mr.last();
51 size_t region_size_bytes = mr.byte_size();
52 uint index = hr->hrs_index();
54 assert(!hr->continuesHumongous(), "should not be HC region");
55 assert(hr == g1h->heap_region_containing(start), "sanity");
56 assert(hr == g1h->heap_region_containing(mr.last()), "sanity");
57 assert(marked_bytes_array != NULL, "pre-condition");
58 assert(task_card_bm != NULL, "pre-condition");
60 // Add to the task local marked bytes for this region.
61 marked_bytes_array[index] += region_size_bytes;
63 BitMap::idx_t start_idx = card_bitmap_index_for(start);
64 BitMap::idx_t last_idx = card_bitmap_index_for(last);
66 // The card bitmap is task/worker specific => no need to use 'par' routines.
67 // Set bits in the inclusive bit range [start_idx, last_idx].
68 //
69 // For small ranges use a simple loop; otherwise use set_range
70 // The range are the cards that are spanned by the object/region
71 // so 8 cards will allow objects/regions up to 4K to be handled
72 // using the loop.
73 if ((last_idx - start_idx) <= 8) {
74 for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
75 task_card_bm->set_bit(i);
76 }
77 } else {
78 assert(last_idx < task_card_bm->size(), "sanity");
79 // Note: BitMap::set_range() is exclusive.
80 task_card_bm->set_range(start_idx, last_idx+1);
81 }
82 }
84 // Counts the given memory region in the task/worker counting
85 // data structures for the given worker id.
86 inline void ConcurrentMark::count_region(MemRegion mr,
87 HeapRegion* hr,
88 uint worker_id) {
89 size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
90 BitMap* task_card_bm = count_card_bitmap_for(worker_id);
91 count_region(mr, hr, marked_bytes_array, task_card_bm);
92 }
94 // Counts the given memory region, which may be a single object, in the
95 // task/worker counting data structures for the given worker id.
96 inline void ConcurrentMark::count_region(MemRegion mr, uint worker_id) {
97 HeapWord* addr = mr.start();
98 HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
99 count_region(mr, hr, worker_id);
100 }
102 // Counts the given object in the given task/worker counting data structures.
103 inline void ConcurrentMark::count_object(oop obj,
104 HeapRegion* hr,
105 size_t* marked_bytes_array,
106 BitMap* task_card_bm) {
107 MemRegion mr((HeapWord*)obj, obj->size());
108 count_region(mr, hr, marked_bytes_array, task_card_bm);
109 }
111 // Counts the given object in the task/worker counting data
112 // structures for the given worker id.
113 inline void ConcurrentMark::count_object(oop obj,
114 HeapRegion* hr,
115 uint worker_id) {
116 size_t* marked_bytes_array = count_marked_bytes_array_for(worker_id);
117 BitMap* task_card_bm = count_card_bitmap_for(worker_id);
118 HeapWord* addr = (HeapWord*) obj;
119 count_object(obj, hr, marked_bytes_array, task_card_bm);
120 }
122 // Attempts to mark the given object and, if successful, counts
123 // the object in the given task/worker counting structures.
124 inline bool ConcurrentMark::par_mark_and_count(oop obj,
125 HeapRegion* hr,
126 size_t* marked_bytes_array,
127 BitMap* task_card_bm) {
128 HeapWord* addr = (HeapWord*)obj;
129 if (_nextMarkBitMap->parMark(addr)) {
130 // Update the task specific count data for the object.
131 count_object(obj, hr, marked_bytes_array, task_card_bm);
132 return true;
133 }
134 return false;
135 }
137 // Attempts to mark the given object and, if successful, counts
138 // the object in the task/worker counting structures for the
139 // given worker id.
140 inline bool ConcurrentMark::par_mark_and_count(oop obj,
141 size_t word_size,
142 HeapRegion* hr,
143 uint worker_id) {
144 HeapWord* addr = (HeapWord*)obj;
145 if (_nextMarkBitMap->parMark(addr)) {
146 MemRegion mr(addr, word_size);
147 count_region(mr, hr, worker_id);
148 return true;
149 }
150 return false;
151 }
153 // Attempts to mark the given object and, if successful, counts
154 // the object in the task/worker counting structures for the
155 // given worker id.
156 inline bool ConcurrentMark::par_mark_and_count(oop obj,
157 HeapRegion* hr,
158 uint worker_id) {
159 HeapWord* addr = (HeapWord*)obj;
160 if (_nextMarkBitMap->parMark(addr)) {
161 // Update the task specific count data for the object.
162 count_object(obj, hr, worker_id);
163 return true;
164 }
165 return false;
166 }
168 // As above - but we don't know the heap region containing the
169 // object and so have to supply it.
170 inline bool ConcurrentMark::par_mark_and_count(oop obj, uint worker_id) {
171 HeapWord* addr = (HeapWord*)obj;
172 HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
173 return par_mark_and_count(obj, hr, worker_id);
174 }
176 // Similar to the above routine but we already know the size, in words, of
177 // the object that we wish to mark/count
178 inline bool ConcurrentMark::par_mark_and_count(oop obj,
179 size_t word_size,
180 uint worker_id) {
181 HeapWord* addr = (HeapWord*)obj;
182 if (_nextMarkBitMap->parMark(addr)) {
183 // Update the task specific count data for the object.
184 MemRegion mr(addr, word_size);
185 count_region(mr, worker_id);
186 return true;
187 }
188 return false;
189 }
191 // Unconditionally mark the given object, and unconditinally count
192 // the object in the counting structures for worker id 0.
193 // Should *not* be called from parallel code.
194 inline bool ConcurrentMark::mark_and_count(oop obj, HeapRegion* hr) {
195 HeapWord* addr = (HeapWord*)obj;
196 _nextMarkBitMap->mark(addr);
197 // Update the task specific count data for the object.
198 count_object(obj, hr, 0 /* worker_id */);
199 return true;
200 }
202 // As above - but we don't have the heap region containing the
203 // object, so we have to supply it.
204 inline bool ConcurrentMark::mark_and_count(oop obj) {
205 HeapWord* addr = (HeapWord*)obj;
206 HeapRegion* hr = _g1h->heap_region_containing_raw(addr);
207 return mark_and_count(obj, hr);
208 }
210 inline bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
211 HeapWord* start_addr = MAX2(startWord(), mr.start());
212 HeapWord* end_addr = MIN2(endWord(), mr.end());
214 if (end_addr > start_addr) {
215 // Right-open interval [start-offset, end-offset).
216 BitMap::idx_t start_offset = heapWordToOffset(start_addr);
217 BitMap::idx_t end_offset = heapWordToOffset(end_addr);
219 start_offset = _bm.get_next_one_offset(start_offset, end_offset);
220 while (start_offset < end_offset) {
221 HeapWord* obj_addr = offsetToHeapWord(start_offset);
222 oop obj = (oop) obj_addr;
223 if (!cl->do_bit(start_offset)) {
224 return false;
225 }
226 HeapWord* next_addr = MIN2(obj_addr + obj->size(), end_addr);
227 BitMap::idx_t next_offset = heapWordToOffset(next_addr);
228 start_offset = _bm.get_next_one_offset(next_offset, end_offset);
229 }
230 }
231 return true;
232 }
234 inline bool CMBitMapRO::iterate(BitMapClosure* cl) {
235 MemRegion mr(startWord(), sizeInWords());
236 return iterate(cl, mr);
237 }
239 inline void CMTask::push(oop obj) {
240 HeapWord* objAddr = (HeapWord*) obj;
241 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
242 assert(!_g1h->is_on_master_free_list(
243 _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
244 assert(!_g1h->is_obj_ill(obj), "invariant");
245 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
247 if (_cm->verbose_high()) {
248 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
249 }
251 if (!_task_queue->push(obj)) {
252 // The local task queue looks full. We need to push some entries
253 // to the global stack.
255 if (_cm->verbose_medium()) {
256 gclog_or_tty->print_cr("[%d] task queue overflow, "
257 "moving entries to the global stack",
258 _task_id);
259 }
260 move_entries_to_global_stack();
262 // this should succeed since, even if we overflow the global
263 // stack, we should have definitely removed some entries from the
264 // local queue. So, there must be space on it.
265 bool success = _task_queue->push(obj);
266 assert(success, "invariant");
267 }
269 statsOnly( int tmp_size = _task_queue->size();
270 if (tmp_size > _local_max_size) {
271 _local_max_size = tmp_size;
272 }
273 ++_local_pushes );
274 }
276 // This determines whether the method below will check both the local
277 // and global fingers when determining whether to push on the stack a
278 // gray object (value 1) or whether it will only check the global one
279 // (value 0). The tradeoffs are that the former will be a bit more
280 // accurate and possibly push less on the stack, but it might also be
281 // a little bit slower.
283 #define _CHECK_BOTH_FINGERS_ 1
285 inline void CMTask::deal_with_reference(oop obj) {
286 if (_cm->verbose_high()) {
287 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
288 _task_id, (void*) obj);
289 }
291 ++_refs_reached;
293 HeapWord* objAddr = (HeapWord*) obj;
294 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
295 if (_g1h->is_in_g1_reserved(objAddr)) {
296 assert(obj != NULL, "null check is implicit");
297 if (!_nextMarkBitMap->isMarked(objAddr)) {
298 // Only get the containing region if the object is not marked on the
299 // bitmap (otherwise, it's a waste of time since we won't do
300 // anything with it).
301 HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
302 if (!hr->obj_allocated_since_next_marking(obj)) {
303 if (_cm->verbose_high()) {
304 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
305 _task_id, (void*) obj);
306 }
308 // we need to mark it first
309 if (_cm->par_mark_and_count(obj, hr, _marked_bytes_array, _card_bm)) {
310 // No OrderAccess:store_load() is needed. It is implicit in the
311 // CAS done in CMBitMap::parMark() call in the routine above.
312 HeapWord* global_finger = _cm->finger();
314 #if _CHECK_BOTH_FINGERS_
315 // we will check both the local and global fingers
317 if (_finger != NULL && objAddr < _finger) {
318 if (_cm->verbose_high()) {
319 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
320 "pushing it", _task_id, _finger);
321 }
322 push(obj);
323 } else if (_curr_region != NULL && objAddr < _region_limit) {
324 // do nothing
325 } else if (objAddr < global_finger) {
326 // Notice that the global finger might be moving forward
327 // concurrently. This is not a problem. In the worst case, we
328 // mark the object while it is above the global finger and, by
329 // the time we read the global finger, it has moved forward
330 // passed this object. In this case, the object will probably
331 // be visited when a task is scanning the region and will also
332 // be pushed on the stack. So, some duplicate work, but no
333 // correctness problems.
335 if (_cm->verbose_high()) {
336 gclog_or_tty->print_cr("[%d] below the global finger "
337 "("PTR_FORMAT"), pushing it",
338 _task_id, global_finger);
339 }
340 push(obj);
341 } else {
342 // do nothing
343 }
344 #else // _CHECK_BOTH_FINGERS_
345 // we will only check the global finger
347 if (objAddr < global_finger) {
348 // see long comment above
350 if (_cm->verbose_high()) {
351 gclog_or_tty->print_cr("[%d] below the global finger "
352 "("PTR_FORMAT"), pushing it",
353 _task_id, global_finger);
354 }
355 push(obj);
356 }
357 #endif // _CHECK_BOTH_FINGERS_
358 }
359 }
360 }
361 }
362 }
364 inline void ConcurrentMark::markPrev(oop p) {
365 assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
366 // Note we are overriding the read-only view of the prev map here, via
367 // the cast.
368 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
369 }
371 inline void ConcurrentMark::grayRoot(oop obj, size_t word_size,
372 uint worker_id, HeapRegion* hr) {
373 assert(obj != NULL, "pre-condition");
374 HeapWord* addr = (HeapWord*) obj;
375 if (hr == NULL) {
376 hr = _g1h->heap_region_containing_raw(addr);
377 } else {
378 assert(hr->is_in(addr), "pre-condition");
379 }
380 assert(hr != NULL, "sanity");
381 // Given that we're looking for a region that contains an object
382 // header it's impossible to get back a HC region.
383 assert(!hr->continuesHumongous(), "sanity");
385 // We cannot assert that word_size == obj->size() given that obj
386 // might not be in a consistent state (another thread might be in
387 // the process of copying it). So the best thing we can do is to
388 // assert that word_size is under an upper bound which is its
389 // containing region's capacity.
390 assert(word_size * HeapWordSize <= hr->capacity(),
391 err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
392 word_size * HeapWordSize, hr->capacity(),
393 HR_FORMAT_PARAMS(hr)));
395 if (addr < hr->next_top_at_mark_start()) {
396 if (!_nextMarkBitMap->isMarked(addr)) {
397 par_mark_and_count(obj, word_size, hr, worker_id);
398 }
399 }
400 }
402 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP