Mon, 12 Mar 2012 14:59:00 -0700
7147724: G1: hang in SurrogateLockerThread::manipulatePLL
Summary: Attempting to initiate a marking cycle when allocating a humongous object can, if a marking cycle is successfully initiated by another thread, result in the allocating thread spinning until the marking cycle is complete. Eliminate a deadlock between the main ConcurrentMarkThread, the SurrogateLocker thread, the VM thread, and a mutator thread waiting on the SecondaryFreeList_lock (while free regions are going to become available) by not manipulating the pending list lock during the prologue and epilogue of the cleanup pause.
Reviewed-by: brutisso, jcoomes, tonyp
/*
 * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
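
// Sizing note (added for clarity; illustrative numbers, not from this
// change): each bit of the mark bitmap covers (1 << _shifter) heap words,
// so the bitmap needs (_bmWordSize >> _shifter) bits, i.e.
// (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes of backing store; the
// "+ 1" above rounds that up. With 8-byte heap words and _shifter == 0 that
// is one bit per heap word, or 1 MB of bitmap for every 64 MB of heap.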

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}
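
// Note added for clarity (descriptive, not part of the original change):
// par_push() and par_adjoin_arr() below are lock-free. A pusher first
// claims a slot (or a range of slots) by CAS-ing _index forward and only
// then writes the entry/entries into the claimed slots; a pusher that
// loses the CAS race simply retries with the freshly observed _index.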

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
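
// Note added for clarity: in contrast to the lock-free operations above,
// par_push_arr() and par_pop_arr() below serialize on ParGCRareEvent_lock.
// Judging by the lock's name, these paths are expected to be taken rarely,
// so a plain mutex is adequate there.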

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
  }
}

void CMRegionStack::push_lock_free(MemRegion mr) {
  guarantee(false, "push_lock_free(): don't call this any more");

  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  guarantee(false, "pop_lock_free(): don't call this any more");

  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  guarantee(false, "invalidate_entries_into_cset(): don't call this any more");

  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
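
// Note added for clarity: claim_next() reads _next_survivor without the
// lock as a fast path, then re-reads and advances it only while holding
// RootRegionScan_lock, so at most one worker can claim any given survivor
// region.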

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
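
// Illustrative values (note added for clarity): scale_parallel_threads(2)
// == 1, scale_parallel_threads(8) == 2, scale_parallel_threads(13) == 3;
// i.e. roughly a quarter of the parallel GC threads, but at least one.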

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_task_num);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);
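
  // Illustrative example (note added for clarity): with 512-byte cards
  // (card_shift == 9) and a heap reserved at 0x40000000,
  // _heap_bottom_card_num == 0x40000000 >> 9. The counting bitmap index
  // for an address addr is then (addr >> card_shift) - _heap_bottom_card_num,
  // so the bottom of the heap maps to index 0.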

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = (uint) ConcGCThreads;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;
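
      // Worked example (added for clarity; hypothetical flag values): with
      // G1MarkingOverheadPercent = 17, MaxGCPauseMillis = 200,
      // GCPauseIntervalMillis = 1000 and 8 processors:
      //   marking_overhead      = 0.17
      //   overall_cm_overhead   = 200 * 0.17 / 1000 = 0.034
      //   cpu_ratio             = 1/8 = 0.125
      //   marking_thread_num    = ceil(0.034 / 0.125) = 1
      //   marking_task_overhead = 0.034 / 1 * 8 = 0.272
      //   sleep_factor          = (1 - 0.272) / 0.272 ~= 2.68
      // i.e. one marking thread that sleeps roughly 2.7x as long as it just
      // ran, keeping its CPU usage near the requested overhead.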

      _parallel_marking_threads = (uint) marking_thread_num;
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
      _max_parallel_marking_threads = _parallel_marking_threads;
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at every
  // remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  guarantee(false, "grayRegionIfNecessary(): don't call this any more");

  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;
  }

  HeapWord* finger = _finger;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);
  }

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low()) {
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());
    }

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low()) {
        gclog_or_tty->print_cr("[global] region stack has overflown.");
      }
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  guarantee(false, "markAndGrayObjectIfNecessary(): don't call this any more");

  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;
      }

      // No OrderAccess:store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low()) {
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
          }
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
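
        // (Note added for clarity: the sleep above throttles concurrent
        // marking. After running for elapsed_vtime_sec the task sleeps for
        // elapsed_vtime_sec * sleep_factor, which keeps its long-run CPU
        // share near the configured marking task overhead.)
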
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (parallel_marking_threads() > 0) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_phase()"
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* allow dirty */ true,
                       /* silent      */ false,
                       /* option      */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Used to calculate the # live objects per region
// for verification purposes
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  BitMap* _region_bm;
  BitMap* _card_bm;

  // Debugging
  size_t _tot_words_done;
  size_t _tot_live;
  size_t _tot_used;

  size_t _region_marked_bytes;

  intptr_t _bottom_card_num;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    assert(start_card_num <= last_card_num, "sanity");
    BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
    BitMap::idx_t last_idx = last_card_num - _bottom_card_num;

    for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
      _card_bm->par_at_put(i, 1);
    }
  }

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
    _region_marked_bytes(0), _tot_words_done(0),
    _tot_live(0), _tot_used(0),
    _bottom_card_num(cm->heap_bottom_card_num()) { }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
      size_t end_index = last_hr->hrs_index() + 1;
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->bottom();

    assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, nextTop, hr->end()));

    // Record the number of words we'll examine.
    size_t words_done = (nextTop - start);

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);

    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.

    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;

    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;
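
    // (Illustrative note added for clarity: for live objects starting on
    // cards 5, 6 and 9, the loop below first extends the current run to
    // [5,6]; on seeing the gap at card 9 it marks [5,6] via
    // mark_card_num_range() and starts a new run at 9.)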

    while (start < nextTop) {
      oop obj = oop(start);
      int obj_sz = obj->size();

      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
    }

    // Handle the last range, if any.
    if (start_card_num != -1) {
      mark_card_num_range(start_card_num, last_card_num);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (nextTop < top) {
      start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
      last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);

      mark_card_num_range(start_card_num, last_card_num);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _tot_words_done = words_done;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }

  // Debugging
  size_t tot_words_done() const { return _tot_words_done; }
  size_t tot_live() const       { return _tot_live; }
  size_t tot_used() const       { return _tot_used; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _cm(cm),
    _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }
1632 bool doHeapRegion(HeapRegion* hr) {
1633 if (hr->continuesHumongous()) {
1634 // We will ignore these here and process them when their
1635 // associated "starts humongous" region is processed (see
1636 // set_bit_for_heap_region()). Note that we cannot rely on their
1637 // associated "starts humongous" region to have their bit set to
1638 // 1 since, due to the region chunking in the parallel region
1639 // iteration, a "continues humongous" region might be visited
1640 // before its associated "starts humongous".
1641 return false;
1642 }
1644 int failures = 0;
1646 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1647 // this region and set the corresponding bits in the expected region
1648 // and card bitmaps.
1649 bool res = _calc_cl.doHeapRegion(hr);
1650 assert(res == false, "should be continuing");
1652 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1653 Mutex::_no_safepoint_check_flag);
1655 // Verify that _top_at_conc_count == ntams
1656 if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
1657 if (_verbose) {
1658 gclog_or_tty->print_cr("Region " SIZE_FORMAT ": top at conc count incorrect: "
1659 "expected " PTR_FORMAT ", actual: " PTR_FORMAT,
1660 hr->hrs_index(), hr->next_top_at_mark_start(),
1661 hr->top_at_conc_mark_count());
1662 }
1663 failures += 1;
1664 }
1666 // Verify the marked bytes for this region.
1667 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1668 size_t act_marked_bytes = hr->next_marked_bytes();
1670 // We're not OK if expected marked bytes > actual marked bytes. It means
1671 // we have missed accounting for some objects during the actual marking.
1672 if (exp_marked_bytes > act_marked_bytes) {
1673 if (_verbose) {
1674 gclog_or_tty->print_cr("Region " SIZE_FORMAT ": marked bytes mismatch: "
1675 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1676 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1677 }
1678 failures += 1;
1679 }
1681 // Verify the bit, for this region, in the actual and expected
1682 // (which was just calculated) region bit maps.
1683 // We're not OK if the bit in the calculated expected region
1684 // bitmap is set and the bit in the actual region bitmap is not.
1685 BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
1687 bool expected = _exp_region_bm->at(index);
1688 bool actual = _region_bm->at(index);
1689 if (expected && !actual) {
1690 if (_verbose) {
1691 gclog_or_tty->print_cr("Region " SIZE_FORMAT ": region bitmap mismatch: "
1692 "expected: %d, actual: %d",
1693 hr->hrs_index(), expected, actual);
1694 }
1695 failures += 1;
1696 }
1698 // Verify that the card bit maps for the cards spanned by the current
1699 // region match. We have an error if we have a set bit in the expected
1700 // bit map and the corresponding bit in the actual bitmap is not set.
1702 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1703 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1705 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1706 expected = _exp_card_bm->at(i);
1707 actual = _card_bm->at(i);
1709 if (expected && !actual) {
1710 if (_verbose) {
1711 gclog_or_tty->print_cr("Region " SIZE_FORMAT ": card bitmap mismatch at " SIZE_FORMAT ": "
1712 "expected: %d, actual: %d",
1713 hr->hrs_index(), i, expected, actual);
1714 }
1715 failures += 1;
1716 }
1717 }
1719 if (failures > 0 && _verbose) {
1720 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1721 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1722 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1723 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1724 }
1726 _failures += failures;
1728 // We could stop iteration over the heap when we
1729 // find the first violating region by returning true.
1730 return false;
1731 }
1732 };
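// The comparisons in the closure above are deliberately one-sided: a bit
// set in the expected (recomputed) data but clear in the actual data is a
// failure, while the reverse is tolerated (the actual data may
// conservatively over-report liveness). A minimal sketch of the predicate,
// with hypothetical names:
//
//   bool is_failure(bool expected, bool actual) {
//     return expected && !actual;  // missed liveness is the only error
//   }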
1735 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1736 protected:
1737 G1CollectedHeap* _g1h;
1738 ConcurrentMark* _cm;
1739 BitMap* _actual_region_bm;
1740 BitMap* _actual_card_bm;
1742 uint _n_workers;
1744 BitMap* _expected_region_bm;
1745 BitMap* _expected_card_bm;
1747 int _failures;
1748 bool _verbose;
1750 public:
1751 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1752 BitMap* region_bm, BitMap* card_bm,
1753 BitMap* expected_region_bm, BitMap* expected_card_bm)
1754 : AbstractGangTask("G1 verify final counting"),
1755 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1756 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1757 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1758 _failures(0), _verbose(false),
1759 _n_workers(0) {
1760 assert(VerifyDuringGC, "don't call this otherwise");
1762 // Use the value already set as the number of active threads
1763 // in the call to run_task().
1764 if (G1CollectedHeap::use_parallel_gc_threads()) {
1765 assert( _g1h->workers()->active_workers() > 0,
1766 "Should have been previously set");
1767 _n_workers = _g1h->workers()->active_workers();
1768 } else {
1769 _n_workers = 1;
1770 }
1772 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1773 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1775 _verbose = _cm->verbose_medium();
1776 }
1778 void work(uint worker_id) {
1779 assert(worker_id < _n_workers, "invariant");
1781 VerifyLiveObjectDataHRClosure verify_cl(_cm,
1782 _actual_region_bm, _actual_card_bm,
1783 _expected_region_bm,
1784 _expected_card_bm,
1785 _verbose);
1787 if (G1CollectedHeap::use_parallel_gc_threads()) {
1788 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1789 worker_id,
1790 _n_workers,
1791 HeapRegion::VerifyCountClaimValue);
1792 } else {
1793 _g1h->heap_region_iterate(&verify_cl);
1794 }
1796 Atomic::add(verify_cl.failures(), &_failures);
1797 }
1799 int failures() const { return _failures; }
1800 };
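// Per-worker failure counts are folded into _failures with Atomic::add(),
// so work() needs no lock around the update. An equivalent, lock-based
// formulation (sketch only) would be:
//
//   {
//     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
//     _failures += verify_cl.failures();
//   }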
1802 // Final update of count data (during cleanup).
1803 // Adds [top_at_count, NTAMS) to the marked bytes for each
1804 // region. Sets the bits in the card bitmap corresponding
1805 // to the interval [top_at_count, top], and sets the
1806 // liveness bit for each region containing live data
1807 // in the region bitmap.
1809 class FinalCountDataUpdateClosure: public HeapRegionClosure {
1810 ConcurrentMark* _cm;
1811 BitMap* _region_bm;
1812 BitMap* _card_bm;
1814 size_t _total_live_bytes;
1815 size_t _total_used_bytes;
1816 size_t _total_words_done;
1818 void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
1819 assert(start_idx <= last_idx, "sanity");
1821 // Set the inclusive bit range [start_idx, last_idx].
1822 // For small ranges (up to 8 cards) use a simple loop; otherwise
1823 // use par_at_put_range.
1824 if ((last_idx - start_idx) <= 8) {
1825 for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1826 _card_bm->par_set_bit(i);
1827 }
1828 } else {
1829 assert(last_idx < _card_bm->size(), "sanity");
1830 // Note BitMap::par_at_put_range() is exclusive.
1831 _card_bm->par_at_put_range(start_idx, last_idx+1, true);
1832 }
1833 }
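// Note, for illustration: BitMap::par_at_put_range() takes an exclusive
// end index, hence the last_idx+1 above. For example,
//
//   _card_bm->par_at_put_range(3, 8, true);
//
// sets bits 3..7 and leaves bit 8 untouched, i.e. it covers the inclusive
// range [3, 7]. The simple loop used for ranges of up to 8 cards sets the
// same bits with less per-call setup.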
1835 // It takes a region that's not empty (i.e., it has at least one
1836 // live object in it) and sets its corresponding bit on the region
1837 // bitmap to 1. If the region is "starts humongous" it will also set
1838 // to 1 the bits on the region bitmap that correspond to its
1839 // associated "continues humongous" regions.
1840 void set_bit_for_region(HeapRegion* hr) {
1841 assert(!hr->continuesHumongous(), "should have filtered those out");
1843 size_t index = hr->hrs_index();
1844 if (!hr->startsHumongous()) {
1845 // Normal (non-humongous) case: just set the bit.
1846 _region_bm->par_set_bit((BitMap::idx_t) index);
1847 } else {
1848 // Starts humongous case: calculate how many regions are part of
1849 // this humongous region and then set the bit range.
1850 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1851 HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1852 size_t end_index = last_hr->hrs_index() + 1;
1853 _region_bm->par_at_put_range((BitMap::idx_t) index,
1854 (BitMap::idx_t) end_index, true);
1855 }
1856 }
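// Worked example (assumed layout): a "starts humongous" region at index 10
// whose object spills into two "continues humongous" regions gives
// last_hr->hrs_index() == 12 and end_index == 13, so
//
//   _region_bm->par_at_put_range(10, 13, true);  // sets bits 10, 11, 12
//
// marks all three regions as live in a single call.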
1858 public:
1859 FinalCountDataUpdateClosure(ConcurrentMark* cm,
1860 BitMap* region_bm,
1861 BitMap* card_bm) :
1862 _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1863 _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0) { }
1865 bool doHeapRegion(HeapRegion* hr) {
1867 if (hr->continuesHumongous()) {
1868 // We will ignore these here and process them when their
1869 // associated "starts humongous" region is processed (see
1870 // set_bit_for_heap_region()). Note that we cannot rely on their
1871 // associated "starts humongous" region to have its bit set to
1872 // 1 since, due to the region chunking in the parallel region
1873 // iteration, a "continues humongous" region might be visited
1874 // before its associated "starts humongous".
1875 return false;
1876 }
1878 HeapWord* start = hr->top_at_conc_mark_count();
1879 HeapWord* ntams = hr->next_top_at_mark_start();
1880 HeapWord* top = hr->top();
1882 assert(hr->bottom() <= start && start <= hr->end() &&
1883 hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1885 size_t words_done = ntams - hr->bottom();
1887 if (start < ntams) {
1888 // Region was changed between remark and cleanup pauses
1889 // We need to add (ntams - start) to the marked bytes
1890 // for this region, and set bits for the range
1891 // [ card_idx(start), card_idx(ntams) ) in the card bitmap.
1892 size_t live_bytes = (ntams - start) * HeapWordSize;
1893 hr->add_to_marked_bytes(live_bytes);
1895 // Record the new top at conc count
1896 hr->set_top_at_conc_mark_count(ntams);
1898 // The setting of the bits in the card bitmap takes place below
1899 }
1901 // Mark the allocated-since-marking portion...
1902 if (ntams < top) {
1903 // This definitely means the region has live objects.
1904 set_bit_for_region(hr);
1905 }
1907 // Now set the bits for [start, top]
1908 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1909 BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
1910 set_card_bitmap_range(start_idx, last_idx);
1912 // Set the bit for the region if it contains live data
1913 if (hr->next_marked_bytes() > 0) {
1914 set_bit_for_region(hr);
1915 }
1917 _total_words_done += words_done;
1918 _total_used_bytes += hr->used();
1919 _total_live_bytes += hr->next_marked_bytes();
1921 return false;
1922 }
1924 size_t total_words_done() const { return _total_words_done; }
1925 size_t total_live_bytes() const { return _total_live_bytes; }
1926 size_t total_used_bytes() const { return _total_used_bytes; }
1927 };
1929 class G1ParFinalCountTask: public AbstractGangTask {
1930 protected:
1931 G1CollectedHeap* _g1h;
1932 ConcurrentMark* _cm;
1933 BitMap* _actual_region_bm;
1934 BitMap* _actual_card_bm;
1936 uint _n_workers;
1938 size_t *_live_bytes;
1939 size_t *_used_bytes;
1941 public:
1942 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1943 : AbstractGangTask("G1 final counting"),
1944 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1945 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1946 _n_workers(0) {
1947 // Use the value already set as the number of active threads
1948 // in the call to run_task(). Needed for the allocation of
1949 // _live_bytes and _used_bytes.
1950 if (G1CollectedHeap::use_parallel_gc_threads()) {
1951 assert( _g1h->workers()->active_workers() > 0,
1952 "Should have been previously set");
1953 _n_workers = _g1h->workers()->active_workers();
1954 } else {
1955 _n_workers = 1;
1956 }
1958 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1959 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1960 }
1962 ~G1ParFinalCountTask() {
1963 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1964 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1965 }
1967 void work(uint worker_id) {
1968 assert(worker_id < _n_workers, "invariant");
1970 FinalCountDataUpdateClosure final_update_cl(_cm,
1971 _actual_region_bm,
1972 _actual_card_bm);
1974 if (G1CollectedHeap::use_parallel_gc_threads()) {
1975 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1976 worker_id,
1977 _n_workers,
1978 HeapRegion::FinalCountClaimValue);
1979 } else {
1980 _g1h->heap_region_iterate(&final_update_cl);
1981 }
1983 _live_bytes[worker_id] = final_update_cl.total_live_bytes();
1984 _used_bytes[worker_id] = final_update_cl.total_used_bytes();
1985 }
1987 size_t live_bytes() {
1988 size_t live_bytes = 0;
1989 for (uint i = 0; i < _n_workers; ++i)
1990 live_bytes += _live_bytes[i];
1991 return live_bytes;
1992 }
1994 size_t used_bytes() {
1995 size_t used_bytes = 0;
1996 for (uint i = 0; i < _n_workers; ++i)
1997 used_bytes += _used_bytes[i];
1998 return used_bytes;
1999 }
2000 };
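// The per-worker arrays are written without synchronization: each worker
// stores only into its own slot, and live_bytes()/used_bytes() are only
// summed after the task has completed. Intended use, in outline:
//
//   G1ParFinalCountTask task(g1h, &_region_bm, &_card_bm);
//   g1h->workers()->run_task(&task);   // or task.work(0) when serial
//   size_t live = task.live_bytes();   // safe: all workers have finished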
2002 class G1ParNoteEndTask;
2004 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
2005 G1CollectedHeap* _g1;
2006 int _worker_num;
2007 size_t _max_live_bytes;
2008 size_t _regions_claimed;
2009 size_t _freed_bytes;
2010 FreeRegionList* _local_cleanup_list;
2011 OldRegionSet* _old_proxy_set;
2012 HumongousRegionSet* _humongous_proxy_set;
2013 HRRSCleanupTask* _hrrs_cleanup_task;
2014 double _claimed_region_time;
2015 double _max_region_time;
2017 public:
2018 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
2019 int worker_num,
2020 FreeRegionList* local_cleanup_list,
2021 OldRegionSet* old_proxy_set,
2022 HumongousRegionSet* humongous_proxy_set,
2023 HRRSCleanupTask* hrrs_cleanup_task) :
2024 _g1(g1), _worker_num(worker_num),
2025 _max_live_bytes(0), _regions_claimed(0),
2026 _freed_bytes(0),
2027 _claimed_region_time(0.0), _max_region_time(0.0),
2028 _local_cleanup_list(local_cleanup_list),
2029 _old_proxy_set(old_proxy_set),
2030 _humongous_proxy_set(humongous_proxy_set),
2031 _hrrs_cleanup_task(hrrs_cleanup_task) { }
2033 size_t freed_bytes() { return _freed_bytes; }
2035 bool doHeapRegion(HeapRegion *hr) {
2036 // We use a claim value of zero here because all regions
2037 // were claimed with value 1 in the FinalCount task.
2038 hr->reset_gc_time_stamp();
2039 if (!hr->continuesHumongous()) {
2040 double start = os::elapsedTime();
2041 _regions_claimed++;
2042 hr->note_end_of_marking();
2043 _max_live_bytes += hr->max_live_bytes();
2044 _g1->free_region_if_empty(hr,
2045 &_freed_bytes,
2046 _local_cleanup_list,
2047 _old_proxy_set,
2048 _humongous_proxy_set,
2049 _hrrs_cleanup_task,
2050 true /* par */);
2051 double region_time = (os::elapsedTime() - start);
2052 _claimed_region_time += region_time;
2053 if (region_time > _max_region_time) {
2054 _max_region_time = region_time;
2055 }
2056 }
2057 return false;
2058 }
2060 size_t max_live_bytes() { return _max_live_bytes; }
2061 size_t regions_claimed() { return _regions_claimed; }
2062 double claimed_region_time_sec() { return _claimed_region_time; }
2063 double max_region_time_sec() { return _max_region_time; }
2064 };
2066 class G1ParNoteEndTask: public AbstractGangTask {
2067 friend class G1NoteEndOfConcMarkClosure;
2069 protected:
2070 G1CollectedHeap* _g1h;
2071 size_t _max_live_bytes;
2072 size_t _freed_bytes;
2073 FreeRegionList* _cleanup_list;
2075 public:
2076 G1ParNoteEndTask(G1CollectedHeap* g1h,
2077 FreeRegionList* cleanup_list) :
2078 AbstractGangTask("G1 note end"), _g1h(g1h),
2079 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
2081 void work(uint worker_id) {
2082 double start = os::elapsedTime();
2083 FreeRegionList local_cleanup_list("Local Cleanup List");
2084 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
2085 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
2086 HRRSCleanupTask hrrs_cleanup_task;
2087 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
2088 &old_proxy_set,
2089 &humongous_proxy_set,
2090 &hrrs_cleanup_task);
2091 if (G1CollectedHeap::use_parallel_gc_threads()) {
2092 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
2093 _g1h->workers()->active_workers(),
2094 HeapRegion::NoteEndClaimValue);
2095 } else {
2096 _g1h->heap_region_iterate(&g1_note_end);
2097 }
2098 assert(g1_note_end.complete(), "Shouldn't have yielded!");
2100 // Now update the lists
2101 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
2102 NULL /* free_list */,
2103 &old_proxy_set,
2104 &humongous_proxy_set,
2105 true /* par */);
2106 {
2107 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
2108 _max_live_bytes += g1_note_end.max_live_bytes();
2109 _freed_bytes += g1_note_end.freed_bytes();
2111 // If we iterate over the global cleanup list at the end of
2112 // cleanup to do this printing, we cannot guarantee that we only
2113 // generate output for the newly-reclaimed regions (the list
2114 // might not be empty at the beginning of cleanup; we might
2115 // still be working on its previous contents). So we do the
2116 // printing here, before we append the new regions to the global
2117 // cleanup list.
2119 G1HRPrinter* hr_printer = _g1h->hr_printer();
2120 if (hr_printer->is_active()) {
2121 HeapRegionLinkedListIterator iter(&local_cleanup_list);
2122 while (iter.more_available()) {
2123 HeapRegion* hr = iter.get_next();
2124 hr_printer->cleanup(hr);
2125 }
2126 }
2128 _cleanup_list->add_as_tail(&local_cleanup_list);
2129 assert(local_cleanup_list.is_empty(), "post-condition");
2131 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
2132 }
2133 double end = os::elapsedTime();
2134 if (G1PrintParCleanupStats) {
2135 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
2136 "claimed %u regions (tot = %8.3f ms, max = %8.3f ms).\n",
2137 worker_id, start, end, (end-start)*1000.0,
2138 g1_note_end.regions_claimed(),
2139 g1_note_end.claimed_region_time_sec()*1000.0,
2140 g1_note_end.max_region_time_sec()*1000.0);
2141 }
2142 }
2143 size_t max_live_bytes() { return _max_live_bytes; }
2144 size_t freed_bytes() { return _freed_bytes; }
2145 };
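// Each worker above frees regions into purely local lists and proxy sets
// and merges them into the global state once, so ParGCRareEvent_lock is
// held only for the short merge and the stats update. The shape of the
// pattern, as a sketch:
//
//   FreeRegionList local_cleanup_list("Local Cleanup List");  // lock-free
//   ... free regions into the local list ...
//   {
//     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
//     _cleanup_list->add_as_tail(&local_cleanup_list);  // one locked merge
//   }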
2147 class G1ParScrubRemSetTask: public AbstractGangTask {
2148 protected:
2149 G1RemSet* _g1rs;
2150 BitMap* _region_bm;
2151 BitMap* _card_bm;
2152 public:
2153 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
2154 BitMap* region_bm, BitMap* card_bm) :
2155 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
2156 _region_bm(region_bm), _card_bm(card_bm) { }
2158 void work(uint worker_id) {
2159 if (G1CollectedHeap::use_parallel_gc_threads()) {
2160 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
2161 HeapRegion::ScrubRemSetClaimValue);
2162 } else {
2163 _g1rs->scrub(_region_bm, _card_bm);
2164 }
2165 }
2167 };
2169 void ConcurrentMark::cleanup() {
2170 // world is stopped at this checkpoint
2171 assert(SafepointSynchronize::is_at_safepoint(),
2172 "world should be stopped");
2173 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2175 // If a full collection has happened, we shouldn't do this.
2176 if (has_aborted()) {
2177 g1h->set_marking_complete(); // So bitmap clearing isn't confused
2178 return;
2179 }
2181 HRSPhaseSetter x(HRSPhaseCleanup);
2182 g1h->verify_region_sets_optional();
2184 if (VerifyDuringGC) {
2185 HandleMark hm; // handle scope
2186 gclog_or_tty->print(" VerifyDuringGC:(before)");
2187 Universe::heap()->prepare_for_verify();
2188 Universe::verify(/* allow dirty */ true,
2189 /* silent */ false,
2190 /* option */ VerifyOption_G1UsePrevMarking);
2191 }
2193 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2194 g1p->record_concurrent_mark_cleanup_start();
2196 double start = os::elapsedTime();
2198 HeapRegionRemSet::reset_for_cleanup_tasks();
2200 uint n_workers;
2202 // Do counting once more with the world stopped for good measure.
2203 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2205 if (G1CollectedHeap::use_parallel_gc_threads()) {
2206 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2207 "sanity check");
2209 g1h->set_par_threads();
2210 n_workers = g1h->n_par_threads();
2211 assert(g1h->n_par_threads() == n_workers,
2212 "Should not have been reset");
2213 g1h->workers()->run_task(&g1_par_count_task);
2214 // Done with the parallel phase so reset to 0.
2215 g1h->set_par_threads(0);
2217 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2218 "sanity check");
2219 } else {
2220 n_workers = 1;
2221 g1_par_count_task.work(0);
2222 }
2224 if (VerifyDuringGC) {
2225 // Verify that the counting data accumulated during marking matches
2226 // that calculated by walking the marking bitmap.
2228 // Bitmaps to hold expected values
2229 BitMap expected_region_bm(_region_bm.size(), false);
2230 BitMap expected_card_bm(_card_bm.size(), false);
2232 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2233 &_region_bm,
2234 &_card_bm,
2235 &expected_region_bm,
2236 &expected_card_bm);
2238 if (G1CollectedHeap::use_parallel_gc_threads()) {
2239 g1h->set_par_threads((int)n_workers);
2240 g1h->workers()->run_task(&g1_par_verify_task);
2241 // Done with the parallel phase so reset to 0.
2242 g1h->set_par_threads(0);
2244 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2245 "sanity check");
2246 } else {
2247 g1_par_verify_task.work(0);
2248 }
2250 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2251 }
2253 size_t known_garbage_bytes =
2254 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
2255 g1p->set_known_garbage_bytes(known_garbage_bytes);
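// Worked example (numbers assumed): if the counting task reports
// used_bytes() == 512M and live_bytes() == 384M, then known_garbage_bytes
// == 128M; with a 512M capacity the ergo line below would report a known
// garbage percentage of 25.0%.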
2257 size_t start_used_bytes = g1h->used();
2258 _at_least_one_mark_complete = true;
2259 g1h->set_marking_complete();
2261 ergo_verbose4(ErgoConcCycles,
2262 "finish cleanup",
2263 ergo_format_byte("occupancy")
2264 ergo_format_byte("capacity")
2265 ergo_format_byte_perc("known garbage"),
2266 start_used_bytes, g1h->capacity(),
2267 known_garbage_bytes,
2268 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
2270 double count_end = os::elapsedTime();
2271 double this_final_counting_time = (count_end - start);
2272 if (G1PrintParCleanupStats) {
2273 gclog_or_tty->print_cr("Cleanup:");
2274 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
2275 this_final_counting_time*1000.0);
2276 }
2277 _total_counting_time += this_final_counting_time;
2279 if (G1PrintRegionLivenessInfo) {
2280 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2281 _g1h->heap_region_iterate(&cl);
2282 }
2284 // Install newly created mark bitMap as "prev".
2285 swapMarkBitMaps();
2287 g1h->reset_gc_time_stamp();
2289 // Note end of marking in all heap regions.
2290 double note_end_start = os::elapsedTime();
2291 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2292 if (G1CollectedHeap::use_parallel_gc_threads()) {
2293 g1h->set_par_threads((int)n_workers);
2294 g1h->workers()->run_task(&g1_par_note_end_task);
2295 g1h->set_par_threads(0);
2297 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2298 "sanity check");
2299 } else {
2300 g1_par_note_end_task.work(0);
2301 }
2303 if (!cleanup_list_is_empty()) {
2304 // The cleanup list is not empty, so we'll have to process it
2305 // concurrently. Notify anyone else that might be wanting free
2306 // regions that there will be more free regions coming soon.
2307 g1h->set_free_regions_coming();
2308 }
2309 double note_end_end = os::elapsedTime();
2310 if (G1PrintParCleanupStats) {
2311 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
2312 (note_end_end - note_end_start)*1000.0);
2313 }
2315 // Do the rem set scrubbing before the cleanup-end call below, since
2316 // it affects the metric by which we sort the heap regions.
2317 if (G1ScrubRemSets) {
2318 double rs_scrub_start = os::elapsedTime();
2319 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2320 if (G1CollectedHeap::use_parallel_gc_threads()) {
2321 g1h->set_par_threads((int)n_workers);
2322 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2323 g1h->set_par_threads(0);
2325 assert(g1h->check_heap_region_claim_values(
2326 HeapRegion::ScrubRemSetClaimValue),
2327 "sanity check");
2328 } else {
2329 g1_par_scrub_rs_task.work(0);
2330 }
2332 double rs_scrub_end = os::elapsedTime();
2333 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2334 _total_rs_scrub_time += this_rs_scrub_time;
2335 }
2337 // this will also free any regions totally full of garbage objects,
2338 // and sort the regions.
2339 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2341 // Statistics.
2342 double end = os::elapsedTime();
2343 _cleanup_times.add((end - start) * 1000.0);
2345 if (PrintGC || PrintGCDetails) {
2346 g1h->print_size_transition(gclog_or_tty,
2347 start_used_bytes,
2348 g1h->used(),
2349 g1h->capacity());
2350 }
2352 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
2353 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
2355 // Clean up will have freed any regions completely full of garbage.
2356 // Update the soft reference policy with the new heap occupancy.
2357 Universe::update_heap_info_at_gc();
2359 // We need to make this a "collection" so any collection pause that
2360 // races with it goes around and waits for completeCleanup to finish.
2361 g1h->increment_total_collections();
2363 // We reclaimed old regions so we should calculate the sizes to make
2364 // sure we update the old gen/space data.
2365 g1h->g1mm()->update_sizes();
2367 if (VerifyDuringGC) {
2368 HandleMark hm; // handle scope
2369 gclog_or_tty->print(" VerifyDuringGC:(after)");
2370 Universe::heap()->prepare_for_verify();
2371 Universe::verify(/* allow dirty */ true,
2372 /* silent */ false,
2373 /* option */ VerifyOption_G1UsePrevMarking);
2374 }
2376 g1h->verify_region_sets_optional();
2377 }
2379 void ConcurrentMark::completeCleanup() {
2380 if (has_aborted()) return;
2382 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2384 _cleanup_list.verify_optional();
2385 FreeRegionList tmp_free_list("Tmp Free List");
2387 if (G1ConcRegionFreeingVerbose) {
2388 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2389 "cleanup list has "SIZE_FORMAT" entries",
2390 _cleanup_list.length());
2391 }
2393 // No one else should be accessing the _cleanup_list at this point,
2394 // so it's not necessary to take any locks.
2395 while (!_cleanup_list.is_empty()) {
2396 HeapRegion* hr = _cleanup_list.remove_head();
2397 assert(hr != NULL, "the list was not empty");
2398 hr->par_clear();
2399 tmp_free_list.add_as_tail(hr);
2401 // Instead of adding one region at a time to the secondary_free_list,
2402 // we accumulate them in the local list and move them a few at a
2403 // time. This also cuts down on the number of notify_all() calls
2404 // we do during this process. We'll also append the local list when
2405 // _cleanup_list is empty (which means we just removed the last
2406 // region from the _cleanup_list).
2407 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2408 _cleanup_list.is_empty()) {
2409 if (G1ConcRegionFreeingVerbose) {
2410 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2411 "appending "SIZE_FORMAT" entries to the "
2412 "secondary_free_list, clean list still has "
2413 SIZE_FORMAT" entries",
2414 tmp_free_list.length(),
2415 _cleanup_list.length());
2416 }
2418 {
2419 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2420 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2421 SecondaryFreeList_lock->notify_all();
2422 }
2424 if (G1StressConcRegionFreeing) {
2425 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2426 os::sleep(Thread::current(), (jlong) 1, false);
2427 }
2428 }
2429 }
2430 }
2431 assert(tmp_free_list.is_empty(), "post-condition");
2432 }
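// The batching above bounds how often SecondaryFreeList_lock is taken and
// notify_all() is issued. Assuming the default
// G1SecondaryFreeListAppendLength of 5 and a cleanup list of 12 regions,
// the appends happen after regions 5 and 10, and then at region 12 (the
// final, list-emptying append); a mutator blocked waiting for free regions
// is woken at each append rather than once per region.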
2434 // Support closures for reference processing in G1
2436 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2437 HeapWord* addr = (HeapWord*)obj;
2438 return addr != NULL &&
2439 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2440 }
2442 class G1CMKeepAliveClosure: public OopClosure {
2443 G1CollectedHeap* _g1;
2444 ConcurrentMark* _cm;
2445 public:
2446 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2447 _g1(g1), _cm(cm) {
2448 assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2449 }
2451 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2452 virtual void do_oop( oop* p) { do_oop_work(p); }
2454 template <class T> void do_oop_work(T* p) {
2455 oop obj = oopDesc::load_decode_heap_oop(p);
2456 HeapWord* addr = (HeapWord*)obj;
2458 if (_cm->verbose_high()) {
2459 gclog_or_tty->print_cr("\t[0] we're looking at location "
2460 "*"PTR_FORMAT" = "PTR_FORMAT,
2461 p, (void*) obj);
2462 }
2464 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2465 _cm->mark_and_count(obj);
2466 _cm->mark_stack_push(obj);
2467 }
2468 }
2469 };
2471 class G1CMDrainMarkingStackClosure: public VoidClosure {
2472 ConcurrentMark* _cm;
2473 CMMarkStack* _markStack;
2474 G1CMKeepAliveClosure* _oopClosure;
2475 public:
2476 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2477 G1CMKeepAliveClosure* oopClosure) :
2478 _cm(cm),
2479 _markStack(markStack),
2480 _oopClosure(oopClosure) { }
2482 void do_void() {
2483 _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2484 }
2485 };
2487 // 'Keep Alive' closure used by parallel reference processing.
2488 // An instance of this closure is used in the parallel reference processing
2489 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2490 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2491 // placed on to discovered ref lists once so we can mark and push with no
2492 // need to check whether the object has already been marked. Using the
2493 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2494 // operating on the global mark stack. This means that an individual
2495 // worker would be doing lock-free pushes while it processes its own
2496 // discovered ref list followed by drain call. If the discovered ref lists
2497 // are unbalanced then this could cause interference with the other
2498 // workers. Using a CMTask (and its embedded local data structures)
2499 // avoids that potential interference.
2500 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2501 ConcurrentMark* _cm;
2502 CMTask* _task;
2503 int _ref_counter_limit;
2504 int _ref_counter;
2505 public:
2506 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2507 _cm(cm), _task(task),
2508 _ref_counter_limit(G1RefProcDrainInterval) {
2509 assert(_ref_counter_limit > 0, "sanity");
2510 _ref_counter = _ref_counter_limit;
2511 }
2513 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2514 virtual void do_oop( oop* p) { do_oop_work(p); }
2516 template <class T> void do_oop_work(T* p) {
2517 if (!_cm->has_overflown()) {
2518 oop obj = oopDesc::load_decode_heap_oop(p);
2519 if (_cm->verbose_high()) {
2520 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2521 "*"PTR_FORMAT" = "PTR_FORMAT,
2522 _task->task_id(), p, (void*) obj);
2523 }
2525 _task->deal_with_reference(obj);
2526 _ref_counter--;
2528 if (_ref_counter == 0) {
2529 // We have dealt with _ref_counter_limit references, pushing them and objects
2530 // reachable from them on to the local stack (and possibly the global stack).
2531 // Call do_marking_step() to process these entries. We call the routine in a
2532 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2533 // with the entries that we've pushed as a result of the deal_with_reference
2534 // calls above) or we overflow.
2535 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2536 // while there may still be some work to do. (See the comment at the
2537 // beginning of CMTask::do_marking_step() for those conditions - one of which
2538 // is reaching the specified time target.) It is only when
2539 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2540 // that the marking has completed.
2541 do {
2542 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2543 _task->do_marking_step(mark_step_duration_ms,
2544 false /* do_stealing */,
2545 false /* do_termination */);
2546 } while (_task->has_aborted() && !_cm->has_overflown());
2547 _ref_counter = _ref_counter_limit;
2548 }
2549 } else {
2550 if (_cm->verbose_high()) {
2551 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2552 }
2553 }
2554 }
2555 };
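// In effect the closure above works in batches: it buffers
// G1RefProcDrainInterval calls to deal_with_reference(), then drains the
// local (and possibly global) stacks via do_marking_step() until the step
// completes without aborting or the global stack overflows, and finally
// resets _ref_counter for the next batch. This keeps the per-reference
// cost low while bounding how much pending work a worker accumulates.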
2557 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2558 ConcurrentMark* _cm;
2559 CMTask* _task;
2560 public:
2561 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2562 _cm(cm), _task(task) { }
2564 void do_void() {
2565 do {
2566 if (_cm->verbose_high()) {
2567 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2568 _task->task_id());
2569 }
2571 // We call CMTask::do_marking_step() to completely drain the local and
2572 // global marking stacks. The routine is called in a loop, which we'll
2573 // exit if there's nothing more to do (i.e. we've completely drained the
2574 // entries that were pushed as a result of applying the
2575 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2576 // lists above) or we overflow the global marking stack.
2577 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2578 // while there may still be some work to do. (See the comment at the
2579 // beginning of CMTask::do_marking_step() for those conditions - one of which
2580 // is reaching the specified time target.) It is only when
2581 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2582 // that the marking has completed.
2584 _task->do_marking_step(1000000000.0 /* something very large */,
2585 true /* do_stealing */,
2586 true /* do_termination */);
2587 } while (_task->has_aborted() && !_cm->has_overflown());
2588 }
2589 };
2591 // Implementation of AbstractRefProcTaskExecutor for parallel
2592 // reference processing at the end of G1 concurrent marking
2594 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2595 private:
2596 G1CollectedHeap* _g1h;
2597 ConcurrentMark* _cm;
2598 WorkGang* _workers;
2599 int _active_workers;
2601 public:
2602 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2603 ConcurrentMark* cm,
2604 WorkGang* workers,
2605 int n_workers) :
2606 _g1h(g1h), _cm(cm),
2607 _workers(workers), _active_workers(n_workers) { }
2609 // Executes the given task using concurrent marking worker threads.
2610 virtual void execute(ProcessTask& task);
2611 virtual void execute(EnqueueTask& task);
2612 };
2614 class G1CMRefProcTaskProxy: public AbstractGangTask {
2615 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2616 ProcessTask& _proc_task;
2617 G1CollectedHeap* _g1h;
2618 ConcurrentMark* _cm;
2620 public:
2621 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2622 G1CollectedHeap* g1h,
2623 ConcurrentMark* cm) :
2624 AbstractGangTask("Process reference objects in parallel"),
2625 _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2627 virtual void work(uint worker_id) {
2628 CMTask* marking_task = _cm->task(worker_id);
2629 G1CMIsAliveClosure g1_is_alive(_g1h);
2630 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2631 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2633 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2634 }
2635 };
2637 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2638 assert(_workers != NULL, "Need parallel worker threads.");
2640 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2642 // We need to reset the phase for each task execution so that
2643 // the termination protocol of CMTask::do_marking_step works.
2644 _cm->set_phase(_active_workers, false /* concurrent */);
2645 _g1h->set_par_threads(_active_workers);
2646 _workers->run_task(&proc_task_proxy);
2647 _g1h->set_par_threads(0);
2648 }
2650 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2651 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2652 EnqueueTask& _enq_task;
2654 public:
2655 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2656 AbstractGangTask("Enqueue reference objects in parallel"),
2657 _enq_task(enq_task) { }
2659 virtual void work(uint worker_id) {
2660 _enq_task.work(worker_id);
2661 }
2662 };
2664 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2665 assert(_workers != NULL, "Need parallel worker threads.");
2667 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2669 _g1h->set_par_threads(_active_workers);
2670 _workers->run_task(&enq_task_proxy);
2671 _g1h->set_par_threads(0);
2672 }
2674 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2675 ResourceMark rm;
2676 HandleMark hm;
2678 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2680 // Is alive closure.
2681 G1CMIsAliveClosure g1_is_alive(g1h);
2683 // Inner scope to exclude the cleaning of the string and symbol
2684 // tables from the displayed time.
2685 {
2686 bool verbose = PrintGC && PrintGCDetails;
2687 if (verbose) {
2688 gclog_or_tty->put(' ');
2689 }
2690 TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2692 ReferenceProcessor* rp = g1h->ref_processor_cm();
2694 // See the comment in G1CollectedHeap::ref_processing_init()
2695 // about how reference processing currently works in G1.
2697 // Process weak references.
2698 rp->setup_policy(clear_all_soft_refs);
2699 assert(_markStack.isEmpty(), "mark stack should be empty");
2701 G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2702 G1CMDrainMarkingStackClosure
2703 g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2705 // We use the work gang from the G1CollectedHeap and we utilize all
2706 // the worker threads.
2707 uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2708 active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
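// Worked example (values assumed): with 24 active GC workers but
// _max_task_num == 16, reference processing runs with 16 workers, since
// each parallel ref-proc worker drives exactly one CMTask; with no work
// gang at all the clamp yields the single-threaded value of 1.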
2710 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2711 g1h->workers(), active_workers);
2713 if (rp->processing_is_mt()) {
2714 // Set the degree of MT here. If the discovery is done MT, there
2715 // may have been a different number of threads doing the discovery
2716 // and a different number of discovered lists may have Ref objects.
2717 // That is OK as long as the Reference lists are balanced (see
2718 // balance_all_queues() and balance_queues()).
2719 rp->set_active_mt_degree(active_workers);
2721 rp->process_discovered_references(&g1_is_alive,
2722 &g1_keep_alive,
2723 &g1_drain_mark_stack,
2724 &par_task_executor);
2726 // The work routines of the parallel keep_alive and drain_marking_stack
2727 // will set the has_overflown flag if we overflow the global marking
2728 // stack.
2729 } else {
2730 rp->process_discovered_references(&g1_is_alive,
2731 &g1_keep_alive,
2732 &g1_drain_mark_stack,
2733 NULL);
2734 }
2736 assert(_markStack.overflow() || _markStack.isEmpty(),
2737 "mark stack should be empty (unless it overflowed)");
2738 if (_markStack.overflow()) {
2739 // Should have been done already when we tried to push an
2740 // entry on to the global mark stack. But let's do it again.
2741 set_has_overflown();
2742 }
2744 if (rp->processing_is_mt()) {
2745 assert(rp->num_q() == active_workers, "why not");
2746 rp->enqueue_discovered_references(&par_task_executor);
2747 } else {
2748 rp->enqueue_discovered_references();
2749 }
2751 rp->verify_no_references_recorded();
2752 assert(!rp->discovery_enabled(), "Post condition");
2753 }
2755 // Now clean up stale oops in StringTable
2756 StringTable::unlink(&g1_is_alive);
2757 // Clean up unreferenced symbols in symbol table.
2758 SymbolTable::unlink();
2759 }
2761 void ConcurrentMark::swapMarkBitMaps() {
2762 CMBitMapRO* temp = _prevMarkBitMap;
2763 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2764 _nextMarkBitMap = (CMBitMap*) temp;
2765 }
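// A pointer swap is sufficient here: after a successful marking the "next"
// bitmap holds the fresh liveness information and simply becomes the new
// "prev" bitmap, while the old prev map will be cleared and reused as the
// next bitmap by the following cycle. The casts are only needed because
// the prev map is held through the read-only CMBitMapRO view.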
2767 class CMRemarkTask: public AbstractGangTask {
2768 private:
2769 ConcurrentMark *_cm;
2771 public:
2772 void work(uint worker_id) {
2773 // Since all available tasks are actually started, we should
2774 // only proceed if we're supposed to be active.
2775 if (worker_id < _cm->active_tasks()) {
2776 CMTask* task = _cm->task(worker_id);
2777 task->record_start_time();
2778 do {
2779 task->do_marking_step(1000000000.0 /* something very large */,
2780 true /* do_stealing */,
2781 true /* do_termination */);
2782 } while (task->has_aborted() && !_cm->has_overflown());
2783 // If we overflow, then we do not want to restart. We instead
2784 // want to abort remark and do concurrent marking again.
2785 task->record_end_time();
2786 }
2787 }
2789 CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2790 AbstractGangTask("Par Remark"), _cm(cm) {
2791 _cm->terminator()->reset_for_reuse(active_workers);
2792 }
2793 };
2795 void ConcurrentMark::checkpointRootsFinalWork() {
2796 ResourceMark rm;
2797 HandleMark hm;
2798 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2800 g1h->ensure_parsability(false);
2802 if (G1CollectedHeap::use_parallel_gc_threads()) {
2803 G1CollectedHeap::StrongRootsScope srs(g1h);
2804 // this is remark, so we'll use up all active threads
2805 uint active_workers = g1h->workers()->active_workers();
2806 if (active_workers == 0) {
2807 assert(active_workers > 0, "Should have been set earlier");
2808 active_workers = (uint) ParallelGCThreads;
2809 g1h->workers()->set_active_workers(active_workers);
2810 }
2811 set_phase(active_workers, false /* concurrent */);
2812 // Leave _parallel_marking_threads at its
2813 // value originally calculated in the ConcurrentMark
2814 // constructor and pass values of the active workers
2815 // through the gang in the task.
2817 CMRemarkTask remarkTask(this, active_workers);
2818 g1h->set_par_threads(active_workers);
2819 g1h->workers()->run_task(&remarkTask);
2820 g1h->set_par_threads(0);
2821 } else {
2822 G1CollectedHeap::StrongRootsScope srs(g1h);
2823 // this is remark, so we'll use up all available threads
2824 uint active_workers = 1;
2825 set_phase(active_workers, false /* concurrent */);
2827 CMRemarkTask remarkTask(this, active_workers);
2828 // We will start all available threads, even if we decide that the
2829 // active_workers will be fewer. The extra ones will just bail out
2830 // immediately.
2831 remarkTask.work(0);
2832 }
2833 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2834 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2836 print_stats();
2838 #if VERIFY_OBJS_PROCESSED
2839 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2840 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2841 _scan_obj_cl.objs_processed,
2842 ThreadLocalObjQueue::objs_enqueued);
2843 guarantee(_scan_obj_cl.objs_processed ==
2844 ThreadLocalObjQueue::objs_enqueued,
2845 "Different number of objs processed and enqueued.");
2846 }
2847 #endif
2848 }
2850 #ifndef PRODUCT
2852 class PrintReachableOopClosure: public OopClosure {
2853 private:
2854 G1CollectedHeap* _g1h;
2855 outputStream* _out;
2856 VerifyOption _vo;
2857 bool _all;
2859 public:
2860 PrintReachableOopClosure(outputStream* out,
2861 VerifyOption vo,
2862 bool all) :
2863 _g1h(G1CollectedHeap::heap()),
2864 _out(out), _vo(vo), _all(all) { }
2866 void do_oop(narrowOop* p) { do_oop_work(p); }
2867 void do_oop( oop* p) { do_oop_work(p); }
2869 template <class T> void do_oop_work(T* p) {
2870 oop obj = oopDesc::load_decode_heap_oop(p);
2871 const char* str = NULL;
2872 const char* str2 = "";
2874 if (obj == NULL) {
2875 str = "";
2876 } else if (!_g1h->is_in_g1_reserved(obj)) {
2877 str = " O";
2878 } else {
2879 HeapRegion* hr = _g1h->heap_region_containing(obj);
2880 guarantee(hr != NULL, "invariant");
2881 bool over_tams = false;
2882 bool marked = false;
2884 switch (_vo) {
2885 case VerifyOption_G1UsePrevMarking:
2886 over_tams = hr->obj_allocated_since_prev_marking(obj);
2887 marked = _g1h->isMarkedPrev(obj);
2888 break;
2889 case VerifyOption_G1UseNextMarking:
2890 over_tams = hr->obj_allocated_since_next_marking(obj);
2891 marked = _g1h->isMarkedNext(obj);
2892 break;
2893 case VerifyOption_G1UseMarkWord:
2894 marked = obj->is_gc_marked();
2895 break;
2896 default:
2897 ShouldNotReachHere();
2898 }
2900 if (over_tams) {
2901 str = " >";
2902 if (marked) {
2903 str2 = " AND MARKED";
2904 }
2905 } else if (marked) {
2906 str = " M";
2907 } else {
2908 str = " NOT";
2909 }
2910 }
2912 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2913 p, (void*) obj, str, str2);
2914 }
2915 };
2917 class PrintReachableObjectClosure : public ObjectClosure {
2918 private:
2919 G1CollectedHeap* _g1h;
2920 outputStream* _out;
2921 VerifyOption _vo;
2922 bool _all;
2923 HeapRegion* _hr;
2925 public:
2926 PrintReachableObjectClosure(outputStream* out,
2927 VerifyOption vo,
2928 bool all,
2929 HeapRegion* hr) :
2930 _g1h(G1CollectedHeap::heap()),
2931 _out(out), _vo(vo), _all(all), _hr(hr) { }
2933 void do_object(oop o) {
2934 bool over_tams = false;
2935 bool marked = false;
2937 switch (_vo) {
2938 case VerifyOption_G1UsePrevMarking:
2939 over_tams = _hr->obj_allocated_since_prev_marking(o);
2940 marked = _g1h->isMarkedPrev(o);
2941 break;
2942 case VerifyOption_G1UseNextMarking:
2943 over_tams = _hr->obj_allocated_since_next_marking(o);
2944 marked = _g1h->isMarkedNext(o);
2945 break;
2946 case VerifyOption_G1UseMarkWord:
2947 marked = o->is_gc_marked();
2948 break;
2949 default:
2950 ShouldNotReachHere();
2951 }
2952 bool print_it = _all || over_tams || marked;
2954 if (print_it) {
2955 _out->print_cr(" "PTR_FORMAT"%s",
2956 o, (over_tams) ? " >" : (marked) ? " M" : "");
2957 PrintReachableOopClosure oopCl(_out, _vo, _all);
2958 o->oop_iterate(&oopCl);
2959 }
2960 }
2961 };
2963 class PrintReachableRegionClosure : public HeapRegionClosure {
2964 private:
2965 outputStream* _out;
2966 VerifyOption _vo;
2967 bool _all;
2969 public:
2970 bool doHeapRegion(HeapRegion* hr) {
2971 HeapWord* b = hr->bottom();
2972 HeapWord* e = hr->end();
2973 HeapWord* t = hr->top();
2974 HeapWord* p = NULL;
2976 switch (_vo) {
2977 case VerifyOption_G1UsePrevMarking:
2978 p = hr->prev_top_at_mark_start();
2979 break;
2980 case VerifyOption_G1UseNextMarking:
2981 p = hr->next_top_at_mark_start();
2982 break;
2983 case VerifyOption_G1UseMarkWord:
2984 // When we are verifying marking using the mark word
2985 // TAMS has no relevance.
2986 assert(p == NULL, "post-condition");
2987 break;
2988 default:
2989 ShouldNotReachHere();
2990 }
2991 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2992 "TAMS: "PTR_FORMAT, b, e, t, p);
2993 _out->cr();
2995 HeapWord* from = b;
2996 HeapWord* to = t;
2998 if (to > from) {
2999 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
3000 _out->cr();
3001 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
3002 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
3003 _out->cr();
3004 }
3006 return false;
3007 }
3009 PrintReachableRegionClosure(outputStream* out,
3010 VerifyOption vo,
3011 bool all) :
3012 _out(out), _vo(vo), _all(all) { }
3013 };
3015 static const char* verify_option_to_tams(VerifyOption vo) {
3016 switch (vo) {
3017 case VerifyOption_G1UsePrevMarking:
3018 return "PTAMS";
3019 case VerifyOption_G1UseNextMarking:
3020 return "NTAMS";
3021 default:
3022 return "NONE";
3023 }
3024 }
3026 void ConcurrentMark::print_reachable(const char* str,
3027 VerifyOption vo,
3028 bool all) {
3029 gclog_or_tty->cr();
3030 gclog_or_tty->print_cr("== Doing heap dump... ");
3032 if (G1PrintReachableBaseFile == NULL) {
3033 gclog_or_tty->print_cr(" #### error: no base file defined");
3034 return;
3035 }
3037 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
3038 (JVM_MAXPATHLEN - 1)) {
3039 gclog_or_tty->print_cr(" #### error: file name too long");
3040 return;
3041 }
3043 char file_name[JVM_MAXPATHLEN];
3044 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
3045 gclog_or_tty->print_cr(" dumping to file %s", file_name);
3047 fileStream fout(file_name);
3048 if (!fout.is_open()) {
3049 gclog_or_tty->print_cr(" #### error: could not open file");
3050 return;
3051 }
3053 outputStream* out = &fout;
3054 out->print_cr("-- USING %s", verify_option_to_tams(vo));
3055 out->cr();
3057 out->print_cr("--- ITERATING OVER REGIONS");
3058 out->cr();
3059 PrintReachableRegionClosure rcl(out, vo, all);
3060 _g1h->heap_region_iterate(&rcl);
3061 out->cr();
3063 gclog_or_tty->print_cr(" done");
3064 gclog_or_tty->flush();
3065 }
3067 #endif // PRODUCT
3069 // This note is for drainAllSATBBuffers and the code in between.
3070 // In the future we could reuse a task to do this work during an
3071 // evacuation pause (since now tasks are not active and can be claimed
3072 // during an evacuation pause). This was a late change to the code and
3073 // is currently not being taken advantage of.
3075 void ConcurrentMark::deal_with_reference(oop obj) {
3076 if (verbose_high()) {
3077 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
3078 (void*) obj);
3079 }
3081 HeapWord* objAddr = (HeapWord*) obj;
3082 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3083 if (_g1h->is_in_g1_reserved(objAddr)) {
3084 assert(obj != NULL, "null check is implicit");
3085 if (!_nextMarkBitMap->isMarked(objAddr)) {
3086 // Only get the containing region if the object is not marked on the
3087 // bitmap (otherwise, it's a waste of time since we won't do
3088 // anything with it).
3089 HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
3090 if (!hr->obj_allocated_since_next_marking(obj)) {
3091 if (verbose_high()) {
3092 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
3093 "marked", (void*) obj);
3094 }
3096 // we need to mark it first
3097 if (_nextMarkBitMap->parMark(objAddr)) {
3098 // No OrderAccess::store_load() is needed. It is implicit in the
3099 // CAS done in parMark(objAddr) above
3100 HeapWord* finger = _finger;
3101 if (objAddr < finger) {
3102 if (verbose_high()) {
3103 gclog_or_tty->print_cr("[global] below the global finger "
3104 "("PTR_FORMAT"), pushing it", finger);
3105 }
3106 if (!mark_stack_push(obj)) {
3107 if (verbose_low()) {
3108 gclog_or_tty->print_cr("[global] global stack overflow during "
3109 "deal_with_reference");
3110 }
3111 }
3112 }
3113 }
3114 }
3115 }
3116 }
3117 }
3119 class CMGlobalObjectClosure : public ObjectClosure {
3120 private:
3121 ConcurrentMark* _cm;
3123 public:
3124 void do_object(oop obj) {
3125 _cm->deal_with_reference(obj);
3126 }
3128 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
3129 };
3131 void ConcurrentMark::drainAllSATBBuffers() {
3132 guarantee(false, "drainAllSATBBuffers(): don't call this any more");
3134 CMGlobalObjectClosure oc(this);
3135 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3136 satb_mq_set.set_closure(&oc);
3138 while (satb_mq_set.apply_closure_to_completed_buffer()) {
3139 if (verbose_medium()) {
3140 gclog_or_tty->print_cr("[global] processed an SATB buffer");
3141 }
3142 }
3144 // no need to check whether we should do this, as this is only
3145 // called during an evacuation pause
3146 satb_mq_set.iterate_closure_all_threads();
3148 satb_mq_set.set_closure(NULL);
3149 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
3150 }
3152 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
3153 // Note we are overriding the read-only view of the prev map here, via
3154 // the cast.
3155 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
3156 }
3158 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
3159 _nextMarkBitMap->clearRange(mr);
3160 }
3162 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
3163 clearRangePrevBitmap(mr);
3164 clearRangeNextBitmap(mr);
3165 }
3167 HeapRegion*
3168 ConcurrentMark::claim_region(int task_num) {
3169 // "checkpoint" the finger
3170 HeapWord* finger = _finger;
3172 // _heap_end will not change underneath our feet; it only changes at
3173 // yield points.
3174 while (finger < _heap_end) {
3175 assert(_g1h->is_in_g1_reserved(finger), "invariant");
3177 // Note on how this code handles humongous regions. In the
3178 // normal case the finger will reach the start of a "starts
3179 // humongous" (SH) region. Its end will either be the end of the
3180 // last "continues humongous" (CH) region in the sequence, or the
3181 // standard end of the SH region (if the SH is the only region in
3182 // the sequence). That way claim_region() will skip over the CH
3183 // regions. However, there is a subtle race between a CM thread
3184 // executing this method and a mutator thread doing a humongous
3185 // object allocation. The two are not mutually exclusive as the CM
3186 // thread does not need to hold the Heap_lock when it gets
3187 // here. So there is a chance that claim_region() will come across
3188 // a free region that's in the process of becoming a SH or a CH
3189 // region. In the former case, it will either
3190 // a) Miss the update to the region's end, in which case it will
3191 // visit every subsequent CH region, will find their bitmaps
3192 // empty, and do nothing, or
3193 // b) Will observe the update of the region's end (in which case
3194 // it will skip the subsequent CH regions).
3195 // If it comes across a region that suddenly becomes CH, the
3196 // scenario will be similar to b). So, the race between
3197 // claim_region() and a humongous object allocation might force us
3198 // to do a bit of unnecessary work (due to some unnecessary bitmap
3199 // iterations) but it should not introduce any correctness issues.
3200 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
3201 HeapWord* bottom = curr_region->bottom();
3202 HeapWord* end = curr_region->end();
3203 HeapWord* limit = curr_region->next_top_at_mark_start();
3205 if (verbose_low()) {
3206 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
3207 "["PTR_FORMAT", "PTR_FORMAT"), "
3208 "limit = "PTR_FORMAT,
3209 task_num, curr_region, bottom, end, limit);
3210 }
3212 // Is the gap between reading the finger and doing the CAS too long?
3213 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
3214 if (res == finger) {
3215 // we succeeded
3217 // notice that _finger == end cannot be guaranteed here since
3218 // someone else might have moved the finger even further
3219 assert(_finger >= end, "the finger should have moved forward");
3221 if (verbose_low()) {
3222 gclog_or_tty->print_cr("[%d] we were successful with region = "
3223 PTR_FORMAT, task_num, curr_region);
3224 }
3226 if (limit > bottom) {
3227 if (verbose_low()) {
3228 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
3229 "returning it ", task_num, curr_region);
3230 }
3231 return curr_region;
3232 } else {
3233 assert(limit == bottom,
3234 "the region limit should be at bottom");
3235 if (verbose_low()) {
3236 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
3237 "returning NULL", task_num, curr_region);
3238 }
3239 // we return NULL and the caller should try calling
3240 // claim_region() again.
3241 return NULL;
3242 }
3243 } else {
3244 assert(_finger > finger, "the finger should have moved forward");
3245 if (verbose_low()) {
3246 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
3247 "global finger = "PTR_FORMAT", "
3248 "our finger = "PTR_FORMAT,
3249 task_num, _finger, finger);
3250 }
3252 // read it again
3253 finger = _finger;
3254 }
3255 }
3257 return NULL;
3258 }
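// Usage sketch (hypothetical caller, illustration only): a marking task
// keeps calling claim_region() until it either receives a non-empty region
// or the global finger reaches the end of the heap:
//
//   HeapRegion* hr = _cm->claim_region(task_num);
//   while (hr == NULL && _cm->finger() < heap_end) {
//     // lost the CAS race or claimed an empty region; try again
//     hr = _cm->claim_region(task_num);
//   }
//
// heap_end here stands in for the real caller's termination bound.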
3260 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
3261 guarantee(false, "invalidate_aborted_regions_in_cset(): "
3262 "don't call this any more");
3264 bool result = false;
3265 for (int i = 0; i < (int)_max_task_num; ++i) {
3266 CMTask* the_task = _tasks[i];
3267 MemRegion mr = the_task->aborted_region();
3268 if (mr.start() != NULL) {
3269 assert(mr.end() != NULL, "invariant");
3270 assert(mr.word_size() > 0, "invariant");
3271 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
3272 assert(hr != NULL, "invariant");
3273 if (hr->in_collection_set()) {
3274 // The region points into the collection set
3275 the_task->set_aborted_region(MemRegion());
3276 result = true;
3277 }
3278 }
3279 }
3280 return result;
3281 }
3283 bool ConcurrentMark::has_aborted_regions() {
3284 for (int i = 0; i < (int)_max_task_num; ++i) {
3285 CMTask* the_task = _tasks[i];
3286 MemRegion mr = the_task->aborted_region();
3287 if (mr.start() != NULL) {
3288 assert(mr.end() != NULL, "invariant");
3289 assert(mr.word_size() > 0, "invariant");
3290 return true;
3291 }
3292 }
3293 return false;
3294 }
3296 void ConcurrentMark::oops_do(OopClosure* cl) {
3297 if (_markStack.size() > 0 && verbose_low()) {
3298 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
3299 "size = %d", _markStack.size());
3300 }
3301 // we first iterate over the contents of the mark stack...
3302 _markStack.oops_do(cl);
3304 for (int i = 0; i < (int)_max_task_num; ++i) {
3305 OopTaskQueue* queue = _task_queues->queue((int)i);
3307 if (queue->size() > 0 && verbose_low()) {
3308 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
3309 "size = %d", i, queue->size());
3310 }
3312 // ...then over the contents of the all the task queues.
3313 queue->oops_do(cl);
3314 }
3315 }
3317 #ifndef PRODUCT
3318 enum VerifyNoCSetOopsPhase {
3319 VerifyNoCSetOopsStack,
3320 VerifyNoCSetOopsQueues,
3321 VerifyNoCSetOopsSATBCompleted,
3322 VerifyNoCSetOopsSATBThread
3323 };
3325 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
3326 private:
3327 G1CollectedHeap* _g1h;
3328 VerifyNoCSetOopsPhase _phase;
3329 int _info;
3331 const char* phase_str() {
3332 switch (_phase) {
3333 case VerifyNoCSetOopsStack: return "Stack";
3334 case VerifyNoCSetOopsQueues: return "Queue";
3335 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
3336 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
3337 default: ShouldNotReachHere();
3338 }
3339 return NULL;
3340 }
3342 void do_object_work(oop obj) {
3343 guarantee(!_g1h->obj_in_cs(obj),
3344 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
3345 (void*) obj, phase_str(), _info));
3346 }
3348 public:
3349 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
3351 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
3352 _phase = phase;
3353 _info = info;
3354 }
3356 virtual void do_oop(oop* p) {
3357 oop obj = oopDesc::load_decode_heap_oop(p);
3358 do_object_work(obj);
3359 }
3361 virtual void do_oop(narrowOop* p) {
3362 // We should not come across narrow oops while scanning marking
3363 // stacks and SATB buffers.
3364 ShouldNotReachHere();
3365 }
3367 virtual void do_object(oop obj) {
3368 do_object_work(obj);
3369 }
3370 };
3372 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3373 bool verify_enqueued_buffers,
3374 bool verify_thread_buffers,
3375 bool verify_fingers) {
3376 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3377 if (!G1CollectedHeap::heap()->mark_in_progress()) {
3378 return;
3379 }
3381 VerifyNoCSetOopsClosure cl;
3383 if (verify_stacks) {
3384 // Verify entries on the global mark stack
3385 cl.set_phase(VerifyNoCSetOopsStack);
3386 _markStack.oops_do(&cl);
3388 // Verify entries on the task queues
3389 for (int i = 0; i < (int) _max_task_num; i += 1) {
3390 cl.set_phase(VerifyNoCSetOopsQueues, i);
3391 OopTaskQueue* queue = _task_queues->queue(i);
3392 queue->oops_do(&cl);
3393 }
3394 }
3396 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3398 // Verify entries on the enqueued SATB buffers
3399 if (verify_enqueued_buffers) {
3400 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3401 satb_qs.iterate_completed_buffers_read_only(&cl);
3402 }
3404 // Verify entries on the per-thread SATB buffers
3405 if (verify_thread_buffers) {
3406 cl.set_phase(VerifyNoCSetOopsSATBThread);
3407 satb_qs.iterate_thread_buffers_read_only(&cl);
3408 }
3410 if (verify_fingers) {
3411 // Verify the global finger
3412 HeapWord* global_finger = finger();
3413 if (global_finger != NULL && global_finger < _heap_end) {
3414 // The global finger always points to a heap region boundary. We
3415 // use heap_region_containing_raw() to get the containing region
3416 // given that the global finger could be pointing to a free region
3417 // which subsequently becomes continues humongous. If that
3418 // happens, heap_region_containing() will return the bottom of the
3419 // corresponding starts humongous region and the check below will
3420 // not hold any more.
3421 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3422 guarantee(global_finger == global_hr->bottom(),
3423 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3424 global_finger, HR_FORMAT_PARAMS(global_hr)));
3425 }
3427 // Verify the task fingers
3428 assert(parallel_marking_threads() <= _max_task_num, "sanity");
3429 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3430 CMTask* task = _tasks[i];
3431 HeapWord* task_finger = task->finger();
3432 if (task_finger != NULL && task_finger < _heap_end) {
3433 // See above note on the global finger verification.
3434 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3435 guarantee(task_finger == task_hr->bottom() ||
3436 !task_hr->in_collection_set(),
3437 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3438 task_finger, HR_FORMAT_PARAMS(task_hr)));
3439 }
3440 }
3441 }
3442 }
3443 #endif // PRODUCT
3445 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
3446 _markStack.setEmpty();
3447 _markStack.clear_overflow();
3448 _regionStack.setEmpty();
3449 _regionStack.clear_overflow();
3450 if (clear_overflow) {
3451 clear_has_overflown();
3452 } else {
3453 assert(has_overflown(), "pre-condition");
3454 }
3455 _finger = _heap_start;
3457 for (int i = 0; i < (int)_max_task_num; ++i) {
3458 OopTaskQueue* queue = _task_queues->queue(i);
3459 queue->set_empty();
3460 // Clear any partial regions from the CMTasks
3461 _tasks[i]->clear_aborted_region();
3462 }
3463 }
3465 // Aggregate the counting data that was constructed concurrently
3466 // with marking.
3467 class AggregateCountDataHRClosure: public HeapRegionClosure {
3468 ConcurrentMark* _cm;
3469 BitMap* _cm_card_bm;
3470 size_t _max_task_num;
3472 public:
3473 AggregateCountDataHRClosure(ConcurrentMark *cm,
3474 BitMap* cm_card_bm,
3475 size_t max_task_num) :
3476 _cm(cm), _cm_card_bm(cm_card_bm),
3477 _max_task_num(max_task_num) { }
3479 bool is_card_aligned(HeapWord* p) {
3480 return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
3481 }
3483 bool doHeapRegion(HeapRegion* hr) {
3484 if (hr->continuesHumongous()) {
3485 // We will ignore these here and process them when their
3486 // associated "starts humongous" region is processed.
3487 // Note that we cannot rely on their associated
3488 // "starts humongous" region to have their bit set to 1
3489 // since, due to the region chunking in the parallel region
3490 // iteration, a "continues humongous" region might be visited
3491 // before its associated "starts humongous".
3492 return false;
3493 }
3495 HeapWord* start = hr->bottom();
3496 HeapWord* limit = hr->next_top_at_mark_start();
3497 HeapWord* end = hr->end();
3499 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3500 err_msg("Preconditions not met - "
3501 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3502 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3503 start, limit, hr->top(), hr->end()));
3505 assert(hr->next_marked_bytes() == 0, "Precondition");
3507 if (start == limit) {
3508 // NTAMS of this region has not been set so nothing to do.
3509 return false;
3510 }
3512 assert(is_card_aligned(start), "sanity");
3513 assert(is_card_aligned(end), "sanity");
3515 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3516 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3517 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3519 // If ntams is not card aligned then we bump the index for
3520 // limit so that we get the card spanning ntams.
3521 if (!is_card_aligned(limit)) {
3522 limit_idx += 1;
3523 }
3525 assert(limit_idx <= end_idx, "or else use atomics");
3527 // Aggregate the "stripe" in the count data associated with hr.
3528 size_t hrs_index = hr->hrs_index();
3529 size_t marked_bytes = 0;
3531 for (int i = 0; (size_t)i < _max_task_num; i += 1) {
3532 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3533 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3535 // Fetch the marked_bytes in this region for task i and
3536 // add it to the running total for this region.
3537 marked_bytes += marked_bytes_array[hrs_index];
3539 // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
3540 // into the global card bitmap.
3541 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3543 while (scan_idx < limit_idx) {
3544 assert(task_card_bm->at(scan_idx) == true, "should be");
3545 _cm_card_bm->set_bit(scan_idx);
3546 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3548 // BitMap::get_next_one_offset() can handle the case when
3549 // its left_offset parameter is greater than its right_offset
3550 // parameter. It does, however, have an early exit if
3551 // left_offset == right_offset. So let's limit the value
3552 // passed in for left offset here (see the sketch after this closure).
3553 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3554 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3555 }
3556 }
3558 // Update the marked bytes for this region.
3559 hr->add_to_marked_bytes(marked_bytes);
3561 // Now set the top at count to NTAMS.
3562 hr->set_top_at_conc_mark_count(limit);
3564 // Next heap region
3565 return false;
3566 }
3567 };
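// Illustrative sketch only (not part of this change): the while loop in
// doHeapRegion() above is, in effect, a bitmap union restricted to
// [start_idx, limit_idx). With a source bitmap 'src' and a destination
// bitmap 'dst' it could be written as:
//
//   BitMap::idx_t i = src->get_next_one_offset(start_idx, limit_idx);
//   while (i < limit_idx) {
//     dst->set_bit(i);
//     // clamp the left offset: get_next_one_offset() only
//     // early-exits when left_offset == right_offset
//     i = src->get_next_one_offset(MIN2(i + 1, limit_idx), limit_idx);
//   }
//
// For example, with start_idx = 4, limit_idx = 8 and bits {5, 7} set in
// src, the loop visits indices 5 and 7 and sets exactly those two bits
// in dst.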
3569 class G1AggregateCountDataTask: public AbstractGangTask {
3570 protected:
3571 G1CollectedHeap* _g1h;
3572 ConcurrentMark* _cm;
3573 BitMap* _cm_card_bm;
3574 size_t _max_task_num;
3575 int _active_workers;
3577 public:
3578 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3579 ConcurrentMark* cm,
3580 BitMap* cm_card_bm,
3581 size_t max_task_num,
3582 int n_workers) :
3583 AbstractGangTask("Count Aggregation"),
3584 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3585 _max_task_num(max_task_num),
3586 _active_workers(n_workers) { }
3588 void work(uint worker_id) {
3589 AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
3591 if (G1CollectedHeap::use_parallel_gc_threads()) {
3592 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3593 _active_workers,
3594 HeapRegion::AggregateCountClaimValue);
3595 } else {
3596 _g1h->heap_region_iterate(&cl);
3597 }
3598 }
3599 };
3602 void ConcurrentMark::aggregate_count_data() {
3603 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3604 _g1h->workers()->active_workers() :
3605 1);
3607 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3608 _max_task_num, n_workers);
3610 if (G1CollectedHeap::use_parallel_gc_threads()) {
3611 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3612 "sanity check");
3613 _g1h->set_par_threads(n_workers);
3614 _g1h->workers()->run_task(&g1_par_agg_task);
3615 _g1h->set_par_threads(0);
3617 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3618 "sanity check");
3619 _g1h->reset_heap_region_claim_values();
3620 } else {
3621 g1_par_agg_task.work(0);
3622 }
3623 }
3625 // Clear the per-worker arrays used to store the per-region counting data
3626 void ConcurrentMark::clear_all_count_data() {
3627 // Clear the global card bitmap - it will be filled during
3628 // liveness count aggregation (during remark) and the
3629 // final counting task.
3630 _card_bm.clear();
3632 // Clear the global region bitmap - it will be filled as part
3633 // of the final counting task.
3634 _region_bm.clear();
3636 size_t max_regions = _g1h->max_regions();
3637 assert(_max_task_num != 0, "uninitialized");
3639 for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3640 BitMap* task_card_bm = count_card_bitmap_for(i);
3641 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3643 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3644 assert(marked_bytes_array != NULL, "uninitialized");
3646 memset(marked_bytes_array, 0, (max_regions * sizeof(size_t)));
3647 task_card_bm->clear();
3648 }
3649 }
3651 void ConcurrentMark::print_stats() {
3652 if (verbose_stats()) {
3653 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3654 for (size_t i = 0; i < _active_tasks; ++i) {
3655 _tasks[i]->print_stats();
3656 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3657 }
3658 }
3659 }
3661 // Closures used by ConcurrentMark::complete_marking_in_collection_set().
3663 class CSetMarkOopClosure: public OopClosure {
3664 friend class CSetMarkBitMapClosure;
3666 G1CollectedHeap* _g1h;
3667 CMBitMap* _bm;
3668 ConcurrentMark* _cm;
3669 oop* _ms;
3670 jint* _array_ind_stack;
3671 int _ms_size;
3672 int _ms_ind;
3673 int _array_increment;
3674 uint _worker_id;
3676 bool push(oop obj, int arr_ind = 0) {
3677 if (_ms_ind == _ms_size) {
3678 gclog_or_tty->print_cr("Mark stack is full.");
3679 return false;
3680 }
3681 _ms[_ms_ind] = obj;
3682 if (obj->is_objArray()) {
3683 _array_ind_stack[_ms_ind] = arr_ind;
3684 }
3685 _ms_ind++;
3686 return true;
3687 }
3689 oop pop() {
3690 if (_ms_ind == 0) {
3691 return NULL;
3692 } else {
3693 _ms_ind--;
3694 return _ms[_ms_ind];
3695 }
3696 }
3698 template <class T> bool drain() {
3699 while (_ms_ind > 0) {
3700 oop obj = pop();
3701 assert(obj != NULL, "Since index was non-zero.");
3702 if (obj->is_objArray()) {
3703 jint arr_ind = _array_ind_stack[_ms_ind];
3704 objArrayOop aobj = objArrayOop(obj);
3705 jint len = aobj->length();
3706 jint next_arr_ind = arr_ind + _array_increment;
3707 if (next_arr_ind < len) {
3708 push(obj, next_arr_ind);
3709 }
3710 // Now process this portion of this one.
3711 int lim = MIN2(next_arr_ind, len);
3712 for (int j = arr_ind; j < lim; j++) {
3713 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
3714 }
3715 } else {
3716 obj->oop_iterate(this);
3717 }
3718 if (abort()) return false;
3719 }
3720 return true;
3721 }
3723 public:
3724 CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, uint worker_id) :
3725 _g1h(G1CollectedHeap::heap()),
3726 _cm(cm),
3727 _bm(cm->nextMarkBitMap()),
3728 _ms_size(ms_size), _ms_ind(0),
3729 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
3730 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
3731 _array_increment(MAX2(ms_size/8, 16)),
3732 _worker_id(worker_id) { }
3734 ~CSetMarkOopClosure() {
3735 FREE_C_HEAP_ARRAY(oop, _ms);
3736 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
3737 }
3739 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
3740 virtual void do_oop( oop* p) { do_oop_work(p); }
3742 template <class T> void do_oop_work(T* p) {
3743 T heap_oop = oopDesc::load_heap_oop(p);
3744 if (oopDesc::is_null(heap_oop)) return;
3745 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
3746 if (obj->is_forwarded()) {
3747 // If the object has already been forwarded, we have to make sure
3748 // that it's marked. So follow the forwarding pointer. Note that
3749 // this does the right thing for self-forwarding pointers in the
3750 // evacuation failure case.
3751 obj = obj->forwardee();
3752 }
3753 HeapRegion* hr = _g1h->heap_region_containing(obj);
3754 if (hr != NULL) {
3755 if (hr->in_collection_set()) {
3756 if (_g1h->is_obj_ill(obj)) {
3757 if (_bm->parMark((HeapWord*)obj)) {
3758 if (!push(obj)) {
3759 gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
3760 set_abort();
3761 }
3762 }
3763 }
3764 } else {
3765 // Outside the collection set; we need to gray it
3766 _cm->deal_with_reference(obj);
3767 }
3768 }
3769 }
3770 };
3772 class CSetMarkBitMapClosure: public BitMapClosure {
3773 G1CollectedHeap* _g1h;
3774 CMBitMap* _bitMap;
3775 ConcurrentMark* _cm;
3776 CSetMarkOopClosure _oop_cl;
3777 uint _worker_id;
3779 public:
3780 CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_id) :
3781 _g1h(G1CollectedHeap::heap()),
3782 _bitMap(cm->nextMarkBitMap()),
3783 _oop_cl(cm, ms_size, worker_id),
3784 _worker_id(worker_id) { }
3786 bool do_bit(size_t offset) {
3787 // convert offset into a HeapWord*
3788 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3789 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
3790 "address out of range");
3791 assert(_bitMap->isMarked(addr), "tautology");
3792 oop obj = oop(addr);
3793 if (!obj->is_forwarded()) {
3794 if (!_oop_cl.push(obj)) return false;
3795 if (UseCompressedOops) {
3796 if (!_oop_cl.drain<narrowOop>()) return false;
3797 } else {
3798 if (!_oop_cl.drain<oop>()) return false;
3799 }
3800 }
3801 // Otherwise...
3802 return true;
3803 }
3804 };
3806 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
3807 CMBitMap* _bm;
3808 CSetMarkBitMapClosure _bit_cl;
3809 uint _worker_id;
3811 enum SomePrivateConstants {
3812 MSSize = 1000
3813 };
3815 public:
3816 CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_id) :
3817 _bm(cm->nextMarkBitMap()),
3818 _bit_cl(cm, MSSize, worker_id),
3819 _worker_id(worker_id) { }
3821 bool doHeapRegion(HeapRegion* hr) {
3822 if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
3823 // The current worker has successfully claimed the region.
3824 if (!hr->evacuation_failed()) {
3825 MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
3826 if (!mr.is_empty()) {
3827 bool done = false;
3828 while (!done) {
3829 done = _bm->iterate(&_bit_cl, mr);
3830 }
3831 }
3832 }
3833 }
3834 return false;
3835 }
3836 };
3838 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
3839 protected:
3840 G1CollectedHeap* _g1h;
3841 ConcurrentMark* _cm;
3843 public:
3844 G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
3845 ConcurrentMark* cm) :
3846 AbstractGangTask("Complete Mark in CSet"),
3847 _g1h(g1h), _cm(cm) { }
3849 void work(uint worker_id) {
3850 CompleteMarkingInCSetHRClosure cmplt(_cm, worker_id);
3851 HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
3852 _g1h->collection_set_iterate_from(hr, &cmplt);
3853 }
3854 };
3856 void ConcurrentMark::complete_marking_in_collection_set() {
3857 guarantee(false, "complete_marking_in_collection_set(): "
3858 "don't call this any more");
3860 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3862 if (!g1h->mark_in_progress()) {
3863 g1h->g1_policy()->record_mark_closure_time(0.0);
3864 return;
3865 }
3867 double start = os::elapsedTime();
3868 G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
3870 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3872 if (G1CollectedHeap::use_parallel_gc_threads()) {
3873 int n_workers = g1h->workers()->active_workers();
3874 g1h->set_par_threads(n_workers);
3875 g1h->workers()->run_task(&complete_mark_task);
3876 g1h->set_par_threads(0);
3877 } else {
3878 complete_mark_task.work(0);
3879 }
3881 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
3883 // Reset the claim values in the regions in the collection set.
3884 g1h->reset_cset_heap_region_claim_values();
3886 assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3888 double end_time = os::elapsedTime();
3889 double elapsed_time_ms = (end_time - start) * 1000.0;
3890 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3891 }
3893 // The next two methods deal with the following optimisation. Some
3894 // objects are gray by being marked and located above the finger. If
3895 // they are copied, during an evacuation pause, below the finger then
3896 // they need to be pushed on the stack. The observation is that, if
3897 // there are no regions in the collection set located above the
3898 // finger, then the above cannot happen, hence we do not need to
3899 // explicitly gray any objects when copying them to below the
3900 // finger. The global stack will be scanned to ensure that, if it
3901 // points to objects being copied, it will update their
3902 // location. There is a tricky situation with the gray objects in
3903 // the region stack that are being copied, however. See the comment in
3904 // newCSet().
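// An illustrative example of the above (region numbers are made up):
// say the global finger sits at the bottom of region R10 and a marked
// (i.e. gray) object in R12 is evacuated into R3 during a pause. R12 is
// above the finger, so the object would have been visited anyway; R3 is
// below it, so after the copy no task will ever scan that part of the
// bitmap again, and the copied object must be explicitly pushed on a
// stack. If no collection set region lies above the finger, no such
// copy can originate above it and the explicit graying can be skipped.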
3906 void ConcurrentMark::newCSet() {
3907 guarantee(false, "newCSet(): don't call this any more");
3909 if (!concurrent_marking_in_progress()) {
3910 // nothing to do if marking is not in progress
3911 return;
3912 }
3914 // find what the lowest finger is among the global and local fingers
3915 _min_finger = _finger;
3916 for (int i = 0; i < (int)_max_task_num; ++i) {
3917 CMTask* task = _tasks[i];
3918 HeapWord* task_finger = task->finger();
3919 if (task_finger != NULL && task_finger < _min_finger) {
3920 _min_finger = task_finger;
3921 }
3922 }
3924 _should_gray_objects = false;
3926 // This fixes a very subtle and frustrating bug. It might be the case
3927 // that, during an evacuation pause, heap regions that contain
3928 // objects that are gray (by being in regions contained in the
3929 // region stack) are included in the collection set. Since such gray
3930 // objects will be moved, and because it's not easy to redirect
3931 // region stack entries to point to a new location (because objects
3932 // in one region might be scattered to multiple regions after they
3933 // are copied), one option is to ensure that all marked objects
3934 // copied during a pause are pushed on the stack. Notice, however,
3935 // that this problem can only happen when the region stack is not
3936 // empty during an evacuation pause. So, we make the fix a bit less
3937 // conservative and ensure that regions are pushed on the stack,
3938 // irrespective of whether all collection set regions are below the
3939 // finger, if the region stack is not empty. This is expected to be
3940 // a rare case, so I don't think it's necessary to be smarter about it.
3941 if (!region_stack_empty() || has_aborted_regions()) {
3942 _should_gray_objects = true;
3943 }
3944 }
3946 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3947 guarantee(false, "registerCSetRegion(): don't call this any more");
3949 if (!concurrent_marking_in_progress()) return;
3951 HeapWord* region_end = hr->end();
3952 if (region_end > _min_finger) {
3953 _should_gray_objects = true;
3954 }
3955 }
3957 // Resets the region fields of active CMTasks whose values point
3958 // into the collection set.
3959 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3960 guarantee(false, "reset_active_task_region_fields_in_cset(): "
3961 "don't call this any more");
3963 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3964 assert(parallel_marking_threads() <= _max_task_num, "sanity");
3966 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3967 CMTask* task = _tasks[i];
3968 HeapWord* task_finger = task->finger();
3969 if (task_finger != NULL) {
3970 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3971 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3972 if (finger_region->in_collection_set()) {
3973 // The task's current region is in the collection set.
3974 // This region will be evacuated in the current GC and
3975 // the region fields in the task will be stale.
3976 task->giveup_current_region();
3977 }
3978 }
3979 }
3980 }
3982 // abandon current marking iteration due to a Full GC
3983 void ConcurrentMark::abort() {
3984 // Clear all marks to force marking thread to do nothing
3985 _nextMarkBitMap->clearAll();
3986 // Clear the liveness counting data
3987 clear_all_count_data();
3988 // Empty mark stack
3989 clear_marking_state();
3990 for (int i = 0; i < (int)_max_task_num; ++i) {
3991 _tasks[i]->clear_region_fields();
3992 }
3993 _has_aborted = true;
3995 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3996 satb_mq_set.abandon_partial_marking();
3997 // This can be called either during or outside marking, we'll read
3998 // the expected_active value from the SATB queue set.
3999 satb_mq_set.set_active_all_threads(
4000 false, /* new active value */
4001 satb_mq_set.is_active() /* expected_active */);
4002 }
4004 static void print_ms_time_info(const char* prefix, const char* name,
4005 NumberSeq& ns) {
4006 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
4007 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
4008 if (ns.num() > 0) {
4009 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
4010 prefix, ns.sd(), ns.maximum());
4011 }
4012 }
4014 void ConcurrentMark::print_summary_info() {
4015 gclog_or_tty->print_cr(" Concurrent marking:");
4016 print_ms_time_info(" ", "init marks", _init_times);
4017 print_ms_time_info(" ", "remarks", _remark_times);
4018 {
4019 print_ms_time_info(" ", "final marks", _remark_mark_times);
4020 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
4022 }
4023 print_ms_time_info(" ", "cleanups", _cleanup_times);
4024 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
4025 _total_counting_time,
4026 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
4027 (double)_cleanup_times.num()
4028 : 0.0));
4029 if (G1ScrubRemSets) {
4030 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
4031 _total_rs_scrub_time,
4032 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
4033 (double)_cleanup_times.num()
4034 : 0.0));
4035 }
4036 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
4037 (_init_times.sum() + _remark_times.sum() +
4038 _cleanup_times.sum())/1000.0);
4039 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
4040 "(%8.2f s marking).",
4041 cmThread()->vtime_accum(),
4042 cmThread()->vtime_mark_accum());
4043 }
4045 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
4046 _parallel_workers->print_worker_threads_on(st);
4047 }
4049 // We take a break if someone is trying to stop the world.
4050 bool ConcurrentMark::do_yield_check(uint worker_id) {
4051 if (should_yield()) {
4052 if (worker_id == 0) {
4053 _g1h->g1_policy()->record_concurrent_pause();
4054 }
4055 cmThread()->yield();
4056 if (worker_id == 0) {
4057 _g1h->g1_policy()->record_concurrent_pause_end();
4058 }
4059 return true;
4060 } else {
4061 return false;
4062 }
4063 }
4065 bool ConcurrentMark::should_yield() {
4066 return cmThread()->should_yield();
4067 }
4069 bool ConcurrentMark::containing_card_is_marked(void* p) {
4070 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
4071 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
4072 }
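// Illustrative arithmetic (values assumed, not taken from this change):
// with the usual 512-byte cards, CardTableModRefBS::card_shift is 9, so
// a pointer 0x1230 bytes past the start of the reserved region yields
// offset 0x1230 and card index 0x1230 >> 9 = 9.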
4074 bool ConcurrentMark::containing_cards_are_marked(void* start,
4075 void* last) {
4076 return containing_card_is_marked(start) &&
4077 containing_card_is_marked(last);
4078 }
4080 #ifndef PRODUCT
4081 // for debugging purposes
4082 void ConcurrentMark::print_finger() {
4083 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
4084 _heap_start, _heap_end, _finger);
4085 for (int i = 0; i < (int) _max_task_num; ++i) {
4086 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
4087 }
4088 gclog_or_tty->print_cr("");
4089 }
4090 #endif
4092 void CMTask::scan_object(oop obj) {
4093 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
4095 if (_cm->verbose_high()) {
4096 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
4097 _task_id, (void*) obj);
4098 }
4100 size_t obj_size = obj->size();
4101 _words_scanned += obj_size;
4103 obj->oop_iterate(_cm_oop_closure);
4104 statsOnly( ++_objs_scanned );
4105 check_limits();
4106 }
4108 // Closure for iteration over bitmaps
4109 class CMBitMapClosure : public BitMapClosure {
4110 private:
4111 // the bitmap that is being iterated over
4112 CMBitMap* _nextMarkBitMap;
4113 ConcurrentMark* _cm;
4114 CMTask* _task;
4115 // true if we're scanning a heap region claimed by the task (so that
4116 // we move the finger along), false if we're not, i.e. currently when
4117 // scanning a heap region popped from the region stack (so that we
4118 // do not move the task finger along; it'd be a mistake if we did so).
4119 bool _scanning_heap_region;
4121 public:
4122 CMBitMapClosure(CMTask *task,
4123 ConcurrentMark* cm,
4124 CMBitMap* nextMarkBitMap)
4125 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
4127 void set_scanning_heap_region(bool scanning_heap_region) {
4128 _scanning_heap_region = scanning_heap_region;
4129 }
4131 bool do_bit(size_t offset) {
4132 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
4133 assert(_nextMarkBitMap->isMarked(addr), "invariant");
4134 assert( addr < _cm->finger(), "invariant");
4136 if (_scanning_heap_region) {
4137 statsOnly( _task->increase_objs_found_on_bitmap() );
4138 assert(addr >= _task->finger(), "invariant");
4139 // We move that task's local finger along.
4140 _task->move_finger_to(addr);
4141 } else {
4142 // We move the task's region finger along.
4143 _task->move_region_finger_to(addr);
4144 }
4146 _task->scan_object(oop(addr));
4147 // we only partially drain the local queue and global stack
4148 _task->drain_local_queue(true);
4149 _task->drain_global_stack(true);
4151 // if the has_aborted flag has been raised, we need to bail out of
4152 // the iteration
4153 return !_task->has_aborted();
4154 }
4155 };
4157 // Closure for iterating over objects, currently only used for
4158 // processing SATB buffers.
4159 class CMObjectClosure : public ObjectClosure {
4160 private:
4161 CMTask* _task;
4163 public:
4164 void do_object(oop obj) {
4165 _task->deal_with_reference(obj);
4166 }
4168 CMObjectClosure(CMTask* task) : _task(task) { }
4169 };
4171 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
4172 ConcurrentMark* cm,
4173 CMTask* task)
4174 : _g1h(g1h), _cm(cm), _task(task) {
4175 assert(_ref_processor == NULL, "should be initialized to NULL");
4177 if (G1UseConcMarkReferenceProcessing) {
4178 _ref_processor = g1h->ref_processor_cm();
4179 assert(_ref_processor != NULL, "should not be NULL");
4180 }
4181 }
4183 void CMTask::setup_for_region(HeapRegion* hr) {
4184 // Separated the asserts so that we know which one fires.
4185 assert(hr != NULL,
4186 "claim_region() should have filtered out continues humongous regions");
4187 assert(!hr->continuesHumongous(),
4188 "claim_region() should have filtered out continues humongous regions");
4190 if (_cm->verbose_low()) {
4191 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
4192 _task_id, hr);
4193 }
4195 _curr_region = hr;
4196 _finger = hr->bottom();
4197 update_region_limit();
4198 }
4200 void CMTask::update_region_limit() {
4201 HeapRegion* hr = _curr_region;
4202 HeapWord* bottom = hr->bottom();
4203 HeapWord* limit = hr->next_top_at_mark_start();
4205 if (limit == bottom) {
4206 if (_cm->verbose_low()) {
4207 gclog_or_tty->print_cr("[%d] found an empty region "
4208 "["PTR_FORMAT", "PTR_FORMAT")",
4209 _task_id, bottom, limit);
4210 }
4211 // The region was collected underneath our feet.
4212 // We set the finger to bottom to ensure that the bitmap
4213 // iteration that will follow this will not do anything.
4214 // (this is not a condition that holds when we set the region up,
4215 // as the region is not supposed to be empty in the first place)
4216 _finger = bottom;
4217 } else if (limit >= _region_limit) {
4218 assert(limit >= _finger, "peace of mind");
4219 } else {
4220 assert(limit < _region_limit, "only way to get here");
4221 // This can happen under some pretty unusual circumstances. An
4222 // evacuation pause empties the region underneath our feet (NTAMS
4223 // at bottom). We then do some allocation in the region (NTAMS
4224 // stays at bottom), followed by the region being used as a GC
4225 // alloc region (NTAMS will move to top() and the objects
4226 // originally below it will be grayed). All objects now marked in
4227 // the region are explicitly grayed, if below the global finger,
4228 // and we do not need in fact to scan anything else. So, we simply
4229 // set _finger to be limit to ensure that the bitmap iteration
4230 // doesn't do anything.
4231 _finger = limit;
4232 }
4234 _region_limit = limit;
4235 }
4237 void CMTask::giveup_current_region() {
4238 assert(_curr_region != NULL, "invariant");
4239 if (_cm->verbose_low()) {
4240 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
4241 _task_id, _curr_region);
4242 }
4243 clear_region_fields();
4244 }
4246 void CMTask::clear_region_fields() {
4247 // Values for these three fields that indicate that we're not
4248 // holding on to a region.
4249 _curr_region = NULL;
4250 _finger = NULL;
4251 _region_limit = NULL;
4253 _region_finger = NULL;
4254 }
4256 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
4257 if (cm_oop_closure == NULL) {
4258 assert(_cm_oop_closure != NULL, "invariant");
4259 } else {
4260 assert(_cm_oop_closure == NULL, "invariant");
4261 }
4262 _cm_oop_closure = cm_oop_closure;
4263 }
4265 void CMTask::reset(CMBitMap* nextMarkBitMap) {
4266 guarantee(nextMarkBitMap != NULL, "invariant");
4268 if (_cm->verbose_low()) {
4269 gclog_or_tty->print_cr("[%d] resetting", _task_id);
4270 }
4272 _nextMarkBitMap = nextMarkBitMap;
4273 clear_region_fields();
4274 assert(_aborted_region.is_empty(), "should have been cleared");
4276 _calls = 0;
4277 _elapsed_time_ms = 0.0;
4278 _termination_time_ms = 0.0;
4279 _termination_start_time_ms = 0.0;
4281 #if _MARKING_STATS_
4282 _local_pushes = 0;
4283 _local_pops = 0;
4284 _local_max_size = 0;
4285 _objs_scanned = 0;
4286 _global_pushes = 0;
4287 _global_pops = 0;
4288 _global_max_size = 0;
4289 _global_transfers_to = 0;
4290 _global_transfers_from = 0;
4291 _region_stack_pops = 0;
4292 _regions_claimed = 0;
4293 _objs_found_on_bitmap = 0;
4294 _satb_buffers_processed = 0;
4295 _steal_attempts = 0;
4296 _steals = 0;
4297 _aborted = 0;
4298 _aborted_overflow = 0;
4299 _aborted_cm_aborted = 0;
4300 _aborted_yield = 0;
4301 _aborted_timed_out = 0;
4302 _aborted_satb = 0;
4303 _aborted_termination = 0;
4304 #endif // _MARKING_STATS_
4305 }
4307 bool CMTask::should_exit_termination() {
4308 regular_clock_call();
4309 // This is called when we are in the termination protocol. We should
4310 // quit if, for some reason, this task wants to abort or the global
4311 // stack is not empty (this means that we can get work from it).
4312 return !_cm->mark_stack_empty() || has_aborted();
4313 }
4315 void CMTask::reached_limit() {
4316 assert(_words_scanned >= _words_scanned_limit ||
4317 _refs_reached >= _refs_reached_limit,
4318 "shouldn't have been called otherwise");
4319 regular_clock_call();
4320 }
4322 void CMTask::regular_clock_call() {
4323 if (has_aborted()) return;
4325 // First, we need to recalculate the words scanned and refs reached
4326 // limits for the next clock call.
4327 recalculate_limits();
4329 // During the regular clock call we do the following
4331 // (1) If an overflow has been flagged, then we abort.
4332 if (_cm->has_overflown()) {
4333 set_has_aborted();
4334 return;
4335 }
4337 // If we are not concurrent (i.e. we're doing remark) we don't need
4338 // to check anything else. The other steps are only needed during
4339 // the concurrent marking phase.
4340 if (!concurrent()) return;
4342 // (2) If marking has been aborted for Full GC, then we also abort.
4343 if (_cm->has_aborted()) {
4344 set_has_aborted();
4345 statsOnly( ++_aborted_cm_aborted );
4346 return;
4347 }
4349 double curr_time_ms = os::elapsedVTime() * 1000.0;
4351 // (3) If marking stats are enabled, then we update the step history.
4352 #if _MARKING_STATS_
4353 if (_words_scanned >= _words_scanned_limit) {
4354 ++_clock_due_to_scanning;
4355 }
4356 if (_refs_reached >= _refs_reached_limit) {
4357 ++_clock_due_to_marking;
4358 }
4360 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
4361 _interval_start_time_ms = curr_time_ms;
4362 _all_clock_intervals_ms.add(last_interval_ms);
4364 if (_cm->verbose_medium()) {
4365 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
4366 "scanned = %d%s, refs reached = %d%s",
4367 _task_id, last_interval_ms,
4368 _words_scanned,
4369 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
4370 _refs_reached,
4371 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
4372 }
4373 #endif // _MARKING_STATS_
4375 // (4) We check whether we should yield. If we have to, then we abort.
4376 if (_cm->should_yield()) {
4377 // We should yield. To do this we abort the task. The caller is
4378 // responsible for yielding.
4379 set_has_aborted();
4380 statsOnly( ++_aborted_yield );
4381 return;
4382 }
4384 // (5) We check whether we've reached our time quota. If we have,
4385 // then we abort.
4386 double elapsed_time_ms = curr_time_ms - _start_time_ms;
4387 if (elapsed_time_ms > _time_target_ms) {
4388 set_has_aborted();
4389 _has_timed_out = true;
4390 statsOnly( ++_aborted_timed_out );
4391 return;
4392 }
4394 // (6) Finally, we check whether there are enough completed SATB
4395 // buffers available for processing. If there are, we abort.
4396 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
4397 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
4398 if (_cm->verbose_low()) {
4399 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
4400 _task_id);
4401 }
4402 // we do need to process SATB buffers, we'll abort and restart
4403 // the marking task to do so
4404 set_has_aborted();
4405 statsOnly( ++_aborted_satb );
4406 return;
4407 }
4408 }
4410 void CMTask::recalculate_limits() {
4411 _real_words_scanned_limit = _words_scanned + words_scanned_period;
4412 _words_scanned_limit = _real_words_scanned_limit;
4414 _real_refs_reached_limit = _refs_reached + refs_reached_period;
4415 _refs_reached_limit = _real_refs_reached_limit;
4416 }
4418 void CMTask::decrease_limits() {
4419 // This is called when we believe that we're going to do an infrequent
4420 // operation which will increase the per-byte scanned cost (i.e. move
4421 // entries to/from the global stack). It basically tries to decrease the
4422 // scanning limit so that the clock is called earlier.
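// For example (illustrative only): if words_scanned_period were 12K
// words, the limit would be pulled back by 3 * 12K / 4 = 9K words,
// leaving only a quarter of the usual scanning budget before the next
// regular_clock_call().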
4424 if (_cm->verbose_medium()) {
4425 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
4426 }
4428 _words_scanned_limit = _real_words_scanned_limit -
4429 3 * words_scanned_period / 4;
4430 _refs_reached_limit = _real_refs_reached_limit -
4431 3 * refs_reached_period / 4;
4432 }
4434 void CMTask::move_entries_to_global_stack() {
4435 // local array where we'll store the entries that will be popped
4436 // from the local queue
4437 oop buffer[global_stack_transfer_size];
4439 int n = 0;
4440 oop obj;
4441 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
4442 buffer[n] = obj;
4443 ++n;
4444 }
4446 if (n > 0) {
4447 // we popped at least one entry from the local queue
4449 statsOnly( ++_global_transfers_to; _local_pops += n );
4451 if (!_cm->mark_stack_push(buffer, n)) {
4452 if (_cm->verbose_low()) {
4453 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
4454 _task_id);
4455 }
4456 set_has_aborted();
4457 } else {
4458 // the transfer was successful
4460 if (_cm->verbose_medium()) {
4461 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
4462 _task_id, n);
4463 }
4464 statsOnly( int tmp_size = _cm->mark_stack_size();
4465 if (tmp_size > _global_max_size) {
4466 _global_max_size = tmp_size;
4467 }
4468 _global_pushes += n );
4469 }
4470 }
4472 // this operation was quite expensive, so decrease the limits
4473 decrease_limits();
4474 }
4476 void CMTask::get_entries_from_global_stack() {
4477 // local array where we'll store the entries that will be popped
4478 // from the global stack.
4479 oop buffer[global_stack_transfer_size];
4480 int n;
4481 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
4482 assert(n <= global_stack_transfer_size,
4483 "we should not pop more than the given limit");
4484 if (n > 0) {
4485 // yes, we did actually pop at least one entry
4487 statsOnly( ++_global_transfers_from; _global_pops += n );
4488 if (_cm->verbose_medium()) {
4489 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
4490 _task_id, n);
4491 }
4492 for (int i = 0; i < n; ++i) {
4493 bool success = _task_queue->push(buffer[i]);
4494 // We only call this when the local queue is empty or under a
4495 // given target limit. So, we do not expect this push to fail.
4496 assert(success, "invariant");
4497 }
4499 statsOnly( int tmp_size = _task_queue->size();
4500 if (tmp_size > _local_max_size) {
4501 _local_max_size = tmp_size;
4502 }
4503 _local_pushes += n );
4504 }
4506 // this operation was quite expensive, so decrease the limits
4507 decrease_limits();
4508 }
4510 void CMTask::drain_local_queue(bool partially) {
4511 if (has_aborted()) return;
4513 // Decide what the target size is, depending whether we're going to
4514 // drain it partially (so that other tasks can steal if they run out
4515 // of things to do) or totally (at the very end).
4516 size_t target_size;
4517 if (partially) {
4518 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
4519 } else {
4520 target_size = 0;
4521 }
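// Illustrative example: for a task queue with max_elems() == 16384, a
// partial drain stops at MIN2((size_t)16384 / 3, GCDrainStackTargetSize)
// = MIN2(5461, GCDrainStackTargetSize) entries, leaving the queue short
// but non-empty so other tasks can still steal from it.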
4523 if (_task_queue->size() > target_size) {
4524 if (_cm->verbose_high()) {
4525 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
4526 _task_id, target_size);
4527 }
4529 oop obj;
4530 bool ret = _task_queue->pop_local(obj);
4531 while (ret) {
4532 statsOnly( ++_local_pops );
4534 if (_cm->verbose_high()) {
4535 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
4536 (void*) obj);
4537 }
4539 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
4540 assert(!_g1h->is_on_master_free_list(
4541 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
4543 scan_object(obj);
4545 if (_task_queue->size() <= target_size || has_aborted()) {
4546 ret = false;
4547 } else {
4548 ret = _task_queue->pop_local(obj);
4549 }
4550 }
4552 if (_cm->verbose_high()) {
4553 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
4554 _task_id, _task_queue->size());
4555 }
4556 }
4557 }
4559 void CMTask::drain_global_stack(bool partially) {
4560 if (has_aborted()) return;
4562 // We have a policy to drain the local queue before we attempt to
4563 // drain the global stack.
4564 assert(partially || _task_queue->size() == 0, "invariant");
4566 // Decide what the target size is, depending whether we're going to
4567 // drain it partially (so that other tasks can steal if they run out
4568 // of things to do) or totally (at the very end). Notice that,
4569 // because we move entries from the global stack in chunks or
4570 // because another task might be doing the same, we might in fact
4571 // drop below the target. But, this is not a problem.
4572 size_t target_size;
4573 if (partially) {
4574 target_size = _cm->partial_mark_stack_size_target();
4575 } else {
4576 target_size = 0;
4577 }
4579 if (_cm->mark_stack_size() > target_size) {
4580 if (_cm->verbose_low()) {
4581 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
4582 _task_id, target_size);
4583 }
4585 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
4586 get_entries_from_global_stack();
4587 drain_local_queue(partially);
4588 }
4590 if (_cm->verbose_low()) {
4591 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
4592 _task_id, _cm->mark_stack_size());
4593 }
4594 }
4595 }
4597 // SATB Queue has several assumptions on whether to call the par or
4598 // non-par versions of the methods. This is why some of the code is
4599 // replicated. We should really get rid of the single-threaded version
4600 // of the code to simplify things.
4601 void CMTask::drain_satb_buffers() {
4602 if (has_aborted()) return;
4604 // We set this so that the regular clock knows that we're in the
4605 // middle of draining buffers and doesn't set the abort flag when it
4606 // notices that SATB buffers are available for draining. It'd be
4607 // very counterproductive if it did that. :-)
4608 _draining_satb_buffers = true;
4610 CMObjectClosure oc(this);
4611 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
4612 if (G1CollectedHeap::use_parallel_gc_threads()) {
4613 satb_mq_set.set_par_closure(_task_id, &oc);
4614 } else {
4615 satb_mq_set.set_closure(&oc);
4616 }
4618 // This keeps claiming and applying the closure to completed buffers
4619 // until we run out of buffers or we need to abort.
4620 if (G1CollectedHeap::use_parallel_gc_threads()) {
4621 while (!has_aborted() &&
4622 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
4623 if (_cm->verbose_medium()) {
4624 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
4625 }
4626 statsOnly( ++_satb_buffers_processed );
4627 regular_clock_call();
4628 }
4629 } else {
4630 while (!has_aborted() &&
4631 satb_mq_set.apply_closure_to_completed_buffer()) {
4632 if (_cm->verbose_medium()) {
4633 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
4634 }
4635 statsOnly( ++_satb_buffers_processed );
4636 regular_clock_call();
4637 }
4638 }
4640 if (!concurrent() && !has_aborted()) {
4641 // We should only do this during remark.
4642 if (G1CollectedHeap::use_parallel_gc_threads()) {
4643 satb_mq_set.par_iterate_closure_all_threads(_task_id);
4644 } else {
4645 satb_mq_set.iterate_closure_all_threads();
4646 }
4647 }
4649 _draining_satb_buffers = false;
4651 assert(has_aborted() ||
4652 concurrent() ||
4653 satb_mq_set.completed_buffers_num() == 0, "invariant");
4655 if (G1CollectedHeap::use_parallel_gc_threads()) {
4656 satb_mq_set.set_par_closure(_task_id, NULL);
4657 } else {
4658 satb_mq_set.set_closure(NULL);
4659 }
4661 // again, this was a potentially expensive operation, decrease the
4662 // limits to get the regular clock call early
4663 decrease_limits();
4664 }
4666 void CMTask::drain_region_stack(BitMapClosure* bc) {
4667 assert(_cm->region_stack_empty(), "region stack should be empty");
4668 assert(_aborted_region.is_empty(), "aborted region should be empty");
4669 return;
4671 if (has_aborted()) return;
4673 assert(_region_finger == NULL,
4674 "it should be NULL when we're not scanning a region");
4676 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
4677 if (_cm->verbose_low()) {
4678 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
4679 _task_id, _cm->region_stack_size());
4680 }
4682 MemRegion mr;
4684 if (!_aborted_region.is_empty()) {
4685 mr = _aborted_region;
4686 _aborted_region = MemRegion();
4688 if (_cm->verbose_low()) {
4689 gclog_or_tty->print_cr("[%d] scanning aborted region "
4690 "[ " PTR_FORMAT ", " PTR_FORMAT " )",
4691 _task_id, mr.start(), mr.end());
4692 }
4693 } else {
4694 mr = _cm->region_stack_pop_lock_free();
4695 // it returns MemRegion() if the pop fails
4696 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
4697 }
4699 while (mr.start() != NULL) {
4700 if (_cm->verbose_medium()) {
4701 gclog_or_tty->print_cr("[%d] we are scanning region "
4702 "["PTR_FORMAT", "PTR_FORMAT")",
4703 _task_id, mr.start(), mr.end());
4704 }
4706 assert(mr.end() <= _cm->finger(),
4707 "otherwise the region shouldn't be on the stack");
4708 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
4709 if (_nextMarkBitMap->iterate(bc, mr)) {
4710 assert(!has_aborted(),
4711 "cannot abort the task without aborting the bitmap iteration");
4713 // We finished iterating over the region without aborting.
4714 regular_clock_call();
4715 if (has_aborted()) {
4716 mr = MemRegion();
4717 } else {
4718 mr = _cm->region_stack_pop_lock_free();
4719 // it returns MemRegion() if the pop fails
4720 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
4721 }
4722 } else {
4723 assert(has_aborted(), "currently the only way to do so");
4725 // The only way to abort the bitmap iteration is to return
4726 // false from the do_bit() method. However, inside the
4727 // do_bit() method we move the _region_finger to point to the
4728 // object currently being looked at. So, if we bail out, we
4729 // have definitely set _region_finger to something non-null.
4730 assert(_region_finger != NULL, "invariant");
4732 // Make sure that any previously aborted region has been
4733 // cleared.
4734 assert(_aborted_region.is_empty(), "aborted region not cleared");
4736 // The iteration was actually aborted. So now _region_finger
4737 // points to the address of the object we last scanned. If we
4738 // leave it there, when we restart this task, we will rescan
4739 // the object. It is easy to avoid this. We move the finger by
4740 // enough to point to the next possible object header (the
4741 // bitmap knows by how much we need to move it as it knows its
4742 // granularity).
4743 MemRegion newRegion =
4744 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
4746 if (!newRegion.is_empty()) {
4747 if (_cm->verbose_low()) {
4748 gclog_or_tty->print_cr("[%d] recording unscanned region"
4749 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
4750 _task_id,
4751 newRegion.start(), newRegion.end());
4752 }
4753 // Now record the part of the region we didn't scan to
4754 // make sure this task scans it later.
4755 _aborted_region = newRegion;
4756 }
4757 // break from while
4758 mr = MemRegion();
4759 }
4760 _region_finger = NULL;
4761 }
4763 if (_cm->verbose_low()) {
4764 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
4765 _task_id, _cm->region_stack_size());
4766 }
4767 }
4768 }
4770 void CMTask::print_stats() {
4771 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
4772 _task_id, _calls);
4773 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4774 _elapsed_time_ms, _termination_time_ms);
4775 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4776 _step_times_ms.num(), _step_times_ms.avg(),
4777 _step_times_ms.sd());
4778 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4779 _step_times_ms.maximum(), _step_times_ms.sum());
4781 #if _MARKING_STATS_
4782 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4783 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4784 _all_clock_intervals_ms.sd());
4785 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4786 _all_clock_intervals_ms.maximum(),
4787 _all_clock_intervals_ms.sum());
4788 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4789 _clock_due_to_scanning, _clock_due_to_marking);
4790 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4791 _objs_scanned, _objs_found_on_bitmap);
4792 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4793 _local_pushes, _local_pops, _local_max_size);
4794 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4795 _global_pushes, _global_pops, _global_max_size);
4796 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4797 _global_transfers_to,_global_transfers_from);
4798 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
4799 _regions_claimed, _region_stack_pops);
4800 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4801 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4802 _steal_attempts, _steals);
4803 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4804 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4805 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4806 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4807 _aborted_timed_out, _aborted_satb, _aborted_termination);
4808 #endif // _MARKING_STATS_
4809 }
4811 /*****************************************************************************
4813 The do_marking_step(time_target_ms) method is the building block
4814 of the parallel marking framework. It can be called in parallel
4815 with other invocations of do_marking_step() on different tasks
4816 (but only one per task, obviously) and concurrently with the
4817 mutator threads, or during remark, hence it eliminates the need
4818 for two versions of the code. When called during remark, it will
4819 pick up from where the task left off during the concurrent marking
4820 phase. Interestingly, tasks are also claimable during evacuation
4821 pauses, since do_marking_step() ensures that it aborts before
4822 it needs to yield.
4824 The data structures that it uses to do marking work are the
4825 following:
4827 (1) Marking Bitmap. If there are gray objects that appear only
4828 on the bitmap (this happens either when dealing with an overflow
4829 or when the initial marking phase has simply marked the roots
4830 and didn't push them on the stack), then tasks claim heap
4831 regions whose bitmap they then scan to find gray objects. A
4832 global finger indicates where the end of the last claimed region
4833 is. A local finger indicates how far into the region a task has
4834 scanned. The two fingers are used to determine how to gray an
4835 object (i.e. whether simply marking it is OK, as it will be
4836 visited by a task in the future, or whether it needs to be also
4837 pushed on a stack).
4839 (2) Local Queue. The local queue of the task which is accessed
4840 reasonably efficiently by the task. Other tasks can steal from
4841 it when they run out of work. Throughout the marking phase, a
4842 task attempts to keep its local queue short but not totally
4843 empty, so that entries are available for stealing by other
4844 tasks. Only when there is no more work will a task totally
4845 drain its local queue.
4847 (3) Global Mark Stack. This handles local queue overflow. During
4848 marking only sets of entries are moved between it and the local
4849 queues, as access to it requires a mutex and more fine-grained
4850 interaction with it, which might cause contention. If it
4851 overflows, then the marking phase should restart and iterate
4852 over the bitmap to identify gray objects. Throughout the marking
4853 phase, tasks attempt to keep the global mark stack at a small
4854 length but not totally empty, so that entries are available for
4855 popping by other tasks. Only when there is no more work will
4856 tasks totally drain the global mark stack.
4858 (4) Global Region Stack. Entries on it correspond to areas of
4859 the bitmap that need to be scanned since they contain gray
4860 objects. Pushes on the region stack only happen during
4861 evacuation pauses and typically correspond to areas covered by
4862 GC LABs. If it overflows, then the marking phase should restart
4863 and iterate over the bitmap to identify gray objects. Tasks will
4864 try to totally drain the region stack as soon as possible.
4866 (5) SATB Buffer Queue. This is where completed SATB buffers are
4867 made available. Buffers are regularly removed from this queue
4868 and scanned for roots, so that the queue doesn't get too
4869 long. During remark, all completed buffers are processed, as
4870 well as the filled in parts of any uncompleted buffers.
4872 The do_marking_step() method tries to abort when the time target
4873 has been reached. There are a few other cases when the
4874 do_marking_step() method also aborts:
4876 (1) When the marking phase has been aborted (after a Full GC).
4878 (2) When a global overflow (either on the global stack or the
4879 region stack) has been triggered. Before the task aborts, it
4880 will actually sync up with the other tasks to ensure that all
4881 the marking data structures (local queues, stacks, fingers etc.)
4882 are re-initialised so that when do_marking_step() completes,
4883 the marking phase can immediately restart.
4885 (3) When enough completed SATB buffers are available. The
4886 do_marking_step() method only tries to drain SATB buffers right
4887 at the beginning. So, if enough buffers are available, the
4888 marking step aborts and the SATB buffers are processed at
4889 the beginning of the next invocation.
4891 (4) To yield. When we have to yield, we abort and yield
4892 right at the end of do_marking_step(). This saves us from a lot
4893 of hassle as, by yielding we might allow a Full GC. If this
4894 happens then objects will be compacted underneath our feet, the
4895 heap might shrink, etc. We save checking for this by just
4896 aborting and doing the yield right at the end.
4898 From the above it follows that the do_marking_step() method should
4899 be called in a loop (or, otherwise, regularly) until it completes.
4901 If a marking step completes without its has_aborted() flag being
4902 true, it means it has completed the current marking phase (and
4903 also all other marking tasks have done so and have all synced up).
4905 A method called regular_clock_call() is invoked "regularly" (in
4906 sub ms intervals) throughout marking. It is this clock method that
4907 checks all the abort conditions which were mentioned above and
4908 decides when the task should abort. A work-based scheme is used to
4909 trigger this clock method: when the number of object words the
4910 marking phase has scanned or the number of references the marking
4911 phase has visited reach a given limit. Additional invocations of
4912 the clock method have been planted in a few other strategic places
4913 too. The initial reason for the clock method was to avoid calling
4914 vtime too regularly, as it is quite expensive. So, once it was in
4915 place, it was natural to piggy-back all the other conditions on it
4916 too and not constantly check them throughout the code.
4918 *****************************************************************************/
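// A minimal sketch (illustrative, not the actual caller) of the driver
// loop implied above, using only the interface visible in this file:
//
//   ConcurrentMark* cm = ...;  // the enclosing ConcurrentMark
//   CMTask* task = ...;        // the task claimed by this worker
//   do {
//     task->do_marking_step(10.0 /* assumed ms target */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // If the step aborted, the caller may first have to yield
//     // (e.g. for a safepoint) or take part in the overflow
//     // protocol before calling do_marking_step() again.
//   } while (task->has_aborted() && !cm->has_aborted());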
4920 void CMTask::do_marking_step(double time_target_ms,
4921 bool do_stealing,
4922 bool do_termination) {
4923 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4924 assert(concurrent() == _cm->concurrent(), "they should be the same");
4926 assert(concurrent() || _cm->region_stack_empty(),
4927 "the region stack should have been cleared before remark");
4928 assert(concurrent() || !_cm->has_aborted_regions(),
4929 "aborted regions should have been cleared before remark");
4930 assert(_region_finger == NULL,
4931 "this should be non-null only when a region is being scanned");
4933 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4934 assert(_task_queues != NULL, "invariant");
4935 assert(_task_queue != NULL, "invariant");
4936 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4938 assert(!_claimed,
4939 "only one thread should claim this task at any one time");
4941 // OK, this doesn't safeguard against all possible scenarios, as it is
4942 // possible for two threads to set the _claimed flag at the same
4943 // time. But it is only for debugging purposes anyway and it will
4944 // catch most problems.
4945 _claimed = true;
4947 _start_time_ms = os::elapsedVTime() * 1000.0;
4948 statsOnly( _interval_start_time_ms = _start_time_ms );
4950 double diff_prediction_ms =
4951 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4952 _time_target_ms = time_target_ms - diff_prediction_ms;
4954 // set up the variables that are used in the work-based scheme to
4955 // call the regular clock method
4956 _words_scanned = 0;
4957 _refs_reached = 0;
4958 recalculate_limits();
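  // A sketch of the work-based trigger described in the comment at the
  // top of this method (shown for illustration only, assuming the
  // limit fields that recalculate_limits() maintains):
  //
  //   if (_words_scanned >= _words_scanned_limit ||
  //       _refs_reached  >= _refs_reached_limit) {
  //     regular_clock_call();
  //   }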
4960 // clear all flags
4961 clear_has_aborted();
4962 _has_timed_out = false;
4963 _draining_satb_buffers = false;
4965 ++_calls;
4967 if (_cm->verbose_low()) {
4968 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4969 "target = %1.2lfms >>>>>>>>>>",
4970 _task_id, _calls, _time_target_ms);
4971 }
4973 // Set up the bitmap and oop closures. Anything that uses them is
4974 // eventually called from this method, so it is OK to allocate these
4975 // statically.
4976 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4977 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4978 set_cm_oop_closure(&cm_oop_closure);
4980 if (_cm->has_overflown()) {
4981 // This can happen if the region stack or the mark stack overflows
4982 // during a GC pause and this task, after a yield point,
4983 // restarts. We have to abort as we need to get into the overflow
4984 // protocol which happens right at the end of this task.
4985 set_has_aborted();
4986 }
4988 // First drain any available SATB buffers. After this, we will not
4989 // look at SATB buffers before the next invocation of this method.
4990 // If enough completed SATB buffers are queued up, the regular clock
4991 // will abort this task so that it restarts.
4992 drain_satb_buffers();
4993 // ...then partially drain the local queue and the global stack
4994 drain_local_queue(true);
4995 drain_global_stack(true);
4997 // Then totally drain the region stack. We will not look at
4998 // it again before the next invocation of this method. Entries on
4999 // the region stack are only added during evacuation pauses, for
5000 // which we have to yield. When we do, we abort the task anyway so
5001 // it will look at the region stack again when it restarts.
5002 bitmap_closure.set_scanning_heap_region(false);
5003 drain_region_stack(&bitmap_closure);
5004 // ...then partially drain the local queue and the global stack
5005 drain_local_queue(true);
5006 drain_global_stack(true);
5008 do {
5009 if (!has_aborted() && _curr_region != NULL) {
5010 // This means that we're already holding on to a region.
5011 assert(_finger != NULL, "if region is not NULL, then the finger "
5012 "should not be NULL either");
5014 // We might have restarted this task after an evacuation pause
5015 // which might have evacuated the region we're holding on to
5016 // underneath our feet. Let's read its limit again to make sure
5017 // that we do not iterate over a region of the heap that
5018 // contains garbage (update_region_limit() will also move
5019 // _finger to the start of the region if it is found empty).
5020 update_region_limit();
5021 // We will start from _finger not from the start of the region,
5022 // as we might be restarting this task after aborting half-way
5023 // through scanning this region. In this case, _finger points to
5024 // the address where we last found a marked object. If this is a
5025 // fresh region, _finger points to start().
5026 MemRegion mr = MemRegion(_finger, _region_limit);
5028 if (_cm->verbose_low()) {
5029 gclog_or_tty->print_cr("[%d] we're scanning part "
5030 "["PTR_FORMAT", "PTR_FORMAT") "
5031 "of region "PTR_FORMAT,
5032 _task_id, _finger, _region_limit, _curr_region);
5033 }
5035 // Let's iterate over the bitmap of the part of the
5036 // region that is left.
5037 bitmap_closure.set_scanning_heap_region(true);
5038 if (mr.is_empty() ||
5039 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
5040 // We successfully completed iterating over the region. Now,
5041 // let's give up the region.
5042 giveup_current_region();
5043 regular_clock_call();
5044 } else {
5045 assert(has_aborted(), "currently the only way to do so");
5046 // The only way to abort the bitmap iteration is to return
5047 // false from the do_bit() method. However, inside the
5048 // do_bit() method we move the _finger to point to the
5049 // object currently being looked at. So, if we bail out, we
5050 // have definitely set _finger to something non-null.
5051 assert(_finger != NULL, "invariant");
5053 // Region iteration was actually aborted. So now _finger
5054 // points to the address of the object we last scanned. If we
5055 // leave it there, when we restart this task, we will rescan
5056 // the object. It is easy to avoid this. We move the finger by
5057 // enough to point to the next possible object header (the
5058 // bitmap knows by how much we need to move it as it knows its
5059 // granularity).
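        // For illustration: the mark bitmap typically uses one bit per
        // minimum object alignment (one HeapWord), in which case the
        // nextWord() call below advances _finger by a single HeapWord.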
5060 assert(_finger < _region_limit, "invariant");
5061 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
5062 // Check if bitmap iteration was aborted while scanning the last object
5063 if (new_finger >= _region_limit) {
5064 giveup_current_region();
5065 } else {
5066 move_finger_to(new_finger);
5067 }
5068 }
5069 }
5070 // At this point we have either completed iterating over the
5071 // region we were holding on to, or we have aborted.
5073 // We then partially drain the local queue and the global stack.
5074 // (Do we really need this?)
5075 drain_local_queue(true);
5076 drain_global_stack(true);
5078 // Read the note on the claim_region() method on why it might
5079 // return NULL with potentially more regions available for
5080 // claiming and why we have to check out_of_regions() to determine
5081 // whether we're done or not.
5082 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
5083 // We are going to try to claim a new region. We should have
5084 // given up on the previous one.
5085 // Separated the asserts so that we know which one fires.
5086 assert(_curr_region == NULL, "invariant");
5087 assert(_finger == NULL, "invariant");
5088 assert(_region_limit == NULL, "invariant");
5089 if (_cm->verbose_low()) {
5090 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
5091 }
5092 HeapRegion* claimed_region = _cm->claim_region(_task_id);
5093 if (claimed_region != NULL) {
5094 // Yes, we managed to claim one
5095 statsOnly( ++_regions_claimed );
5097 if (_cm->verbose_low()) {
5098 gclog_or_tty->print_cr("[%d] we successfully claimed "
5099 "region "PTR_FORMAT,
5100 _task_id, claimed_region);
5101 }
5103 setup_for_region(claimed_region);
5104 assert(_curr_region == claimed_region, "invariant");
5105 }
5106 // It is important to call the regular clock here. It might take
5107 // a while to claim a region if, for example, we hit a large
5108 // block of empty regions. So we need to call the regular clock
5109 // method once per loop iteration to make sure it's called
5110 // frequently enough.
5111 regular_clock_call();
5112 }
5114 if (!has_aborted() && _curr_region == NULL) {
5115 assert(_cm->out_of_regions(),
5116 "at this point we should be out of regions");
5117 }
5118 } while ( _curr_region != NULL && !has_aborted());
5120 if (!has_aborted()) {
5121 // We cannot check whether the global stack is empty, since other
5122 // tasks might be pushing objects to it concurrently. We also cannot
5123 // check if the region stack is empty because if a thread is aborting
5124 // it can push a partially done region back.
5125 assert(_cm->out_of_regions(),
5126 "at this point we should be out of regions");
5128 if (_cm->verbose_low()) {
5129 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
5130 }
5132 // Try to reduce the number of available SATB buffers so that
5133 // remark has less work to do.
5134 drain_satb_buffers();
5135 }
5137 // Since we've done everything else, we can now totally drain the
5138 // local queue and global stack.
5139 drain_local_queue(false);
5140 drain_global_stack(false);
5142 // Attempt at work stealing from other tasks' queues.
5143 if (do_stealing && !has_aborted()) {
5144 // We have not aborted. This means that we have finished all that
5145 // we could. Let's try to do some stealing...
5147 // We cannot check whether the global stack is empty, since other
5148 // tasks might be pushing objects to it concurrently. We also cannot
5149 // check if the region stack is empty because if a thread is aborting
5150 // it can push a partially done region back.
5151 assert(_cm->out_of_regions() && _task_queue->size() == 0,
5152 "only way to reach here");
5154 if (_cm->verbose_low()) {
5155 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
5156 }
5158 while (!has_aborted()) {
5159 oop obj;
5160 statsOnly( ++_steal_attempts );
5162 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
5163 if (_cm->verbose_medium()) {
5164 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
5165 _task_id, (void*) obj);
5166 }
5168 statsOnly( ++_steals );
5170 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
5171 "any stolen object should be marked");
5172 scan_object(obj);
5174 // And since we're towards the end, let's totally drain the
5175 // local queue and global stack.
5176 drain_local_queue(false);
5177 drain_global_stack(false);
5178 } else {
5179 break;
5180 }
5181 }
5182 }
5184 // If we are about to wrap up and go into termination, check if we
5185 // should raise the overflow flag.
5186 if (do_termination && !has_aborted()) {
5187 if (_cm->force_overflow()->should_force()) {
5188 _cm->set_has_overflown();
5189 regular_clock_call();
5190 }
5191 }
5193 // We still haven't aborted. Now, let's try to get into the
5194 // termination protocol.
5195 if (do_termination && !has_aborted()) {
5196 // We cannot check whether the global stack is empty, since other
5197 // tasks might be concurrently pushing objects on it. We also cannot
5198 // check if the region stack is empty because if a thread is aborting
5199 // it can push a partially done region back.
5200 // Separated the asserts so that we know which one fires.
5201 assert(_cm->out_of_regions(), "only way to reach here");
5202 assert(_task_queue->size() == 0, "only way to reach here");
5204 if (_cm->verbose_low()) {
5205 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
5206 }
5208 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
5209 // The CMTask class also extends the TerminatorTerminator class,
5210 // hence its should_exit_termination() method will also decide
5211 // whether to exit the termination protocol or not.
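    // A sketch of that hook (for illustration; see
    // CMTask::should_exit_termination() for the authoritative version):
    //
    //   bool CMTask::should_exit_termination() {
    //     regular_clock_call();
    //     // Leave termination if this task wants to abort or the
    //     // global mark stack is no longer empty (i.e. there is work).
    //     return !_cm->mark_stack_empty() || has_aborted();
    //   }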
5212 bool finished = _cm->terminator()->offer_termination(this);
5213 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
5214 _termination_time_ms +=
5215 termination_end_time_ms - _termination_start_time_ms;
5217 if (finished) {
5218 // We're all done.
5220 if (_task_id == 0) {
5221 // let's allow task 0 to do this
5222 if (concurrent()) {
5223 assert(_cm->concurrent_marking_in_progress(), "invariant");
5224 // we need to set this to false before the next
5225 // safepoint. This way we ensure that the marking phase
5226 // doesn't observe any more heap expansions.
5227 _cm->clear_concurrent_marking_in_progress();
5228 }
5229 }
5231 // We can now guarantee that the global stack is empty, since
5232 // all other tasks have finished. We separated the guarantees so
5233 // that, if a condition is false, we can immediately find out
5234 // which one.
5235 guarantee(_cm->out_of_regions(), "only way to reach here");
5236 guarantee(_aborted_region.is_empty(), "only way to reach here");
5237 guarantee(_cm->region_stack_empty(), "only way to reach here");
5238 guarantee(_cm->mark_stack_empty(), "only way to reach here");
5239 guarantee(_task_queue->size() == 0, "only way to reach here");
5240 guarantee(!_cm->has_overflown(), "only way to reach here");
5241 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
5242 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
5244 if (_cm->verbose_low()) {
5245 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
5246 }
5247 } else {
5248 // Apparently there's more work to do. Let's abort this task; it
5249 // will be restarted and we can hopefully find more things to do.
5251 if (_cm->verbose_low()) {
5252 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
5253 _task_id);
5254 }
5256 set_has_aborted();
5257 statsOnly( ++_aborted_termination );
5258 }
5259 }
5261 // Mainly for debugging purposes to make sure that a pointer to the
5262 // closure which was statically allocated in this frame doesn't
5263 // escape it by accident.
5264 set_cm_oop_closure(NULL);
5265 double end_time_ms = os::elapsedVTime() * 1000.0;
5266 double elapsed_time_ms = end_time_ms - _start_time_ms;
5267 // Update the step history.
5268 _step_times_ms.add(elapsed_time_ms);
5270 if (has_aborted()) {
5271 // The task was aborted for some reason.
5273 statsOnly( ++_aborted );
5275 if (_has_timed_out) {
5276 double diff_ms = elapsed_time_ms - _time_target_ms;
5277 // Keep statistics of how well we did with respect to hitting
5278 // our target only if we actually timed out (if we aborted for
5279 // other reasons, then the results might get skewed).
5280 _marking_step_diffs_ms.add(diff_ms);
5281 }
5283 if (_cm->has_overflown()) {
5284 // This is the interesting one. We aborted because a global
5285 // overflow was raised. This means we have to restart the
5286 // marking phase and start iterating over regions. However, in
5287 // order to do this we have to make sure that all tasks stop
5288 // what they are doing and re-initialise in a safe manner. We
5289 // will achieve this with the use of two barrier sync points.
5291 if (_cm->verbose_low()) {
5292 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
5293 }
5295 _cm->enter_first_sync_barrier(_task_id);
5296 // When we exit this sync barrier we know that all tasks have
5297 // stopped doing marking work. So, it's now safe to
5298 // re-initialise our data structures. At the end of this method,
5299 // task 0 will clear the global data structures.
5301 statsOnly( ++_aborted_overflow );
5303 // We clear the local state of this task...
5304 clear_region_fields();
5306 // ...and enter the second barrier.
5307 _cm->enter_second_sync_barrier(_task_id);
5308 // At this point everything has been re-initialised and we're
5309 // ready to restart.
5310 }
5312 if (_cm->verbose_low()) {
5313 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
5314 "elapsed = %1.2lfms <<<<<<<<<<",
5315 _task_id, _time_target_ms, elapsed_time_ms);
5316 if (_cm->has_aborted()) {
5317 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
5318 _task_id);
5319 }
5320 }
5321 } else {
5322 if (_cm->verbose_low()) {
5323 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
5324 "elapsed = %1.2lfms <<<<<<<<<<",
5325 _task_id, _time_target_ms, elapsed_time_ms);
5326 }
5327 }
5329 _claimed = false;
5330 }
5332 CMTask::CMTask(int task_id,
5333 ConcurrentMark* cm,
5334 size_t* marked_bytes,
5335 BitMap* card_bm,
5336 CMTaskQueue* task_queue,
5337 CMTaskQueueSet* task_queues)
5338 : _g1h(G1CollectedHeap::heap()),
5339 _task_id(task_id), _cm(cm),
5340 _claimed(false),
5341 _nextMarkBitMap(NULL), _hash_seed(17),
5342 _task_queue(task_queue),
5343 _task_queues(task_queues),
5344 _cm_oop_closure(NULL),
5345 _aborted_region(MemRegion()),
5346 _marked_bytes_array(marked_bytes),
5347 _card_bm(card_bm) {
5348 guarantee(task_queue != NULL, "invariant");
5349 guarantee(task_queues != NULL, "invariant");
5351 statsOnly( _clock_due_to_scanning = 0;
5352 _clock_due_to_marking = 0 );
5354 _marking_step_diffs_ms.add(0.5);
5355 }
5357 // These are formatting macros that are used below to ensure
5358 // consistent formatting. The *_H_* versions are used to format the
5359 // header for a particular value and they should be kept consistent
5360 // with the corresponding macro. Also note that most of the macros add
5361 // the necessary white space (as a prefix) which makes them a bit
5362 // easier to compose.
5364 // All the output lines are prefixed with this string to be able to
5365 // identify them easily in a large log file.
5366 #define G1PPRL_LINE_PREFIX "###"
5368 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
5369 #ifdef _LP64
5370 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
5371 #else // _LP64
5372 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
5373 #endif // _LP64
5375 // For per-region info
5376 #define G1PPRL_TYPE_FORMAT " %-4s"
5377 #define G1PPRL_TYPE_H_FORMAT " %4s"
5378 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
5379 #define G1PPRL_BYTE_H_FORMAT " %9s"
5380 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
5381 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
5383 // For summary info
5384 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
5385 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
5386 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
5387 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
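// For example (an illustrative composition mirroring the calls below),
// a two-column header line could be printed as:
//
//   _out->print_cr(G1PPRL_LINE_PREFIX
//                  G1PPRL_TYPE_H_FORMAT
//                  G1PPRL_BYTE_H_FORMAT,
//                  "type", "used");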
5389 G1PrintRegionLivenessInfoClosure::
5390 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
5391 : _out(out),
5392 _total_used_bytes(0), _total_capacity_bytes(0),
5393 _total_prev_live_bytes(0), _total_next_live_bytes(0),
5394 _hum_used_bytes(0), _hum_capacity_bytes(0),
5395 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
5396 G1CollectedHeap* g1h = G1CollectedHeap::heap();
5397 MemRegion g1_committed = g1h->g1_committed();
5398 MemRegion g1_reserved = g1h->g1_reserved();
5399 double now = os::elapsedTime();
5401 // Print the header of the output.
5402 _out->cr();
5403 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
5404 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
5405 G1PPRL_SUM_ADDR_FORMAT("committed")
5406 G1PPRL_SUM_ADDR_FORMAT("reserved")
5407 G1PPRL_SUM_BYTE_FORMAT("region-size"),
5408 g1_committed.start(), g1_committed.end(),
5409 g1_reserved.start(), g1_reserved.end(),
5410 HeapRegion::GrainBytes);
5411 _out->print_cr(G1PPRL_LINE_PREFIX);
5412 _out->print_cr(G1PPRL_LINE_PREFIX
5413 G1PPRL_TYPE_H_FORMAT
5414 G1PPRL_ADDR_BASE_H_FORMAT
5415 G1PPRL_BYTE_H_FORMAT
5416 G1PPRL_BYTE_H_FORMAT
5417 G1PPRL_BYTE_H_FORMAT
5418 G1PPRL_DOUBLE_H_FORMAT,
5419 "type", "address-range",
5420 "used", "prev-live", "next-live", "gc-eff");
5421 _out->print_cr(G1PPRL_LINE_PREFIX
5422 G1PPRL_TYPE_H_FORMAT
5423 G1PPRL_ADDR_BASE_H_FORMAT
5424 G1PPRL_BYTE_H_FORMAT
5425 G1PPRL_BYTE_H_FORMAT
5426 G1PPRL_BYTE_H_FORMAT
5427 G1PPRL_DOUBLE_H_FORMAT,
5428 "", "",
5429 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
5430 }
5432 // Given a pointer to one of the _hum_* fields, this method deduces
5433 // the corresponding value for a region in a humongous region
5434 // series (either the region size, or what's left if the _hum_* field
5435 // is < the region size), and updates the _hum_* field accordingly.
5436 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
5437 size_t bytes = 0;
5438 // The > 0 check is to deal with the prev and next live bytes which
5439 // could be 0.
5440 if (*hum_bytes > 0) {
5441 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
5442 *hum_bytes -= bytes;
5443 }
5444 return bytes;
5445 }
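// A worked example of the apportioning above, assuming a 1 MB region
// size (HeapRegion::GrainBytes): if _hum_used_bytes starts at 2560 KB,
// three successive calls return 1024 KB, 1024 KB and 512 KB, leaving
// the field at 0 for any further regions in the series.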
5447 // It deduces the values for a region in a humongous region series
5448 // from the _hum_* fields and updates those accordingly. It assumes
5449 // that the _hum_* fields have already been set up from the "starts
5450 // humongous" region and that we visit the regions in address order.
5451 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
5452 size_t* capacity_bytes,
5453 size_t* prev_live_bytes,
5454 size_t* next_live_bytes) {
5455 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
5456 *used_bytes = get_hum_bytes(&_hum_used_bytes);
5457 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
5458 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
5459 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
5460 }
5462 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
5463 const char* type = "";
5464 HeapWord* bottom = r->bottom();
5465 HeapWord* end = r->end();
5466 size_t capacity_bytes = r->capacity();
5467 size_t used_bytes = r->used();
5468 size_t prev_live_bytes = r->live_bytes();
5469 size_t next_live_bytes = r->next_live_bytes();
5470 double gc_eff = r->gc_efficiency();
5471 if (r->used() == 0) {
5472 type = "FREE";
5473 } else if (r->is_survivor()) {
5474 type = "SURV";
5475 } else if (r->is_young()) {
5476 type = "EDEN";
5477 } else if (r->startsHumongous()) {
5478 type = "HUMS";
5480 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
5481 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
5482 "they should have been zeroed after the last time we used them");
5483 // Set up the _hum_* fields.
5484 _hum_capacity_bytes = capacity_bytes;
5485 _hum_used_bytes = used_bytes;
5486 _hum_prev_live_bytes = prev_live_bytes;
5487 _hum_next_live_bytes = next_live_bytes;
5488 get_hum_bytes(&used_bytes, &capacity_bytes,
5489 &prev_live_bytes, &next_live_bytes);
5490 end = bottom + HeapRegion::GrainWords;
5491 } else if (r->continuesHumongous()) {
5492 type = "HUMC";
5493 get_hum_bytes(&used_bytes, &capacity_bytes,
5494 &prev_live_bytes, &next_live_bytes);
5495 assert(end == bottom + HeapRegion::GrainWords, "invariant");
5496 } else {
5497 type = "OLD";
5498 }
5500 _total_used_bytes += used_bytes;
5501 _total_capacity_bytes += capacity_bytes;
5502 _total_prev_live_bytes += prev_live_bytes;
5503 _total_next_live_bytes += next_live_bytes;
5505 // Print a line for this particular region.
5506 _out->print_cr(G1PPRL_LINE_PREFIX
5507 G1PPRL_TYPE_FORMAT
5508 G1PPRL_ADDR_BASE_FORMAT
5509 G1PPRL_BYTE_FORMAT
5510 G1PPRL_BYTE_FORMAT
5511 G1PPRL_BYTE_FORMAT
5512 G1PPRL_DOUBLE_FORMAT,
5513 type, bottom, end,
5514 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
5516 return false;
5517 }
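// For illustration, a per-region line printed above looks roughly like
// this (hypothetical addresses and values):
//   ###  OLD 0x00000000f0000000-0x00000000f0100000    524288    524288    491520           12.3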
5519 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
5520 // Print the footer of the output.
5521 _out->print_cr(G1PPRL_LINE_PREFIX);
5522 _out->print_cr(G1PPRL_LINE_PREFIX
5523 " SUMMARY"
5524 G1PPRL_SUM_MB_FORMAT("capacity")
5525 G1PPRL_SUM_MB_PERC_FORMAT("used")
5526 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
5527 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
5528 bytes_to_mb(_total_capacity_bytes),
5529 bytes_to_mb(_total_used_bytes),
5530 perc(_total_used_bytes, _total_capacity_bytes),
5531 bytes_to_mb(_total_prev_live_bytes),
5532 perc(_total_prev_live_bytes, _total_capacity_bytes),
5533 bytes_to_mb(_total_next_live_bytes),
5534 perc(_total_next_live_bytes, _total_capacity_bytes));
5535 _out->cr();
5536 }