Tue, 02 Sep 2014 15:04:00 +0200
8055919: Remove dead code in G1 concurrent marking code
Reviewed-by: jmasa, jwilhelm

/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionManager.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSet.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}
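
// For illustration only (not part of this file): a minimal sketch of how a
// caller can walk every marked interval in [bottom, top) by alternating the
// two lookups above. The names 'bm', 'bottom', 'top' and 'process' are
// hypothetical.
//
//   HeapWord* cur = bottom;
//   while (cur < top) {
//     HeapWord* start = bm->getNextMarkedWordAddress(cur, top);
//     if (start == top) break;                          // no more marks
//     HeapWord* end = bm->getNextUnmarkedWordAddress(start, top);
//     process(MemRegion(start, end));                   // one marked interval
//     cur = end;
//   }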

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return heap_size / mark_distance();
}

size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}
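
// Worked example (illustrative): on a VM with the default
// MinObjAlignmentInBytes == 8 and BitsPerByte == 8, mark_distance() is 64,
// i.e. one bitmap bit covers 64 heap bytes (one bit per possible object
// start), so compute_size() sizes the bitmap at 1/64 of the heap:
//
//   compute_size(32 * G) == 512 * M   // 32 GB heap -> 512 MB mark bitmap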

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions) {
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}

// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
    }

    return false;
  }
};
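
// Design note (illustrative, assuming HeapWordSize == 8): the closure clears
// the bitmap one 1 MB heap chunk at a time rather than a whole region at
// once, so chunk_size_in_words is 128K words and a concurrent caller gets a
// yield check roughly every 1 MB of heap cleared. A typical use mirrors
// clearAll() below, just with yielding enabled:
//
//   ClearBitmapHRClosure cl(cm, bitmap, true /* may_yield */);
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);  // may abort early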

void CMBitMap::clearAll() {
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  G1CollectedHeap::heap()->heap_region_iterate(&cl);
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up the existing stack until we have managed to
  // get the doubled capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with the old stack
    _virtual_space.release();
    // Reinitialize the virtual space for the new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
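
// Illustrative note (not part of this file): par_push() is the classic
// compare-and-swap claim loop. A thread optimistically reads the current top
// of stack and tries to advance it; Atomic::cmpxchg returns the value that
// was actually at the destination, so equality with the expected value
// proves this thread won slot 'index' and may store into it. A loser simply
// re-reads _index and retries:
//
//   jint seen = Atomic::cmpxchg(next_index, &_index, index);
//   if (seen == index) { /* we own slots [index, next_index) */ }
//
// par_adjoin_arr() below uses the same pattern with a stride of 'n' slots.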

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
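
// Usage sketch (illustrative; 'MyOopClosure' is a hypothetical closure):
// drain() pops grey objects and applies the closure to each object's fields,
// optionally yielding between objects. A false return means the drain was
// cut short by a yield and the caller must come back for the rest:
//
//   MyOopClosure cl(...);
//   if (!_markStack.drain(&cl, _nextMarkBitMap, true /* yield_after */)) {
//     // yielded: the stack may be non-empty, re-drain after the safepoint
//   }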

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
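
// Illustrative note: claim_next() is a double-checked claim. The unlocked
// read of _next_survivor lets threads bail out cheaply once the list is
// exhausted; only threads that saw a candidate take RootRegionScan_lock and
// re-read it, so exactly one claimer advances the cursor per region. Callers
// loop in the style of CMRootRegionScanTask::work() further below:
//
//   HeapRegion* hr = root_regions->claim_next();
//   while (hr != NULL) {
//     scan(hr);                          // hypothetical per-region work
//     hr = root_regions->claim_next();   // NULL when drained or aborted
//   }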

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
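
// Worked examples (illustrative): the scaling gives roughly one marking
// thread per four parallel GC threads and never returns fewer than one:
//
//   scale_parallel_threads(1)  == 1   // (1 + 2) / 4 == 0, clamped to 1
//   scale_parallel_threads(4)  == 1
//   scale_parallel_threads(8)  == 2
//   scale_parallel_threads(13) == 3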

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }
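
    // Worked example (illustrative): with G1MarkingOverheadPercent == 10,
    // MaxGCPauseMillis == 200, GCPauseIntervalMillis == 1000 and 8 CPUs:
    //
    //   marking_overhead      = 0.10
    //   overall_cm_overhead   = 200 * 0.10 / 1000  == 0.02
    //   cpu_ratio             = 1.0 / 8            == 0.125
    //   marking_thread_num    = ceil(0.02 / 0.125) == 1
    //   marking_task_overhead = 0.02 / 1 * 8       == 0.16
    //   sleep_factor          = (1 - 0.16) / 0.16  == 5.25
    //
    // i.e. one marking thread that sleeps 5.25x as long as it last ran,
    // keeping its CPU usage near the requested 16% of one processor.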

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }
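
  // Sizing note (illustrative, assuming a 64-bit TASKQUEUE_SIZE of 128K
  // entries): with 4 marking threads the ergonomic default becomes
  //
  //   mark_stack_size = MIN2(MarkStackSizeMax,
  //                          MAX2(MarkStackSize, 4 * 131072))  // 512K oops
  //
  // i.e. the global stack is kept at least large enough to absorb every
  // task-local queue overflowing into it at once.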

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  g1h->heap_region_iterate(&cl);

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    return _bitmap->getNextMarkedWordAddress(r->bottom(), r->end()) != r->end();
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        _cm->clear_has_overflown();

        _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}
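
// Illustrative note: the scan walks objects linearly from bottom() to top(),
// issuing a software prefetch PrefetchScanIntervalInBytes ahead of the
// current object (e.g. with a hypothetical interval of 576 bytes, the cache
// line ~9 lines ahead is requested while the current object's fields are
// iterated), hiding memory latency on this strictly sequential walk.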

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};
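
// Worked example (illustrative, assuming the usual 512-byte cards, i.e.
// CardTableModRefBS::card_size == 512 and card_shift == 9): a marked object
// at heap offset 0x1230 with size 0x450 bytes spans [0x1230, 0x1680):
//
//   start_idx = 0x1230 >> 9  == card 9
//   end_idx   = 0x1680 >> 9  == card 11
//
// 0x1680 is not card aligned, so end_idx is bumped to 12 and cards 9..11
// inclusive are set, covering every card the object touches.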
1515 // Heap region closure used for verifying the counting data
1516 // that was accumulated concurrently and aggregated during
1517 // the remark pause. This closure is applied to the heap
1518 // regions during the STW cleanup pause.
1520 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1521 G1CollectedHeap* _g1h;
1522 ConcurrentMark* _cm;
1523 CalcLiveObjectsClosure _calc_cl;
1524 BitMap* _region_bm; // Region BM to be verified
1525 BitMap* _card_bm; // Card BM to be verified
1526 bool _verbose; // verbose output?
1528 BitMap* _exp_region_bm; // Expected Region BM values
1529 BitMap* _exp_card_bm; // Expected card BM values
1531 int _failures;
1533 public:
1534 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1535 BitMap* region_bm,
1536 BitMap* card_bm,
1537 BitMap* exp_region_bm,
1538 BitMap* exp_card_bm,
1539 bool verbose) :
1540 _g1h(g1h), _cm(g1h->concurrent_mark()),
1541 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1542 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1543 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1544 _failures(0) { }
1546 int failures() const { return _failures; }
1548 bool doHeapRegion(HeapRegion* hr) {
1549 if (hr->continuesHumongous()) {
1550 // We will ignore these here and process them when their
1551 // associated "starts humongous" region is processed (see
1552 // set_bit_for_heap_region()). Note that we cannot rely on their
1553 // associated "starts humongous" region to have their bit set to
1554 // 1 since, due to the region chunking in the parallel region
1555 // iteration, a "continues humongous" region might be visited
1556 // before its associated "starts humongous".
1557 return false;
1558 }
1560 int failures = 0;
1562 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1563 // this region and set the corresponding bits in the expected region
1564 // and card bitmaps.
1565 bool res = _calc_cl.doHeapRegion(hr);
1566 assert(res == false, "should be continuing");
1568 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1569 Mutex::_no_safepoint_check_flag);
1571 // Verify the marked bytes for this region.
1572 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1573 size_t act_marked_bytes = hr->next_marked_bytes();
1575 // We're not OK if expected marked bytes > actual marked bytes. It means
1576 // we have missed accounting some objects during the actual marking.
1577 if (exp_marked_bytes > act_marked_bytes) {
1578 if (_verbose) {
1579 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1580 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1581 hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1582 }
1583 failures += 1;
1584 }
1586 // Verify the bit, for this region, in the actual and expected
1587 // (which was just calculated) region bit maps.
1588 // We're not OK if the bit in the calculated expected region
1589 // bitmap is set and the bit in the actual region bitmap is not.
1590 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1592 bool expected = _exp_region_bm->at(index);
1593 bool actual = _region_bm->at(index);
1594 if (expected && !actual) {
1595 if (_verbose) {
1596 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1597 "expected: %s, actual: %s",
1598 hr->hrm_index(),
1599 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1600 }
1601 failures += 1;
1602 }
1604 // Verify that the card bit maps for the cards spanned by the current
1605 // region match. We have an error if we have a set bit in the expected
1606 // bit map and the corresponding bit in the actual bitmap is not set.
1608 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1609 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1611 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1612 expected = _exp_card_bm->at(i);
1613 actual = _card_bm->at(i);
1615 if (expected && !actual) {
1616 if (_verbose) {
1617 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1618 "expected: %s, actual: %s",
1619 hr->hrm_index(), i,
1620 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1621 }
1622 failures += 1;
1623 }
1624 }
1626 if (failures > 0 && _verbose) {
1627 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1628 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1629 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1630 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1631 }
1633 _failures += failures;
1635 // We could stop iteration over the heap when we
1636 // find the first violating region by returning true.
1637 return false;
1638 }
1639 };
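// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): the closure
// above only counts "expected bit set, actual bit clear" as a failure - extra
// bits in the actual bitmap are tolerated. A minimal standalone version of
// that one-directional check, with std::vector<bool> standing in for BitMap:

#include <cstddef>
#include <vector>

static int count_bitmap_failures(const std::vector<bool>& expected,
                                 const std::vector<bool>& actual) {
  int failures = 0;
  for (size_t i = 0; i < expected.size() && i < actual.size(); i += 1) {
    if (expected[i] && !actual[i]) {  // actual may legitimately have more bits
      failures += 1;
    }
  }
  return failures;
}
// ---------------------------------------------------------------------------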
1641 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1642 protected:
1643 G1CollectedHeap* _g1h;
1644 ConcurrentMark* _cm;
1645 BitMap* _actual_region_bm;
1646 BitMap* _actual_card_bm;
1648 uint _n_workers;
1650 BitMap* _expected_region_bm;
1651 BitMap* _expected_card_bm;
1653 int _failures;
1654 bool _verbose;
1656 public:
1657 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1658 BitMap* region_bm, BitMap* card_bm,
1659 BitMap* expected_region_bm, BitMap* expected_card_bm)
1660 : AbstractGangTask("G1 verify final counting"),
1661 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1662 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1663 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1664 _failures(0), _verbose(false),
1665 _n_workers(0) {
1666 assert(VerifyDuringGC, "don't call this otherwise");
1668 // Use the value already set as the number of active threads
1669 // in the call to run_task().
1670 if (G1CollectedHeap::use_parallel_gc_threads()) {
1671 assert( _g1h->workers()->active_workers() > 0,
1672 "Should have been previously set");
1673 _n_workers = _g1h->workers()->active_workers();
1674 } else {
1675 _n_workers = 1;
1676 }
1678 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1679 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1681 _verbose = _cm->verbose_medium();
1682 }
1684 void work(uint worker_id) {
1685 assert(worker_id < _n_workers, "invariant");
1687 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1688 _actual_region_bm, _actual_card_bm,
1689 _expected_region_bm,
1690 _expected_card_bm,
1691 _verbose);
1693 if (G1CollectedHeap::use_parallel_gc_threads()) {
1694 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1695 worker_id,
1696 _n_workers,
1697 HeapRegion::VerifyCountClaimValue);
1698 } else {
1699 _g1h->heap_region_iterate(&verify_cl);
1700 }
1702 Atomic::add(verify_cl.failures(), &_failures);
1703 }
1705 int failures() const { return _failures; }
1706 };
1708 // Closure that finalizes the liveness counting data.
1709 // Used during the cleanup pause.
1710 // Sets the bits corresponding to the interval [NTAMS, top)
1711 // (which contains the implicitly live objects) in the
1712 // card liveness bitmap. Also sets the bit, in the region
1713 // liveness bitmap, for each region containing live data.
1715 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1716 public:
1717 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1718 BitMap* region_bm,
1719 BitMap* card_bm) :
1720 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1722 bool doHeapRegion(HeapRegion* hr) {
1724 if (hr->continuesHumongous()) {
1725 // We will ignore these here and process them when their
1726 // associated "starts humongous" region is processed (see
1727 // set_bit_for_heap_region()). Note that we cannot rely on their
1728 // associated "starts humongous" region to have its bit set to
1729 // 1 since, due to the region chunking in the parallel region
1730 // iteration, a "continues humongous" region might be visited
1731 // before its associated "starts humongous".
1732 return false;
1733 }
1735 HeapWord* ntams = hr->next_top_at_mark_start();
1736 HeapWord* top = hr->top();
1738 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1740 // Mark the allocated-since-marking portion...
1741 if (ntams < top) {
1742 // This definitely means the region has live objects.
1743 set_bit_for_region(hr);
1745 // Now set the bits in the card bitmap for [ntams, top)
1746 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1747 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1749 // Note: if we're looking at the last region in the heap, top
1750 // could actually be just beyond the end of the heap; end_idx
1751 // will then correspond to a (non-existent) card that is also
1752 // just beyond the heap.
1753 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1754 // end of object is not card aligned - increment to cover
1755 // all the cards spanned by the object
1756 end_idx += 1;
1757 }
1759 assert(end_idx <= _card_bm->size(),
1760 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1761 end_idx, _card_bm->size()));
1762 assert(start_idx < _card_bm->size(),
1763 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1764 start_idx, _card_bm->size()));
1766 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1767 }
1769 // Set the bit for the region if it contains live data
1770 if (hr->next_marked_bytes() > 0) {
1771 set_bit_for_region(hr);
1772 }
1774 return false;
1775 }
1776 };
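// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): how the
// closure above maps the [ntams, top) address range onto a half-open card
// index range, including the bump when top is not card aligned. The 512-byte
// card size matches HotSpot's default; the flat pointer arithmetic is an
// assumption of the sketch, not the real card table interface.

#include <cstddef>

static const size_t kCardShift = 9;  // log2 of the 512-byte card size
static const size_t kCardSize  = (size_t)1 << kCardShift;

static void card_range_for(const char* heap_base,
                           const char* start, const char* end,
                           size_t* start_idx, size_t* end_idx) {
  *start_idx = (size_t)(start - heap_base) >> kCardShift;
  *end_idx   = (size_t)(end - heap_base) >> kCardShift;
  if (((size_t)(end - heap_base) & (kCardSize - 1)) != 0) {
    *end_idx += 1;  // end is mid-card: include the card it lands in
  }
}
// ---------------------------------------------------------------------------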
1778 class G1ParFinalCountTask: public AbstractGangTask {
1779 protected:
1780 G1CollectedHeap* _g1h;
1781 ConcurrentMark* _cm;
1782 BitMap* _actual_region_bm;
1783 BitMap* _actual_card_bm;
1785 uint _n_workers;
1787 public:
1788 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1789 : AbstractGangTask("G1 final counting"),
1790 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1791 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1792 _n_workers(0) {
1793 // Use the value already set as the number of active threads
1794 // in the call to run_task().
1795 if (G1CollectedHeap::use_parallel_gc_threads()) {
1796 assert( _g1h->workers()->active_workers() > 0,
1797 "Should have been previously set");
1798 _n_workers = _g1h->workers()->active_workers();
1799 } else {
1800 _n_workers = 1;
1801 }
1802 }
1804 void work(uint worker_id) {
1805 assert(worker_id < _n_workers, "invariant");
1807 FinalCountDataUpdateClosure final_update_cl(_g1h,
1808 _actual_region_bm,
1809 _actual_card_bm);
1811 if (G1CollectedHeap::use_parallel_gc_threads()) {
1812 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1813 worker_id,
1814 _n_workers,
1815 HeapRegion::FinalCountClaimValue);
1816 } else {
1817 _g1h->heap_region_iterate(&final_update_cl);
1818 }
1819 }
1820 };
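// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): the
// parallel/serial split above dispatches the same closure either over a
// chunked parallel iteration or a plain serial walk. HotSpot uses per-region
// claim values for this; the sketch models the same "each region is visited
// exactly once" property with an atomic chunk counter instead (which would be
// reset between iterations).

#include <atomic>
#include <cstddef>

static const size_t kChunkLen = 8;
static std::atomic<size_t> g_next_chunk(0);

static void par_iterate_regions(size_t n_regions,
                                void (*visit)(size_t region_index)) {
  for (;;) {
    size_t chunk = g_next_chunk.fetch_add(1);
    size_t start = chunk * kChunkLen;
    if (start >= n_regions) {
      return;  // all chunks claimed; this worker is done
    }
    size_t end = start + kChunkLen < n_regions ? start + kChunkLen : n_regions;
    for (size_t i = start; i < end; i += 1) {
      visit(i);
    }
  }
}
// ---------------------------------------------------------------------------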
1822 class G1ParNoteEndTask;
1824 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1825 G1CollectedHeap* _g1;
1826 size_t _max_live_bytes;
1827 uint _regions_claimed;
1828 size_t _freed_bytes;
1829 FreeRegionList* _local_cleanup_list;
1830 HeapRegionSetCount _old_regions_removed;
1831 HeapRegionSetCount _humongous_regions_removed;
1832 HRRSCleanupTask* _hrrs_cleanup_task;
1833 double _claimed_region_time;
1834 double _max_region_time;
1836 public:
1837 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1838 FreeRegionList* local_cleanup_list,
1839 HRRSCleanupTask* hrrs_cleanup_task) :
1840 _g1(g1),
1841 _max_live_bytes(0), _regions_claimed(0),
1842 _freed_bytes(0),
1843 _claimed_region_time(0.0), _max_region_time(0.0),
1844 _local_cleanup_list(local_cleanup_list),
1845 _old_regions_removed(),
1846 _humongous_regions_removed(),
1847 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1849 size_t freed_bytes() { return _freed_bytes; }
1850 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1851 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1853 bool doHeapRegion(HeapRegion *hr) {
1854 if (hr->continuesHumongous()) {
1855 return false;
1856 }
1857 // We use a claim value of zero here because all regions
1858 // were claimed with value 1 in the FinalCount task.
1859 _g1->reset_gc_time_stamps(hr);
1860 double start = os::elapsedTime();
1861 _regions_claimed++;
1862 hr->note_end_of_marking();
1863 _max_live_bytes += hr->max_live_bytes();
1865 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1866 _freed_bytes += hr->used();
1867 hr->set_containing_set(NULL);
1868 if (hr->isHumongous()) {
1869 assert(hr->startsHumongous(), "we should only see starts humongous");
1870 _humongous_regions_removed.increment(1u, hr->capacity());
1871 _g1->free_humongous_region(hr, _local_cleanup_list, true);
1872 } else {
1873 _old_regions_removed.increment(1u, hr->capacity());
1874 _g1->free_region(hr, _local_cleanup_list, true);
1875 }
1876 } else {
1877 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1878 }
1880 double region_time = (os::elapsedTime() - start);
1881 _claimed_region_time += region_time;
1882 if (region_time > _max_region_time) {
1883 _max_region_time = region_time;
1884 }
1885 return false;
1886 }
1888 size_t max_live_bytes() { return _max_live_bytes; }
1889 uint regions_claimed() { return _regions_claimed; }
1890 double claimed_region_time_sec() { return _claimed_region_time; }
1891 double max_region_time_sec() { return _max_region_time; }
1892 };
1894 class G1ParNoteEndTask: public AbstractGangTask {
1895 friend class G1NoteEndOfConcMarkClosure;
1897 protected:
1898 G1CollectedHeap* _g1h;
1899 size_t _max_live_bytes;
1900 size_t _freed_bytes;
1901 FreeRegionList* _cleanup_list;
1903 public:
1904 G1ParNoteEndTask(G1CollectedHeap* g1h,
1905 FreeRegionList* cleanup_list) :
1906 AbstractGangTask("G1 note end"), _g1h(g1h),
1907 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1909 void work(uint worker_id) {
1910 double start = os::elapsedTime();
1911 FreeRegionList local_cleanup_list("Local Cleanup List");
1912 HRRSCleanupTask hrrs_cleanup_task;
1913 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1914 &hrrs_cleanup_task);
1915 if (G1CollectedHeap::use_parallel_gc_threads()) {
1916 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1917 _g1h->workers()->active_workers(),
1918 HeapRegion::NoteEndClaimValue);
1919 } else {
1920 _g1h->heap_region_iterate(&g1_note_end);
1921 }
1922 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1924 // Now update the lists
1925 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1926 {
1927 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1928 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1929 _max_live_bytes += g1_note_end.max_live_bytes();
1930 _freed_bytes += g1_note_end.freed_bytes();
1932 // If we iterate over the global cleanup list at the end of
1933 // cleanup to do this printing we will not guarantee to only
1934 // generate output for the newly-reclaimed regions (the list
1935 // might not be empty at the beginning of cleanup; we might
1936 // still be working on its previous contents). So we do the
1937 // printing here, before we append the new regions to the global
1938 // cleanup list.
1940 G1HRPrinter* hr_printer = _g1h->hr_printer();
1941 if (hr_printer->is_active()) {
1942 FreeRegionListIterator iter(&local_cleanup_list);
1943 while (iter.more_available()) {
1944 HeapRegion* hr = iter.get_next();
1945 hr_printer->cleanup(hr);
1946 }
1947 }
1949 _cleanup_list->add_ordered(&local_cleanup_list);
1950 assert(local_cleanup_list.is_empty(), "post-condition");
1952 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1953 }
1954 }
1955 size_t max_live_bytes() { return _max_live_bytes; }
1956 size_t freed_bytes() { return _freed_bytes; }
1957 };
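// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): the
// per-worker pattern used by G1ParNoteEndTask - do the expensive work on a
// thread-local list, then splice it into the shared list under one short
// critical section. std::mutex and std::vector stand in for the HotSpot lock
// and FreeRegionList types.

#include <mutex>
#include <vector>

static std::mutex g_cleanup_lock;
static std::vector<int> g_shared_cleanup_list;

static void note_end_worker(std::vector<int>& local_list) {
  // ... populate local_list without holding any lock ...
  std::lock_guard<std::mutex> guard(g_cleanup_lock);
  g_shared_cleanup_list.insert(g_shared_cleanup_list.end(),
                               local_list.begin(), local_list.end());
  local_list.clear();  // mirrors the "local list is empty" post-condition
}
// ---------------------------------------------------------------------------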
1959 class G1ParScrubRemSetTask: public AbstractGangTask {
1960 protected:
1961 G1RemSet* _g1rs;
1962 BitMap* _region_bm;
1963 BitMap* _card_bm;
1964 public:
1965 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1966 BitMap* region_bm, BitMap* card_bm) :
1967 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1968 _region_bm(region_bm), _card_bm(card_bm) { }
1970 void work(uint worker_id) {
1971 if (G1CollectedHeap::use_parallel_gc_threads()) {
1972 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1973 HeapRegion::ScrubRemSetClaimValue);
1974 } else {
1975 _g1rs->scrub(_region_bm, _card_bm);
1976 }
1977 }
1979 };
1981 void ConcurrentMark::cleanup() {
1982 // world is stopped at this checkpoint
1983 assert(SafepointSynchronize::is_at_safepoint(),
1984 "world should be stopped");
1985 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1987 // If a full collection has happened, we shouldn't do this.
1988 if (has_aborted()) {
1989 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1990 return;
1991 }
1993 g1h->verify_region_sets_optional();
1995 if (VerifyDuringGC) {
1996 HandleMark hm; // handle scope
1997 Universe::heap()->prepare_for_verify();
1998 Universe::verify(VerifyOption_G1UsePrevMarking,
1999 " VerifyDuringGC:(before)");
2000 }
2001 g1h->check_bitmaps("Cleanup Start");
2003 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2004 g1p->record_concurrent_mark_cleanup_start();
2006 double start = os::elapsedTime();
2008 HeapRegionRemSet::reset_for_cleanup_tasks();
2010 uint n_workers;
2012 // Do counting once more with the world stopped for good measure.
2013 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2015 if (G1CollectedHeap::use_parallel_gc_threads()) {
2016 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2017 "sanity check");
2019 g1h->set_par_threads();
2020 n_workers = g1h->n_par_threads();
2021 assert(g1h->n_par_threads() == n_workers,
2022 "Should not have been reset");
2023 g1h->workers()->run_task(&g1_par_count_task);
2024 // Done with the parallel phase so reset to 0.
2025 g1h->set_par_threads(0);
2027 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2028 "sanity check");
2029 } else {
2030 n_workers = 1;
2031 g1_par_count_task.work(0);
2032 }
2034 if (VerifyDuringGC) {
2035 // Verify that the counting data accumulated during marking matches
2036 // that calculated by walking the marking bitmap.
2038 // Bitmaps to hold expected values
2039 BitMap expected_region_bm(_region_bm.size(), true);
2040 BitMap expected_card_bm(_card_bm.size(), true);
2042 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2043 &_region_bm,
2044 &_card_bm,
2045 &expected_region_bm,
2046 &expected_card_bm);
2048 if (G1CollectedHeap::use_parallel_gc_threads()) {
2049 g1h->set_par_threads((int)n_workers);
2050 g1h->workers()->run_task(&g1_par_verify_task);
2051 // Done with the parallel phase so reset to 0.
2052 g1h->set_par_threads(0);
2054 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2055 "sanity check");
2056 } else {
2057 g1_par_verify_task.work(0);
2058 }
2060 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2061 }
2063 size_t start_used_bytes = g1h->used();
2064 g1h->set_marking_complete();
2066 double count_end = os::elapsedTime();
2067 double this_final_counting_time = (count_end - start);
2068 _total_counting_time += this_final_counting_time;
2070 if (G1PrintRegionLivenessInfo) {
2071 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2072 _g1h->heap_region_iterate(&cl);
2073 }
2075 // Install the newly created mark bitmap as "prev".
2076 swapMarkBitMaps();
2078 g1h->reset_gc_time_stamp();
2080 // Note end of marking in all heap regions.
2081 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2082 if (G1CollectedHeap::use_parallel_gc_threads()) {
2083 g1h->set_par_threads((int)n_workers);
2084 g1h->workers()->run_task(&g1_par_note_end_task);
2085 g1h->set_par_threads(0);
2087 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2088 "sanity check");
2089 } else {
2090 g1_par_note_end_task.work(0);
2091 }
2092 g1h->check_gc_time_stamps();
2094 if (!cleanup_list_is_empty()) {
2095 // The cleanup list is not empty, so we'll have to process it
2096 // concurrently. Notify anyone else that might be wanting free
2097 // regions that there will be more free regions coming soon.
2098 g1h->set_free_regions_coming();
2099 }
2101 // The rem set scrubbing must happen before the sorting call further
2102 // down, since it affects the metric by which we sort the heap regions.
2103 if (G1ScrubRemSets) {
2104 double rs_scrub_start = os::elapsedTime();
2105 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2106 if (G1CollectedHeap::use_parallel_gc_threads()) {
2107 g1h->set_par_threads((int)n_workers);
2108 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2109 g1h->set_par_threads(0);
2111 assert(g1h->check_heap_region_claim_values(
2112 HeapRegion::ScrubRemSetClaimValue),
2113 "sanity check");
2114 } else {
2115 g1_par_scrub_rs_task.work(0);
2116 }
2118 double rs_scrub_end = os::elapsedTime();
2119 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2120 _total_rs_scrub_time += this_rs_scrub_time;
2121 }
2123 // this will also free any regions totally full of garbage objects,
2124 // and sort the regions.
2125 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2127 // Statistics.
2128 double end = os::elapsedTime();
2129 _cleanup_times.add((end - start) * 1000.0);
2131 if (G1Log::fine()) {
2132 g1h->print_size_transition(gclog_or_tty,
2133 start_used_bytes,
2134 g1h->used(),
2135 g1h->capacity());
2136 }
2138 // Clean up will have freed any regions completely full of garbage.
2139 // Update the soft reference policy with the new heap occupancy.
2140 Universe::update_heap_info_at_gc();
2142 if (VerifyDuringGC) {
2143 HandleMark hm; // handle scope
2144 Universe::heap()->prepare_for_verify();
2145 Universe::verify(VerifyOption_G1UsePrevMarking,
2146 " VerifyDuringGC:(after)");
2147 }
2148 g1h->check_bitmaps("Cleanup End");
2150 g1h->verify_region_sets_optional();
2152 // We need to make this be a "collection" so any collection pause that
2153 // races with it goes around and waits for completeCleanup to finish.
2154 g1h->increment_total_collections();
2156 // Clean out dead classes and update Metaspace sizes.
2157 if (ClassUnloadingWithConcurrentMark) {
2158 ClassLoaderDataGraph::purge();
2159 }
2160 MetaspaceGC::compute_new_size();
2162 // We reclaimed old regions so we should calculate the sizes to make
2163 // sure we update the old gen/space data.
2164 g1h->g1mm()->update_sizes();
2166 g1h->trace_heap_after_concurrent_cycle();
2167 }
2169 void ConcurrentMark::completeCleanup() {
2170 if (has_aborted()) return;
2172 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2174 _cleanup_list.verify_optional();
2175 FreeRegionList tmp_free_list("Tmp Free List");
2177 if (G1ConcRegionFreeingVerbose) {
2178 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2179 "cleanup list has %u entries",
2180 _cleanup_list.length());
2181 }
2183 // No one else should be accessing the _cleanup_list at this point,
2184 // so it is not necessary to take any locks
2185 while (!_cleanup_list.is_empty()) {
2186 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2187 assert(hr != NULL, "Got NULL from a non-empty list");
2188 hr->par_clear();
2189 tmp_free_list.add_ordered(hr);
2191 // Instead of adding one region at a time to the secondary_free_list,
2192 // we accumulate them in the local list and move them a few at a
2193 // time. This also cuts down on the number of notify_all() calls
2194 // we do during this process. We'll also append the local list when
2195 // _cleanup_list is empty (which means we just removed the last
2196 // region from the _cleanup_list).
2197 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2198 _cleanup_list.is_empty()) {
2199 if (G1ConcRegionFreeingVerbose) {
2200 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2201 "appending %u entries to the secondary_free_list, "
2202 "cleanup list still has %u entries",
2203 tmp_free_list.length(),
2204 _cleanup_list.length());
2205 }
2207 {
2208 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2209 g1h->secondary_free_list_add(&tmp_free_list);
2210 SecondaryFreeList_lock->notify_all();
2211 }
2213 if (G1StressConcRegionFreeing) {
2214 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2215 os::sleep(Thread::current(), (jlong) 1, false);
2216 }
2217 }
2218 }
2219 }
2220 assert(tmp_free_list.is_empty(), "post-condition");
2221 }
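// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): the batched
// hand-off loop in completeCleanup() above. Regions are moved a few at a time
// so waiters are woken once per batch rather than once per region.
// std::deque, std::mutex and std::condition_variable stand in for
// FreeRegionList, SecondaryFreeList_lock and notify_all(); kAppendLength
// plays the role of G1SecondaryFreeListAppendLength.

#include <condition_variable>
#include <deque>
#include <mutex>

static std::mutex g_secondary_lock;
static std::condition_variable g_secondary_cv;
static std::deque<int> g_secondary_free_list;
static const size_t kAppendLength = 16;

static void complete_cleanup_sketch(std::deque<int>& cleanup_list) {
  std::deque<int> tmp;
  while (!cleanup_list.empty()) {
    tmp.push_back(cleanup_list.front());
    cleanup_list.pop_front();
    if (tmp.size() % kAppendLength == 0 || cleanup_list.empty()) {
      std::lock_guard<std::mutex> guard(g_secondary_lock);
      g_secondary_free_list.insert(g_secondary_free_list.end(),
                                   tmp.begin(), tmp.end());
      tmp.clear();
      g_secondary_cv.notify_all();  // one wake-up per batch, not per region
    }
  }
}
// ---------------------------------------------------------------------------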
2223 // Supporting Object and Oop closures for reference discovery
2224 // and processing during marking.
2226 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2227 HeapWord* addr = (HeapWord*)obj;
2228 return addr != NULL &&
2229 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2230 }
2232 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2233 // Uses the CMTask associated with a worker thread (for serial reference
2234 // processing the CMTask for worker 0 is used) to preserve (mark) and
2235 // trace referent objects.
2236 //
2237 // Using the CMTask and embedded local queues avoids having the worker
2238 // threads operating on the global mark stack. This reduces the risk
2239 // of overflowing the stack - which we would rather avoid at this late
2240 // stage. Also, using the tasks' local queues removes the potential
2241 // for the workers to interfere with each other, which could occur if
2242 // they operated on the global stack.
2244 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2245 ConcurrentMark* _cm;
2246 CMTask* _task;
2247 int _ref_counter_limit;
2248 int _ref_counter;
2249 bool _is_serial;
2250 public:
2251 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2252 _cm(cm), _task(task), _is_serial(is_serial),
2253 _ref_counter_limit(G1RefProcDrainInterval) {
2254 assert(_ref_counter_limit > 0, "sanity");
2255 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2256 _ref_counter = _ref_counter_limit;
2257 }
2259 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2260 virtual void do_oop( oop* p) { do_oop_work(p); }
2262 template <class T> void do_oop_work(T* p) {
2263 if (!_cm->has_overflown()) {
2264 oop obj = oopDesc::load_decode_heap_oop(p);
2265 if (_cm->verbose_high()) {
2266 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2267 "*"PTR_FORMAT" = "PTR_FORMAT,
2268 _task->worker_id(), p2i(p), p2i((void*) obj));
2269 }
2271 _task->deal_with_reference(obj);
2272 _ref_counter--;
2274 if (_ref_counter == 0) {
2275 // We have dealt with _ref_counter_limit references, pushing them
2276 // and objects reachable from them on to the local stack (and
2277 // possibly the global stack). Call CMTask::do_marking_step() to
2278 // process these entries.
2279 //
2280 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2281 // there's nothing more to do (i.e. we're done with the entries that
2282 // were pushed as a result of the CMTask::deal_with_reference() calls
2283 // above) or we overflow.
2284 //
2285 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2286 // flag while there may still be some work to do. (See the comment at
2287 // the beginning of CMTask::do_marking_step() for those conditions -
2288 // one of which is reaching the specified time target.) It is only
2289 // when CMTask::do_marking_step() returns without setting the
2290 // has_aborted() flag that the marking step has completed.
2291 do {
2292 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2293 _task->do_marking_step(mark_step_duration_ms,
2294 false /* do_termination */,
2295 _is_serial);
2296 } while (_task->has_aborted() && !_cm->has_overflown());
2297 _ref_counter = _ref_counter_limit;
2298 }
2299 } else {
2300 if (_cm->verbose_high()) {
2301 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2302 }
2303 }
2304 }
2305 };
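// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): the counter
// discipline used by the closure above - after every G1RefProcDrainInterval
// references, drain the accumulated work before accepting more, so local
// buffers stay bounded. The callbacks are placeholders for
// deal_with_reference() and do_marking_step().

static const int kDrainInterval = 1000;  // stand-in for G1RefProcDrainInterval

static void process_references(int n_refs,
                               void (*deal_with)(int ref),
                               void (*drain)()) {
  int budget = kDrainInterval;
  for (int ref = 0; ref < n_refs; ref += 1) {
    deal_with(ref);
    budget -= 1;
    if (budget == 0) {
      drain();                 // empty local/global work before continuing
      budget = kDrainInterval;
    }
  }
}
// ---------------------------------------------------------------------------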
2307 // 'Drain' oop closure used by both serial and parallel reference processing.
2308 // Uses the CMTask associated with a given worker thread (for serial
2309 // reference processing the CMTask for worker 0 is used). Calls the
2310 // do_marking_step routine, with an unbelievably large timeout value,
2311 // to drain the marking data structures of the remaining entries
2312 // added by the 'keep alive' oop closure above.
2314 class G1CMDrainMarkingStackClosure: public VoidClosure {
2315 ConcurrentMark* _cm;
2316 CMTask* _task;
2317 bool _is_serial;
2318 public:
2319 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2320 _cm(cm), _task(task), _is_serial(is_serial) {
2321 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2322 }
2324 void do_void() {
2325 do {
2326 if (_cm->verbose_high()) {
2327 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2328 _task->worker_id(), BOOL_TO_STR(_is_serial));
2329 }
2331 // We call CMTask::do_marking_step() to completely drain the local
2332 // and global marking stacks of entries pushed by the 'keep alive'
2333 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2334 //
2335 // CMTask::do_marking_step() is called in a loop, which we'll exit
2336 // if there's nothing more to do (i.e. we've completely drained the
2337 // entries that were pushed as a result of applying the 'keep alive'
2338 // closure to the entries on the discovered ref lists) or we overflow
2339 // the global marking stack.
2340 //
2341 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2342 // flag while there may still be some work to do. (See the comment at
2343 // the beginning of CMTask::do_marking_step() for those conditions -
2344 // one of which is reaching the specified time target.) It is only
2345 // when CMTask::do_marking_step() returns without setting the
2346 // has_aborted() flag that the marking step has completed.
2348 _task->do_marking_step(1000000000.0 /* something very large */,
2349 true /* do_termination */,
2350 _is_serial);
2351 } while (_task->has_aborted() && !_cm->has_overflown());
2352 }
2353 };
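// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): the
// abort/retry discipline both closures above rely on - a bounded step is
// re-invoked until it finishes without aborting, unless a global overflow has
// been signalled. The step function is a placeholder for
// CMTask::do_marking_step().

#include <atomic>

static std::atomic<bool> g_has_overflown(false);

// Returns true when all work is done, false when the step aborted early
// (e.g. its time budget expired) and should be retried.
static bool marking_step_stub() {
  static int remaining = 5;  // pretend there are a few slices of work left
  remaining -= 1;
  return remaining <= 0;
}

static void drain_to_completion() {
  bool completed;
  do {
    completed = marking_step_stub();
  } while (!completed && !g_has_overflown.load());
}
// ---------------------------------------------------------------------------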
2355 // Implementation of AbstractRefProcTaskExecutor for parallel
2356 // reference processing at the end of G1 concurrent marking
2358 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2359 private:
2360 G1CollectedHeap* _g1h;
2361 ConcurrentMark* _cm;
2362 WorkGang* _workers;
2363 int _active_workers;
2365 public:
2366 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2367 ConcurrentMark* cm,
2368 WorkGang* workers,
2369 int n_workers) :
2370 _g1h(g1h), _cm(cm),
2371 _workers(workers), _active_workers(n_workers) { }
2373 // Executes the given task using concurrent marking worker threads.
2374 virtual void execute(ProcessTask& task);
2375 virtual void execute(EnqueueTask& task);
2376 };
2378 class G1CMRefProcTaskProxy: public AbstractGangTask {
2379 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2380 ProcessTask& _proc_task;
2381 G1CollectedHeap* _g1h;
2382 ConcurrentMark* _cm;
2384 public:
2385 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2386 G1CollectedHeap* g1h,
2387 ConcurrentMark* cm) :
2388 AbstractGangTask("Process reference objects in parallel"),
2389 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2390 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2391 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2392 }
2394 virtual void work(uint worker_id) {
2395 ResourceMark rm;
2396 HandleMark hm;
2397 CMTask* task = _cm->task(worker_id);
2398 G1CMIsAliveClosure g1_is_alive(_g1h);
2399 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2400 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2402 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2403 }
2404 };
2406 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2407 assert(_workers != NULL, "Need parallel worker threads.");
2408 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2410 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2412 // We need to reset the concurrency level before each
2413 // proxy task execution, so that the termination protocol
2414 // and overflow handling in CMTask::do_marking_step() knows
2415 // how many workers to wait for.
2416 _cm->set_concurrency(_active_workers);
2417 _g1h->set_par_threads(_active_workers);
2418 _workers->run_task(&proc_task_proxy);
2419 _g1h->set_par_threads(0);
2420 }
2422 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2423 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2424 EnqueueTask& _enq_task;
2426 public:
2427 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2428 AbstractGangTask("Enqueue reference objects in parallel"),
2429 _enq_task(enq_task) { }
2431 virtual void work(uint worker_id) {
2432 _enq_task.work(worker_id);
2433 }
2434 };
2436 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2437 assert(_workers != NULL, "Need parallel worker threads.");
2438 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2440 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2442 // Not strictly necessary but...
2443 //
2444 // We need to reset the concurrency level before each
2445 // proxy task execution, so that the termination protocol
2446 // and overflow handling in CMTask::do_marking_step() knows
2447 // how many workers to wait for.
2448 _cm->set_concurrency(_active_workers);
2449 _g1h->set_par_threads(_active_workers);
2450 _workers->run_task(&enq_task_proxy);
2451 _g1h->set_par_threads(0);
2452 }
2454 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2455 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2456 }
2458 // Helper class to get rid of some boilerplate code.
2459 class G1RemarkGCTraceTime : public GCTraceTime {
2460 static bool doit_and_prepend(bool doit) {
2461 if (doit) {
2462 gclog_or_tty->put(' ');
2463 }
2464 return doit;
2465 }
2467 public:
2468 G1RemarkGCTraceTime(const char* title, bool doit)
2469 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2470 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2471 }
2472 };
2474 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2475 if (has_overflown()) {
2476 // Skip processing the discovered references if we have
2477 // overflown the global marking stack. Reference objects
2478 // only get discovered once so it is OK to not
2479 // de-populate the discovered reference lists. We could have,
2480 // but the only benefit would be that, when marking restarts,
2481 // fewer reference objects are discovered.
2482 return;
2483 }
2485 ResourceMark rm;
2486 HandleMark hm;
2488 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2490 // Is alive closure.
2491 G1CMIsAliveClosure g1_is_alive(g1h);
2493 // Inner scope to exclude the cleaning of the string and symbol
2494 // tables from the displayed time.
2495 {
2496 if (G1Log::finer()) {
2497 gclog_or_tty->put(' ');
2498 }
2499 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2501 ReferenceProcessor* rp = g1h->ref_processor_cm();
2503 // See the comment in G1CollectedHeap::ref_processing_init()
2504 // about how reference processing currently works in G1.
2506 // Set the soft reference policy
2507 rp->setup_policy(clear_all_soft_refs);
2508 assert(_markStack.isEmpty(), "mark stack should be empty");
2510 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2511 // in serial reference processing. Note these closures are also
2512 // used for serially processing (by the current thread) the
2513 // JNI references during parallel reference processing.
2514 //
2515 // These closures do not need to synchronize with the worker
2516 // threads involved in parallel reference processing as these
2517 // instances are executed serially by the current thread (e.g.
2518 // reference processing is not multi-threaded and is thus
2519 // performed by the current thread instead of a gang worker).
2520 //
2521 // The gang tasks involved in parallel reference processing create
2522 // their own instances of these closures, which do their own
2523 // synchronization among themselves.
2524 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2525 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2527 // We need at least one active thread. If reference processing
2528 // is not multi-threaded we use the current (VMThread) thread,
2529 // otherwise we use the work gang from the G1CollectedHeap and
2530 // we utilize all the worker threads we can.
2531 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2532 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2533 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2535 // Parallel processing task executor.
2536 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2537 g1h->workers(), active_workers);
2538 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2540 // Set the concurrency level. The phase was already set prior to
2541 // executing the remark task.
2542 set_concurrency(active_workers);
2544 // Set the degree of MT processing here. If the discovery was done MT,
2545 // the number of threads involved during discovery could differ from
2546 // the number of active workers. This is OK as long as the discovered
2547 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2548 rp->set_active_mt_degree(active_workers);
2550 // Process the weak references.
2551 const ReferenceProcessorStats& stats =
2552 rp->process_discovered_references(&g1_is_alive,
2553 &g1_keep_alive,
2554 &g1_drain_mark_stack,
2555 executor,
2556 g1h->gc_timer_cm(),
2557 concurrent_gc_id());
2558 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2560 // The do_oop work routines of the keep_alive and drain_marking_stack
2561 // oop closures will set the has_overflown flag if we overflow the
2562 // global marking stack.
2564 assert(_markStack.overflow() || _markStack.isEmpty(),
2565 "mark stack should be empty (unless it overflowed)");
2567 if (_markStack.overflow()) {
2568 // This should have been done already when we tried to push an
2569 // entry on to the global mark stack. But let's do it again.
2570 set_has_overflown();
2571 }
2573     assert(rp->num_q() == active_workers, "Reference queue count should match the number of active workers");
2575 rp->enqueue_discovered_references(executor);
2577 rp->verify_no_references_recorded();
2578 assert(!rp->discovery_enabled(), "Post condition");
2579 }
2581 if (has_overflown()) {
2582 // We can not trust g1_is_alive if the marking stack overflowed
2583 return;
2584 }
2586 assert(_markStack.isEmpty(), "Marking should have completed");
2588 // Unload Klasses, String, Symbols, Code Cache, etc.
2589 {
2590 G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
2592 if (ClassUnloadingWithConcurrentMark) {
2593 bool purged_classes;
2595 {
2596 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
2597 purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
2598 }
2600 {
2601 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
2602 weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2603 }
2604 }
2606 if (G1StringDedup::is_enabled()) {
2607 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
2608 G1StringDedup::unlink(&g1_is_alive);
2609 }
2610 }
2611 }
2613 void ConcurrentMark::swapMarkBitMaps() {
2614 CMBitMapRO* temp = _prevMarkBitMap;
2615 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2616 _nextMarkBitMap = (CMBitMap*) temp;
2617 }
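// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): the
// double-buffering that swapMarkBitMaps() implements with casts above. The
// "next" bitmap built by the marking that just finished becomes "prev", and
// the old "prev" storage is recycled for the next marking cycle.

#include <utility>
#include <vector>

struct MarkBitmaps {
  std::vector<bool> prev;  // results of the last completed marking
  std::vector<bool> next;  // built by the in-progress marking

  void swap_at_cleanup() {
    std::swap(prev, next);  // O(1): only the underlying storage is exchanged
  }
};
// ---------------------------------------------------------------------------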
2619 class CMObjectClosure;
2621 // Closure for iterating over objects, currently only used for
2622 // processing SATB buffers.
2623 class CMObjectClosure : public ObjectClosure {
2624 private:
2625 CMTask* _task;
2627 public:
2628 void do_object(oop obj) {
2629 _task->deal_with_reference(obj);
2630 }
2632 CMObjectClosure(CMTask* task) : _task(task) { }
2633 };
2635 class G1RemarkThreadsClosure : public ThreadClosure {
2636 CMObjectClosure _cm_obj;
2637 G1CMOopClosure _cm_cl;
2638 MarkingCodeBlobClosure _code_cl;
2639 int _thread_parity;
2640 bool _is_par;
2642 public:
2643 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2644 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2645 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2647 void do_thread(Thread* thread) {
2648 if (thread->is_Java_thread()) {
2649 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2650 JavaThread* jt = (JavaThread*)thread;
2652       // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2653       // however, the oops reachable from nmethods have very complex lifecycles:
2654       // * Alive if on the stack of an executing method
2655       // * Weakly reachable otherwise
2656       // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2657       // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2658 jt->nmethods_do(&_code_cl);
2660 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2661 }
2662 } else if (thread->is_VM_thread()) {
2663 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2664 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2665 }
2666 }
2667 }
2668 };
2670 class CMRemarkTask: public AbstractGangTask {
2671 private:
2672 ConcurrentMark* _cm;
2673 bool _is_serial;
2674 public:
2675 void work(uint worker_id) {
2676 // Since all available tasks are actually started, we should
2677     // only proceed if we're supposed to be active.
2678 if (worker_id < _cm->active_tasks()) {
2679 CMTask* task = _cm->task(worker_id);
2680 task->record_start_time();
2681 {
2682 ResourceMark rm;
2683 HandleMark hm;
2685 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2686 Threads::threads_do(&threads_f);
2687 }
2689 do {
2690 task->do_marking_step(1000000000.0 /* something very large */,
2691 true /* do_termination */,
2692 _is_serial);
2693 } while (task->has_aborted() && !_cm->has_overflown());
2694 // If we overflow, then we do not want to restart. We instead
2695 // want to abort remark and do concurrent marking again.
2696 task->record_end_time();
2697 }
2698 }
2700 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2701 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2702 _cm->terminator()->reset_for_reuse(active_workers);
2703 }
2704 };
2706 void ConcurrentMark::checkpointRootsFinalWork() {
2707 ResourceMark rm;
2708 HandleMark hm;
2709 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2711 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2713 g1h->ensure_parsability(false);
2715 if (G1CollectedHeap::use_parallel_gc_threads()) {
2716 G1CollectedHeap::StrongRootsScope srs(g1h);
2717 // this is remark, so we'll use up all active threads
2718 uint active_workers = g1h->workers()->active_workers();
2719 if (active_workers == 0) {
2720 assert(active_workers > 0, "Should have been set earlier");
2721 active_workers = (uint) ParallelGCThreads;
2722 g1h->workers()->set_active_workers(active_workers);
2723 }
2724 set_concurrency_and_phase(active_workers, false /* concurrent */);
2725     // Leave _parallel_marking_threads at its
2726 // value originally calculated in the ConcurrentMark
2727 // constructor and pass values of the active workers
2728 // through the gang in the task.
2730 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2731 // We will start all available threads, even if we decide that the
2732 // active_workers will be fewer. The extra ones will just bail out
2733 // immediately.
2734 g1h->set_par_threads(active_workers);
2735 g1h->workers()->run_task(&remarkTask);
2736 g1h->set_par_threads(0);
2737 } else {
2738 G1CollectedHeap::StrongRootsScope srs(g1h);
2739 uint active_workers = 1;
2740 set_concurrency_and_phase(active_workers, false /* concurrent */);
2742 // Note - if there's no work gang then the VMThread will be
2743 // the thread to execute the remark - serially. We have
2744 // to pass true for the is_serial parameter so that
2745 // CMTask::do_marking_step() doesn't enter the sync
2746 // barriers in the event of an overflow. Doing so will
2747 // cause an assert that the current thread is not a
2748 // concurrent GC thread.
2749 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2750 remarkTask.work(0);
2751 }
2752 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2753 guarantee(has_overflown() ||
2754 satb_mq_set.completed_buffers_num() == 0,
2755 err_msg("Invariant: has_overflown = %s, num buffers = %d",
2756 BOOL_TO_STR(has_overflown()),
2757 satb_mq_set.completed_buffers_num()));
2759 print_stats();
2760 }
2762 #ifndef PRODUCT
2764 class PrintReachableOopClosure: public OopClosure {
2765 private:
2766 G1CollectedHeap* _g1h;
2767 outputStream* _out;
2768 VerifyOption _vo;
2769 bool _all;
2771 public:
2772 PrintReachableOopClosure(outputStream* out,
2773 VerifyOption vo,
2774 bool all) :
2775 _g1h(G1CollectedHeap::heap()),
2776 _out(out), _vo(vo), _all(all) { }
2778 void do_oop(narrowOop* p) { do_oop_work(p); }
2779 void do_oop( oop* p) { do_oop_work(p); }
2781 template <class T> void do_oop_work(T* p) {
2782 oop obj = oopDesc::load_decode_heap_oop(p);
2783 const char* str = NULL;
2784 const char* str2 = "";
2786 if (obj == NULL) {
2787 str = "";
2788 } else if (!_g1h->is_in_g1_reserved(obj)) {
2789 str = " O";
2790 } else {
2791 HeapRegion* hr = _g1h->heap_region_containing(obj);
2792 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2793 bool marked = _g1h->is_marked(obj, _vo);
2795 if (over_tams) {
2796 str = " >";
2797 if (marked) {
2798 str2 = " AND MARKED";
2799 }
2800 } else if (marked) {
2801 str = " M";
2802 } else {
2803 str = " NOT";
2804 }
2805 }
2807 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2808 p2i(p), p2i((void*) obj), str, str2);
2809 }
2810 };
2812 class PrintReachableObjectClosure : public ObjectClosure {
2813 private:
2814 G1CollectedHeap* _g1h;
2815 outputStream* _out;
2816 VerifyOption _vo;
2817 bool _all;
2818 HeapRegion* _hr;
2820 public:
2821 PrintReachableObjectClosure(outputStream* out,
2822 VerifyOption vo,
2823 bool all,
2824 HeapRegion* hr) :
2825 _g1h(G1CollectedHeap::heap()),
2826 _out(out), _vo(vo), _all(all), _hr(hr) { }
2828 void do_object(oop o) {
2829 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2830 bool marked = _g1h->is_marked(o, _vo);
2831 bool print_it = _all || over_tams || marked;
2833 if (print_it) {
2834 _out->print_cr(" "PTR_FORMAT"%s",
2835 p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2836 PrintReachableOopClosure oopCl(_out, _vo, _all);
2837 o->oop_iterate_no_header(&oopCl);
2838 }
2839 }
2840 };
2842 class PrintReachableRegionClosure : public HeapRegionClosure {
2843 private:
2844 G1CollectedHeap* _g1h;
2845 outputStream* _out;
2846 VerifyOption _vo;
2847 bool _all;
2849 public:
2850 bool doHeapRegion(HeapRegion* hr) {
2851 HeapWord* b = hr->bottom();
2852 HeapWord* e = hr->end();
2853 HeapWord* t = hr->top();
2854 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2855 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2856 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2857 _out->cr();
2859 HeapWord* from = b;
2860 HeapWord* to = t;
2862 if (to > from) {
2863 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2864 _out->cr();
2865 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2866 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2867 _out->cr();
2868 }
2870 return false;
2871 }
2873 PrintReachableRegionClosure(outputStream* out,
2874 VerifyOption vo,
2875 bool all) :
2876 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2877 };
2879 void ConcurrentMark::print_reachable(const char* str,
2880 VerifyOption vo,
2881 bool all) {
2882 gclog_or_tty->cr();
2883 gclog_or_tty->print_cr("== Doing heap dump... ");
2885 if (G1PrintReachableBaseFile == NULL) {
2886 gclog_or_tty->print_cr(" #### error: no base file defined");
2887 return;
2888 }
2890 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2891 (JVM_MAXPATHLEN - 1)) {
2892 gclog_or_tty->print_cr(" #### error: file name too long");
2893 return;
2894 }
2896 char file_name[JVM_MAXPATHLEN];
2897 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2898 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2900 fileStream fout(file_name);
2901 if (!fout.is_open()) {
2902 gclog_or_tty->print_cr(" #### error: could not open file");
2903 return;
2904 }
2906 outputStream* out = &fout;
2907 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2908 out->cr();
2910 out->print_cr("--- ITERATING OVER REGIONS");
2911 out->cr();
2912 PrintReachableRegionClosure rcl(out, vo, all);
2913 _g1h->heap_region_iterate(&rcl);
2914 out->cr();
2916 gclog_or_tty->print_cr(" done");
2917 gclog_or_tty->flush();
2918 }
2920 #endif // PRODUCT
2922 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2923 // Note we are overriding the read-only view of the prev map here, via
2924 // the cast.
2925 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2926 }
2928 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2929 _nextMarkBitMap->clearRange(mr);
2930 }
2932 HeapRegion*
2933 ConcurrentMark::claim_region(uint worker_id) {
2934 // "checkpoint" the finger
2935 HeapWord* finger = _finger;
2937 // _heap_end will not change underneath our feet; it only changes at
2938 // yield points.
2939 while (finger < _heap_end) {
2940 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2942 // Note on how this code handles humongous regions. In the
2943 // normal case the finger will reach the start of a "starts
2944 // humongous" (SH) region. Its end will either be the end of the
2945 // last "continues humongous" (CH) region in the sequence, or the
2946 // standard end of the SH region (if the SH is the only region in
2947 // the sequence). That way claim_region() will skip over the CH
2948 // regions. However, there is a subtle race between a CM thread
2949 // executing this method and a mutator thread doing a humongous
2950 // object allocation. The two are not mutually exclusive as the CM
2951 // thread does not need to hold the Heap_lock when it gets
2952 // here. So there is a chance that claim_region() will come across
2953     // a free region that's in the process of becoming a SH or a CH
2954 // region. In the former case, it will either
2955 // a) Miss the update to the region's end, in which case it will
2956 // visit every subsequent CH region, will find their bitmaps
2957 // empty, and do nothing, or
2958 // b) Will observe the update of the region's end (in which case
2959 // it will skip the subsequent CH regions).
2960 // If it comes across a region that suddenly becomes CH, the
2961 // scenario will be similar to b). So, the race between
2962 // claim_region() and a humongous object allocation might force us
2963 // to do a bit of unnecessary work (due to some unnecessary bitmap
2964     // iterations) but it should not introduce any correctness issues.
2965 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2967     // Above heap_region_containing_raw may return NULL as we always scan and claim
2968 // until the end of the heap. In this case, just jump to the next region.
2969 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2971 // Is the gap between reading the finger and doing the CAS too long?
2972 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2973 if (res == finger && curr_region != NULL) {
2974 // we succeeded
2975 HeapWord* bottom = curr_region->bottom();
2976 HeapWord* limit = curr_region->next_top_at_mark_start();
2978 if (verbose_low()) {
2979 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2980 "["PTR_FORMAT", "PTR_FORMAT"), "
2981 "limit = "PTR_FORMAT,
2982 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2983 }
2985       // Notice that _finger == end cannot be guaranteed here since
2986       // someone else might have moved the finger even further.
2987 assert(_finger >= end, "the finger should have moved forward");
2989 if (verbose_low()) {
2990 gclog_or_tty->print_cr("[%u] we were successful with region = "
2991 PTR_FORMAT, worker_id, p2i(curr_region));
2992 }
2994 if (limit > bottom) {
2995 if (verbose_low()) {
2996 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2997 "returning it ", worker_id, p2i(curr_region));
2998 }
2999 return curr_region;
3000 } else {
3001 assert(limit == bottom,
3002 "the region limit should be at bottom");
3003 if (verbose_low()) {
3004 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
3005 "returning NULL", worker_id, p2i(curr_region));
3006 }
3007 // we return NULL and the caller should try calling
3008 // claim_region() again.
3009 return NULL;
3010 }
3011 } else {
3012 assert(_finger > finger, "the finger should have moved forward");
3013 if (verbose_low()) {
3014 if (curr_region == NULL) {
3015 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
3016 "global finger = "PTR_FORMAT", "
3017 "our finger = "PTR_FORMAT,
3018 worker_id, p2i(_finger), p2i(finger));
3019 } else {
3020 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
3021 "global finger = "PTR_FORMAT", "
3022 "our finger = "PTR_FORMAT,
3023 worker_id, p2i(_finger), p2i(finger));
3024 }
3025 }
3027 // read it again
3028 finger = _finger;
3029 }
3030 }
3032 return NULL;
3033 }
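// ---------------------------------------------------------------------------
// Illustrative sketch (editorial, not part of the original file): the
// lock-free claiming protocol in claim_region() above, with a shared "finger"
// advanced by compare-and-swap. Fixed-size chunks stand in for heap regions
// and std::atomic for Atomic::cmpxchg_ptr.

#include <atomic>
#include <cstddef>

static const size_t kGrainWords = 512;            // stand-in for GrainWords
static std::atomic<size_t> g_finger(0);           // next unclaimed word index
static const size_t kHeapEndWords = (size_t)1 << 20;

// Returns the start of a claimed chunk, or kHeapEndWords when exhausted.
static size_t claim_chunk() {
  size_t finger = g_finger.load();
  while (finger < kHeapEndWords) {
    size_t end = finger + kGrainWords;
    // If nobody moved the finger since we read it, [finger, end) is ours.
    if (g_finger.compare_exchange_strong(finger, end)) {
      return finger;
    }
    // CAS failed: 'finger' now holds the current value; retry from there.
  }
  return kHeapEndWords;
}
// ---------------------------------------------------------------------------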
3035 #ifndef PRODUCT
3036 enum VerifyNoCSetOopsPhase {
3037 VerifyNoCSetOopsStack,
3038 VerifyNoCSetOopsQueues,
3039 VerifyNoCSetOopsSATBCompleted,
3040 VerifyNoCSetOopsSATBThread
3041 };
3043 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
3044 private:
3045 G1CollectedHeap* _g1h;
3046 VerifyNoCSetOopsPhase _phase;
3047 int _info;
3049 const char* phase_str() {
3050 switch (_phase) {
3051 case VerifyNoCSetOopsStack: return "Stack";
3052 case VerifyNoCSetOopsQueues: return "Queue";
3053 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
3054 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
3055 default: ShouldNotReachHere();
3056 }
3057 return NULL;
3058 }
3060 void do_object_work(oop obj) {
3061 guarantee(!_g1h->obj_in_cs(obj),
3062 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
3063 p2i((void*) obj), phase_str(), _info));
3064 }
3066 public:
3067 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
3069 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
3070 _phase = phase;
3071 _info = info;
3072 }
3074 virtual void do_oop(oop* p) {
3075 oop obj = oopDesc::load_decode_heap_oop(p);
3076 do_object_work(obj);
3077 }
3079 virtual void do_oop(narrowOop* p) {
3080 // We should not come across narrow oops while scanning marking
3081 // stacks and SATB buffers.
3082 ShouldNotReachHere();
3083 }
3085 virtual void do_object(oop obj) {
3086 do_object_work(obj);
3087 }
3088 };
3090 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3091 bool verify_enqueued_buffers,
3092 bool verify_thread_buffers,
3093 bool verify_fingers) {
3094 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3095 if (!G1CollectedHeap::heap()->mark_in_progress()) {
3096 return;
3097 }
3099 VerifyNoCSetOopsClosure cl;
3101 if (verify_stacks) {
3102 // Verify entries on the global mark stack
3103 cl.set_phase(VerifyNoCSetOopsStack);
3104 _markStack.oops_do(&cl);
3106 // Verify entries on the task queues
3107 for (uint i = 0; i < _max_worker_id; i += 1) {
3108 cl.set_phase(VerifyNoCSetOopsQueues, i);
3109 CMTaskQueue* queue = _task_queues->queue(i);
3110 queue->oops_do(&cl);
3111 }
3112 }
3114 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3116 // Verify entries on the enqueued SATB buffers
3117 if (verify_enqueued_buffers) {
3118 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3119 satb_qs.iterate_completed_buffers_read_only(&cl);
3120 }
3122 // Verify entries on the per-thread SATB buffers
3123 if (verify_thread_buffers) {
3124 cl.set_phase(VerifyNoCSetOopsSATBThread);
3125 satb_qs.iterate_thread_buffers_read_only(&cl);
3126 }
3128 if (verify_fingers) {
3129 // Verify the global finger
3130 HeapWord* global_finger = finger();
3131 if (global_finger != NULL && global_finger < _heap_end) {
3132 // The global finger always points to a heap region boundary. We
3133 // use heap_region_containing_raw() to get the containing region
3134 // given that the global finger could be pointing to a free region
3135 // which subsequently becomes continues humongous. If that
3136 // happens, heap_region_containing() will return the bottom of the
3137 // corresponding starts humongous region and the check below will
3138 // not hold any more.
3139 // Since we always iterate over all regions, we might get a NULL HeapRegion
3140 // here.
3141 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3142 guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3143 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3144 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3145 }
3147 // Verify the task fingers
3148 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3149 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3150 CMTask* task = _tasks[i];
3151 HeapWord* task_finger = task->finger();
3152 if (task_finger != NULL && task_finger < _heap_end) {
3153 // See above note on the global finger verification.
3154 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3155 guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3156 !task_hr->in_collection_set(),
3157 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3158 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3159 }
3160 }
3161 }
3162 }
3163 #endif // PRODUCT
3165 // Aggregate the counting data that was constructed concurrently
3166 // with marking.
3167 class AggregateCountDataHRClosure: public HeapRegionClosure {
3168 G1CollectedHeap* _g1h;
3169 ConcurrentMark* _cm;
3170 CardTableModRefBS* _ct_bs;
3171 BitMap* _cm_card_bm;
3172 uint _max_worker_id;
3174 public:
3175 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3176 BitMap* cm_card_bm,
3177 uint max_worker_id) :
3178 _g1h(g1h), _cm(g1h->concurrent_mark()),
3179 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3180 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3182 bool doHeapRegion(HeapRegion* hr) {
3183 if (hr->continuesHumongous()) {
3184 // We will ignore these here and process them when their
3185 // associated "starts humongous" region is processed.
3186 // Note that we cannot rely on their associated
3187 // "starts humongous" region to have their bit set to 1
3188 // since, due to the region chunking in the parallel region
3189 // iteration, a "continues humongous" region might be visited
3190 // before its associated "starts humongous".
3191 return false;
3192 }
3194 HeapWord* start = hr->bottom();
3195 HeapWord* limit = hr->next_top_at_mark_start();
3196 HeapWord* end = hr->end();
3198 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3199 err_msg("Preconditions not met - "
3200 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3201 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3202 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3204 assert(hr->next_marked_bytes() == 0, "Precondition");
3206 if (start == limit) {
3207 // NTAMS of this region has not been set so nothing to do.
3208 return false;
3209 }
3211 // 'start' should be in the heap.
3212 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3213     // 'end' *may* be just beyond the end of the heap (if hr is the last region).
3214 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3216 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3217 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3218 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3220     // If ntams is not card aligned then we bump the card bitmap index
3221     // for limit so that we get all the cards spanned by
3222     // the object ending at ntams.
3223     // Note: if this is the last region in the heap then ntams
3224     // could actually be just beyond the end of the heap;
3225 // limit_idx will then correspond to a (non-existent) card
3226 // that is also outside the heap.
3227 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3228 limit_idx += 1;
3229 }
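// Illustrative arithmetic (an assumed example, not from the original
// source): with 512-byte cards, if start sits at the beginning of card k
// and ntams lies 700 bytes further on, then limit_idx computes to k + 1,
// yet the object ending at ntams also covers part of card k + 1. Bumping
// limit_idx to k + 2 lets the half-open range [start_idx, limit_idx)
// used below include that last card.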
3231 assert(limit_idx <= end_idx, "or else use atomics");
3233 // Aggregate the "stripe" in the count data associated with hr.
3234 uint hrm_index = hr->hrm_index();
3235 size_t marked_bytes = 0;
3237 for (uint i = 0; i < _max_worker_id; i += 1) {
3238 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3239 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3241 // Fetch the marked_bytes in this region for task i and
3242 // add it to the running total for this region.
3243 marked_bytes += marked_bytes_array[hrm_index];
3245 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3246 // into the global card bitmap.
3247 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3249 while (scan_idx < limit_idx) {
3250 assert(task_card_bm->at(scan_idx) == true, "should be");
3251 _cm_card_bm->set_bit(scan_idx);
3252 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3254 // BitMap::get_next_one_offset() can handle the case when
3255 // its left_offset parameter is greater than its right_offset
3256 // parameter. It does, however, have an early exit if
3257 // left_offset == right_offset. So let's limit the value
3258 // passed in for left offset here.
3259 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3260 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3261 }
3262 }
3264 // Update the marked bytes for this region.
3265 hr->add_to_marked_bytes(marked_bytes);
3267 // Next heap region
3268 return false;
3269 }
3270 };
3272 class G1AggregateCountDataTask: public AbstractGangTask {
3273 protected:
3274 G1CollectedHeap* _g1h;
3275 ConcurrentMark* _cm;
3276 BitMap* _cm_card_bm;
3277 uint _max_worker_id;
3278 int _active_workers;
3280 public:
3281 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3282 ConcurrentMark* cm,
3283 BitMap* cm_card_bm,
3284 uint max_worker_id,
3285 int n_workers) :
3286 AbstractGangTask("Count Aggregation"),
3287 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3288 _max_worker_id(max_worker_id),
3289 _active_workers(n_workers) { }
3291 void work(uint worker_id) {
3292 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3294 if (G1CollectedHeap::use_parallel_gc_threads()) {
3295 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3296 _active_workers,
3297 HeapRegion::AggregateCountClaimValue);
3298 } else {
3299 _g1h->heap_region_iterate(&cl);
3300 }
3301 }
3302 };
3305 void ConcurrentMark::aggregate_count_data() {
3306 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3307 _g1h->workers()->active_workers() :
3308 1);
3310 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3311 _max_worker_id, n_workers);
3313 if (G1CollectedHeap::use_parallel_gc_threads()) {
3314 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3315 "sanity check");
3316 _g1h->set_par_threads(n_workers);
3317 _g1h->workers()->run_task(&g1_par_agg_task);
3318 _g1h->set_par_threads(0);
3320 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3321 "sanity check");
3322 _g1h->reset_heap_region_claim_values();
3323 } else {
3324 g1_par_agg_task.work(0);
3325 }
3326 }
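// Usage note (descriptive, not new behaviour): the claim-value checks
// above follow the usual chunked parallel iteration pattern - each
// worker tags the regions it processes with AggregateCountClaimValue,
// the asserts verify that every region was visited exactly once, and
// reset_heap_region_claim_values() returns the regions to
// InitialClaimValue for the next parallel iteration.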
3328 // Clear the per-worker arrays used to store the per-region counting data
3329 void ConcurrentMark::clear_all_count_data() {
3330 // Clear the global card bitmap - it will be filled during
3331 // liveness count aggregation (during remark) and the
3332 // final counting task.
3333 _card_bm.clear();
3335 // Clear the global region bitmap - it will be filled as part
3336 // of the final counting task.
3337 _region_bm.clear();
3339 uint max_regions = _g1h->max_regions();
3340 assert(_max_worker_id > 0, "uninitialized");
3342 for (uint i = 0; i < _max_worker_id; i += 1) {
3343 BitMap* task_card_bm = count_card_bitmap_for(i);
3344 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3346 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3347 assert(marked_bytes_array != NULL, "uninitialized");
3349 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3350 task_card_bm->clear();
3351 }
3352 }
3354 void ConcurrentMark::print_stats() {
3355 if (verbose_stats()) {
3356 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3357 for (size_t i = 0; i < _active_tasks; ++i) {
3358 _tasks[i]->print_stats();
3359 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3360 }
3361 }
3362 }
3364 // abandon current marking iteration due to a Full GC
3365 void ConcurrentMark::abort() {
3366 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3367 // concurrent bitmap clearing.
3368 _nextMarkBitMap->clearAll();
3370 // Note we cannot clear the previous marking bitmap here
3371 // since VerifyDuringGC verifies the objects marked during
3372 // a full GC against the previous bitmap.
3374 // Clear the liveness counting data
3375 clear_all_count_data();
3376 // Empty mark stack
3377 reset_marking_state();
3378 for (uint i = 0; i < _max_worker_id; ++i) {
3379 _tasks[i]->clear_region_fields();
3380 }
3381 _first_overflow_barrier_sync.abort();
3382 _second_overflow_barrier_sync.abort();
3383 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3384 if (!gc_id.is_undefined()) {
3385 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3386 // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3387 _aborted_gc_id = gc_id;
3388 }
3389 _has_aborted = true;
3391 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3392 satb_mq_set.abandon_partial_marking();
3393 // This can be called either during or outside marking, we'll read
3394 // the expected_active value from the SATB queue set.
3395 satb_mq_set.set_active_all_threads(
3396 false, /* new active value */
3397 satb_mq_set.is_active() /* expected_active */);
3399 _g1h->trace_heap_after_concurrent_cycle();
3400 _g1h->register_concurrent_cycle_end();
3401 }
3403 const GCId& ConcurrentMark::concurrent_gc_id() {
3404 if (has_aborted()) {
3405 return _aborted_gc_id;
3406 }
3407 return _g1h->gc_tracer_cm()->gc_id();
3408 }
3410 static void print_ms_time_info(const char* prefix, const char* name,
3411 NumberSeq& ns) {
3412 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3413 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3414 if (ns.num() > 0) {
3415 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3416 prefix, ns.sd(), ns.maximum());
3417 }
3418 }
3420 void ConcurrentMark::print_summary_info() {
3421 gclog_or_tty->print_cr(" Concurrent marking:");
3422 print_ms_time_info(" ", "init marks", _init_times);
3423 print_ms_time_info(" ", "remarks", _remark_times);
3424 {
3425 print_ms_time_info(" ", "final marks", _remark_mark_times);
3426 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3428 }
3429 print_ms_time_info(" ", "cleanups", _cleanup_times);
3430 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3431 _total_counting_time,
3432 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3433 (double)_cleanup_times.num()
3434 : 0.0));
3435 if (G1ScrubRemSets) {
3436 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3437 _total_rs_scrub_time,
3438 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3439 (double)_cleanup_times.num()
3440 : 0.0));
3441 }
3442 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3443 (_init_times.sum() + _remark_times.sum() +
3444 _cleanup_times.sum())/1000.0);
3445 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3446 "(%8.2f s marking).",
3447 cmThread()->vtime_accum(),
3448 cmThread()->vtime_mark_accum());
3449 }
3451 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3452 if (use_parallel_marking_threads()) {
3453 _parallel_workers->print_worker_threads_on(st);
3454 }
3455 }
3457 void ConcurrentMark::print_on_error(outputStream* st) const {
3458 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3459 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3460 _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3461 _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3462 }
3464 // We take a break if someone is trying to stop the world.
3465 bool ConcurrentMark::do_yield_check(uint worker_id) {
3466 if (SuspendibleThreadSet::should_yield()) {
3467 if (worker_id == 0) {
3468 _g1h->g1_policy()->record_concurrent_pause();
3469 }
3470 SuspendibleThreadSet::yield();
3471 return true;
3472 } else {
3473 return false;
3474 }
3475 }
3477 #ifndef PRODUCT
3478 // for debugging purposes
3479 void ConcurrentMark::print_finger() {
3480 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3481 p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3482 for (uint i = 0; i < _max_worker_id; ++i) {
3483 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3484 }
3485 gclog_or_tty->cr();
3486 }
3487 #endif
3489 void CMTask::scan_object(oop obj) {
3490 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3492 if (_cm->verbose_high()) {
3493 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3494 _worker_id, p2i((void*) obj));
3495 }
3497 size_t obj_size = obj->size();
3498 _words_scanned += obj_size;
3500 obj->oop_iterate(_cm_oop_closure);
3501 statsOnly( ++_objs_scanned );
3502 check_limits();
3503 }
3505 // Closure for iteration over bitmaps
3506 class CMBitMapClosure : public BitMapClosure {
3507 private:
3508 // the bitmap that is being iterated over
3509 CMBitMap* _nextMarkBitMap;
3510 ConcurrentMark* _cm;
3511 CMTask* _task;
3513 public:
3514 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3515 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3517 bool do_bit(size_t offset) {
3518 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3519 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3520 assert(addr < _cm->finger(), "invariant");
3522 statsOnly( _task->increase_objs_found_on_bitmap() );
3523 assert(addr >= _task->finger(), "invariant");
3525 // We move that task's local finger along.
3526 _task->move_finger_to(addr);
3528 _task->scan_object(oop(addr));
3529 // we only partially drain the local queue and global stack
3530 _task->drain_local_queue(true);
3531 _task->drain_global_stack(true);
3533 // if the has_aborted flag has been raised, we need to bail out of
3534 // the iteration
3535 return !_task->has_aborted();
3536 }
3537 };
3539 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3540 ConcurrentMark* cm,
3541 CMTask* task)
3542 : _g1h(g1h), _cm(cm), _task(task) {
3543 assert(_ref_processor == NULL, "should be initialized to NULL");
3545 if (G1UseConcMarkReferenceProcessing) {
3546 _ref_processor = g1h->ref_processor_cm();
3547 assert(_ref_processor != NULL, "should not be NULL");
3548 }
3549 }
3551 void CMTask::setup_for_region(HeapRegion* hr) {
3552 assert(hr != NULL,
3553 "claim_region() should have filtered out NULL regions");
3554 assert(!hr->continuesHumongous(),
3555 "claim_region() should have filtered out continues humongous regions");
3557 if (_cm->verbose_low()) {
3558 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3559 _worker_id, p2i(hr));
3560 }
3562 _curr_region = hr;
3563 _finger = hr->bottom();
3564 update_region_limit();
3565 }
3567 void CMTask::update_region_limit() {
3568 HeapRegion* hr = _curr_region;
3569 HeapWord* bottom = hr->bottom();
3570 HeapWord* limit = hr->next_top_at_mark_start();
3572 if (limit == bottom) {
3573 if (_cm->verbose_low()) {
3574 gclog_or_tty->print_cr("[%u] found an empty region "
3575 "["PTR_FORMAT", "PTR_FORMAT")",
3576 _worker_id, p2i(bottom), p2i(limit));
3577 }
3578 // The region was collected underneath our feet.
3579 // We set the finger to bottom to ensure that the bitmap
3580 // iteration that will follow this will not do anything.
3581 // (this is not a condition that holds when we set the region up,
3582 // as the region is not supposed to be empty in the first place)
3583 _finger = bottom;
3584 } else if (limit >= _region_limit) {
3585 assert(limit >= _finger, "peace of mind");
3586 } else {
3587 assert(limit < _region_limit, "only way to get here");
3588 // This can happen under some pretty unusual circumstances. An
3589 // evacuation pause empties the region underneath our feet (NTAMS
3590 // at bottom). We then do some allocation in the region (NTAMS
3591 // stays at bottom), followed by the region being used as a GC
3592 // alloc region (NTAMS will move to top() and the objects
3593 // originally below it will be grayed). All objects now marked in
3594 // the region are explicitly grayed, if below the global finger,
3595 // and in fact we do not need to scan anything else. So, we simply
3596 // set _finger to be limit to ensure that the bitmap iteration
3597 // doesn't do anything.
3598 _finger = limit;
3599 }
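// Timeline sketch of the branch above (illustrative, not from the
// original source):
//   1. an evacuation pause empties the region   -> NTAMS == bottom
//   2. the mutator allocates in the region      -> NTAMS stays at bottom
//   3. the region becomes a GC alloc region     -> NTAMS moves to top(),
//      and the objects copied below it are explicitly grayed
// so the _region_limit we cached earlier can exceed the limit we have
// just re-read, and the bitmap scan of this region can be skipped.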
3601 _region_limit = limit;
3602 }
3604 void CMTask::giveup_current_region() {
3605 assert(_curr_region != NULL, "invariant");
3606 if (_cm->verbose_low()) {
3607 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3608 _worker_id, p2i(_curr_region));
3609 }
3610 clear_region_fields();
3611 }
3613 void CMTask::clear_region_fields() {
3614 // Values for these three fields that indicate that we're not
3615 // holding on to a region.
3616 _curr_region = NULL;
3617 _finger = NULL;
3618 _region_limit = NULL;
3619 }
3621 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3622 if (cm_oop_closure == NULL) {
3623 assert(_cm_oop_closure != NULL, "invariant");
3624 } else {
3625 assert(_cm_oop_closure == NULL, "invariant");
3626 }
3627 _cm_oop_closure = cm_oop_closure;
3628 }
3630 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3631 guarantee(nextMarkBitMap != NULL, "invariant");
3633 if (_cm->verbose_low()) {
3634 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3635 }
3637 _nextMarkBitMap = nextMarkBitMap;
3638 clear_region_fields();
3640 _calls = 0;
3641 _elapsed_time_ms = 0.0;
3642 _termination_time_ms = 0.0;
3643 _termination_start_time_ms = 0.0;
3645 #if _MARKING_STATS_
3646 _local_pushes = 0;
3647 _local_pops = 0;
3648 _local_max_size = 0;
3649 _objs_scanned = 0;
3650 _global_pushes = 0;
3651 _global_pops = 0;
3652 _global_max_size = 0;
3653 _global_transfers_to = 0;
3654 _global_transfers_from = 0;
3655 _regions_claimed = 0;
3656 _objs_found_on_bitmap = 0;
3657 _satb_buffers_processed = 0;
3658 _steal_attempts = 0;
3659 _steals = 0;
3660 _aborted = 0;
3661 _aborted_overflow = 0;
3662 _aborted_cm_aborted = 0;
3663 _aborted_yield = 0;
3664 _aborted_timed_out = 0;
3665 _aborted_satb = 0;
3666 _aborted_termination = 0;
3667 #endif // _MARKING_STATS_
3668 }
3670 bool CMTask::should_exit_termination() {
3671 regular_clock_call();
3672 // This is called when we are in the termination protocol. We should
3673 // quit if, for some reason, this task wants to abort or the global
3674 // stack is not empty (this means that we can get work from it).
3675 return !_cm->mark_stack_empty() || has_aborted();
3676 }
3678 void CMTask::reached_limit() {
3679 assert(_words_scanned >= _words_scanned_limit ||
3680 _refs_reached >= _refs_reached_limit,
3681 "shouldn't have been called otherwise");
3682 regular_clock_call();
3683 }
3685 void CMTask::regular_clock_call() {
3686 if (has_aborted()) return;
3688 // First, we need to recalculate the words scanned and refs reached
3689 // limits for the next clock call.
3690 recalculate_limits();
3692 // During the regular clock call we do the following:
3694 // (1) If an overflow has been flagged, then we abort.
3695 if (_cm->has_overflown()) {
3696 set_has_aborted();
3697 return;
3698 }
3700 // If we are not concurrent (i.e. we're doing remark) we don't need
3701 // to check anything else. The other steps are only needed during
3702 // the concurrent marking phase.
3703 if (!concurrent()) return;
3705 // (2) If marking has been aborted for Full GC, then we also abort.
3706 if (_cm->has_aborted()) {
3707 set_has_aborted();
3708 statsOnly( ++_aborted_cm_aborted );
3709 return;
3710 }
3712 double curr_time_ms = os::elapsedVTime() * 1000.0;
3714 // (3) If marking stats are enabled, then we update the clock-interval statistics.
3715 #if _MARKING_STATS_
3716 if (_words_scanned >= _words_scanned_limit) {
3717 ++_clock_due_to_scanning;
3718 }
3719 if (_refs_reached >= _refs_reached_limit) {
3720 ++_clock_due_to_marking;
3721 }
3723 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3724 _interval_start_time_ms = curr_time_ms;
3725 _all_clock_intervals_ms.add(last_interval_ms);
3727 if (_cm->verbose_medium()) {
3728 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3729 "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3730 _worker_id, last_interval_ms,
3731 _words_scanned,
3732 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3733 _refs_reached,
3734 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3735 }
3736 #endif // _MARKING_STATS_
3738 // (4) We check whether we should yield. If we have to, then we abort.
3739 if (SuspendibleThreadSet::should_yield()) {
3740 // We should yield. To do this we abort the task. The caller is
3741 // responsible for yielding.
3742 set_has_aborted();
3743 statsOnly( ++_aborted_yield );
3744 return;
3745 }
3747 // (5) We check whether we've reached our time quota. If we have,
3748 // then we abort.
3749 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3750 if (elapsed_time_ms > _time_target_ms) {
3751 set_has_aborted();
3752 _has_timed_out = true;
3753 statsOnly( ++_aborted_timed_out );
3754 return;
3755 }
3757 // (6) Finally, we check whether there are enough completed SATB
3758 // buffers available for processing. If there are, we abort.
3759 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3760 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3761 if (_cm->verbose_low()) {
3762 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3763 _worker_id);
3764 }
3765 // We do need to process SATB buffers, so we'll abort and restart
3766 // the marking task to do so.
3767 set_has_aborted();
3768 statsOnly( ++_aborted_satb );
3769 return;
3770 }
3771 }
3773 void CMTask::recalculate_limits() {
3774 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3775 _words_scanned_limit = _real_words_scanned_limit;
3777 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3778 _refs_reached_limit = _real_refs_reached_limit;
3779 }
3781 void CMTask::decrease_limits() {
3782 // This is called when we believe that we're going to do an infrequent
3783 // operation which will increase the cost per byte scanned (i.e. move
3784 // entries to/from the global stack). It basically tries to decrease the
3785 // scanning limit so that the clock is called earlier.
3787 if (_cm->verbose_medium()) {
3788 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3789 }
3791 _words_scanned_limit = _real_words_scanned_limit -
3792 3 * words_scanned_period / 4;
3793 _refs_reached_limit = _real_refs_reached_limit -
3794 3 * refs_reached_period / 4;
3795 }
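// Illustrative arithmetic (assumed period value, not from this file):
// if words_scanned_period were 12K words, the statement above would pull
// the words-scanned trigger back by 9K, leaving only a quarter of the
// normal period before regular_clock_call() fires again.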
3797 void CMTask::move_entries_to_global_stack() {
3798 // local array where we'll store the entries that will be popped
3799 // from the local queue
3800 oop buffer[global_stack_transfer_size];
3802 int n = 0;
3803 oop obj;
3804 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3805 buffer[n] = obj;
3806 ++n;
3807 }
3809 if (n > 0) {
3810 // we popped at least one entry from the local queue
3812 statsOnly( ++_global_transfers_to; _local_pops += n );
3814 if (!_cm->mark_stack_push(buffer, n)) {
3815 if (_cm->verbose_low()) {
3816 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3817 _worker_id);
3818 }
3819 set_has_aborted();
3820 } else {
3821 // the transfer was successful
3823 if (_cm->verbose_medium()) {
3824 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3825 _worker_id, n);
3826 }
3827 statsOnly( int tmp_size = _cm->mark_stack_size();
3828 if (tmp_size > _global_max_size) {
3829 _global_max_size = tmp_size;
3830 }
3831 _global_pushes += n );
3832 }
3833 }
3835 // this operation was quite expensive, so decrease the limits
3836 decrease_limits();
3837 }
3839 void CMTask::get_entries_from_global_stack() {
3840 // local array where we'll store the entries that will be popped
3841 // from the global stack.
3842 oop buffer[global_stack_transfer_size];
3843 int n;
3844 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3845 assert(n <= global_stack_transfer_size,
3846 "we should not pop more than the given limit");
3847 if (n > 0) {
3848 // yes, we did actually pop at least one entry
3850 statsOnly( ++_global_transfers_from; _global_pops += n );
3851 if (_cm->verbose_medium()) {
3852 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3853 _worker_id, n);
3854 }
3855 for (int i = 0; i < n; ++i) {
3856 bool success = _task_queue->push(buffer[i]);
3857 // We only call this when the local queue is empty or under a
3858 // given target limit. So, we do not expect this push to fail.
3859 assert(success, "invariant");
3860 }
3862 statsOnly( int tmp_size = _task_queue->size();
3863 if (tmp_size > _local_max_size) {
3864 _local_max_size = tmp_size;
3865 }
3866 _local_pushes += n );
3867 }
3869 // this operation was quite expensive, so decrease the limits
3870 decrease_limits();
3871 }
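// Design note (descriptive): entries move between the local queue and
// the global stack only in chunks of up to global_stack_transfer_size,
// so the mutex-protected global stack is touched infrequently while the
// cheap task-local queue absorbs the common case.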
3873 void CMTask::drain_local_queue(bool partially) {
3874 if (has_aborted()) return;
3876 // Decide what the target size is, depending on whether we're going to
3877 // drain it partially (so that other tasks can steal if they run out
3878 // of things to do) or totally (at the very end).
3879 size_t target_size;
3880 if (partially) {
3881 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3882 } else {
3883 target_size = 0;
3884 }
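// Worked example (illustrative values, not from this file): with a
// 16K-entry task queue and GCDrainStackTargetSize == 64, a partial drain
// uses target_size = MIN2(16384 / 3, 64) = 64, i.e. it stops once only
// 64 entries remain available for other tasks to steal.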
3886 if (_task_queue->size() > target_size) {
3887 if (_cm->verbose_high()) {
3888 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3889 _worker_id, target_size);
3890 }
3892 oop obj;
3893 bool ret = _task_queue->pop_local(obj);
3894 while (ret) {
3895 statsOnly( ++_local_pops );
3897 if (_cm->verbose_high()) {
3898 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3899 p2i((void*) obj));
3900 }
3902 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3903 assert(!_g1h->is_on_master_free_list(
3904 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3906 scan_object(obj);
3908 if (_task_queue->size() <= target_size || has_aborted()) {
3909 ret = false;
3910 } else {
3911 ret = _task_queue->pop_local(obj);
3912 }
3913 }
3915 if (_cm->verbose_high()) {
3916 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3917 _worker_id, _task_queue->size());
3918 }
3919 }
3920 }
3922 void CMTask::drain_global_stack(bool partially) {
3923 if (has_aborted()) return;
3925 // We have a policy to drain the local queue before we attempt to
3926 // drain the global stack.
3927 assert(partially || _task_queue->size() == 0, "invariant");
3929 // Decide what the target size is, depending on whether we're going to
3930 // drain it partially (so that other tasks can steal if they run out
3931 // of things to do) or totally (at the very end). Notice that,
3932 // because we move entries from the global stack in chunks or
3933 // because another task might be doing the same, we might in fact
3934 // drop below the target. But this is not a problem.
3935 size_t target_size;
3936 if (partially) {
3937 target_size = _cm->partial_mark_stack_size_target();
3938 } else {
3939 target_size = 0;
3940 }
3942 if (_cm->mark_stack_size() > target_size) {
3943 if (_cm->verbose_low()) {
3944 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3945 _worker_id, target_size);
3946 }
3948 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3949 get_entries_from_global_stack();
3950 drain_local_queue(partially);
3951 }
3953 if (_cm->verbose_low()) {
3954 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3955 _worker_id, _cm->mark_stack_size());
3956 }
3957 }
3958 }
3960 // The SATB queue set makes assumptions about whether to call the par
3961 // or non-par versions of the methods. This is why some of the code is
3962 // replicated. We should really get rid of the single-threaded version
3963 // of the code to simplify things.
3964 void CMTask::drain_satb_buffers() {
3965 if (has_aborted()) return;
3967 // We set this so that the regular clock knows that we're in the
3968 // middle of draining buffers and doesn't set the abort flag when it
3969 // notices that SATB buffers are available for draining. It'd be
3970 // very counterproductive if it did that. :-)
3971 _draining_satb_buffers = true;
3973 CMObjectClosure oc(this);
3974 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3975 if (G1CollectedHeap::use_parallel_gc_threads()) {
3976 satb_mq_set.set_par_closure(_worker_id, &oc);
3977 } else {
3978 satb_mq_set.set_closure(&oc);
3979 }
3981 // This keeps claiming and applying the closure to completed buffers
3982 // until we run out of buffers or we need to abort.
3983 if (G1CollectedHeap::use_parallel_gc_threads()) {
3984 while (!has_aborted() &&
3985 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3986 if (_cm->verbose_medium()) {
3987 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3988 }
3989 statsOnly( ++_satb_buffers_processed );
3990 regular_clock_call();
3991 }
3992 } else {
3993 while (!has_aborted() &&
3994 satb_mq_set.apply_closure_to_completed_buffer()) {
3995 if (_cm->verbose_medium()) {
3996 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3997 }
3998 statsOnly( ++_satb_buffers_processed );
3999 regular_clock_call();
4000 }
4001 }
4003 _draining_satb_buffers = false;
4005 assert(has_aborted() ||
4006 concurrent() ||
4007 satb_mq_set.completed_buffers_num() == 0, "invariant");
4009 if (G1CollectedHeap::use_parallel_gc_threads()) {
4010 satb_mq_set.set_par_closure(_worker_id, NULL);
4011 } else {
4012 satb_mq_set.set_closure(NULL);
4013 }
4015 // Again, this was a potentially expensive operation, so decrease the
4016 // limits to get the regular clock call earlier.
4017 decrease_limits();
4018 }
4020 void CMTask::print_stats() {
4021 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
4022 _worker_id, _calls);
4023 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4024 _elapsed_time_ms, _termination_time_ms);
4025 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4026 _step_times_ms.num(), _step_times_ms.avg(),
4027 _step_times_ms.sd());
4028 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4029 _step_times_ms.maximum(), _step_times_ms.sum());
4031 #if _MARKING_STATS_
4032 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4033 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4034 _all_clock_intervals_ms.sd());
4035 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4036 _all_clock_intervals_ms.maximum(),
4037 _all_clock_intervals_ms.sum());
4038 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4039 _clock_due_to_scanning, _clock_due_to_marking);
4040 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4041 _objs_scanned, _objs_found_on_bitmap);
4042 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4043 _local_pushes, _local_pops, _local_max_size);
4044 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4045 _global_pushes, _global_pops, _global_max_size);
4046 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4047 _global_transfers_to, _global_transfers_from);
4048 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
4049 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4050 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4051 _steal_attempts, _steals);
4052 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4053 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4054 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4055 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4056 _aborted_timed_out, _aborted_satb, _aborted_termination);
4057 #endif // _MARKING_STATS_
4058 }
4060 /*****************************************************************************
4062 The do_marking_step(time_target_ms, ...) method is the building
4063 block of the parallel marking framework. It can be called in parallel
4064 with other invocations of do_marking_step() on different tasks
4065 (but only one per task, obviously) and concurrently with the
4066 mutator threads, or during remark, hence it eliminates the need
4067 for two versions of the code. When called during remark, it will
4068 pick up from where the task left off during the concurrent marking
4069 phase. Interestingly, tasks are also claimable during evacuation
4070 pauses, since do_marking_step() ensures that it aborts before
4071 it needs to yield.
4073 The data structures that it uses to do marking work are the
4074 following:
4076 (1) Marking Bitmap. If there are gray objects that appear only
4077 on the bitmap (this happens either when dealing with an overflow
4078 or when the initial marking phase has simply marked the roots
4079 and didn't push them on the stack), then tasks claim heap
4080 regions whose bitmap they then scan to find gray objects. A
4081 global finger indicates where the end of the last claimed region
4082 is. A local finger indicates how far into the region a task has
4083 scanned. The two fingers are used to determine how to gray an
4084 object (i.e. whether simply marking it is OK, as it will be
4085 visited by a task in the future, or whether it needs to be also
4086 pushed on a stack).
4088 (2) Local Queue. Each task's local queue, which the task can access
4089 reasonably efficiently. Other tasks can steal from
4090 it when they run out of work. Throughout the marking phase, a
4091 task attempts to keep its local queue short but not totally
4092 empty, so that entries are available for stealing by other
4093 tasks. Only when there is no more work will a task totally
4094 drain its local queue.
4096 (3) Global Mark Stack. This handles local queue overflow. During
4097 marking only sets of entries are moved between it and the local
4098 queues, as access to it requires a mutex and more fine-grained
4099 interaction with it, which might cause contention. If it
4100 overflows, then the marking phase should restart and iterate
4101 over the bitmap to identify gray objects. Throughout the marking
4102 phase, tasks attempt to keep the global mark stack at a small
4103 length but not totally empty, so that entries are available for
4104 popping by other tasks. Only when there is no more work will
4105 tasks totally drain the global mark stack.
4107 (4) SATB Buffer Queue. This is where completed SATB buffers are
4108 made available. Buffers are regularly removed from this queue
4109 and scanned for roots, so that the queue doesn't get too
4110 long. During remark, all completed buffers are processed, as
4111 well as the filled-in parts of any uncompleted buffers.
4113 The do_marking_step() method tries to abort when the time target
4114 has been reached. There are a few other cases when the
4115 do_marking_step() method also aborts:
4117 (1) When the marking phase has been aborted (after a Full GC).
4119 (2) When a global overflow (on the global stack) has been
4120 triggered. Before the task aborts, it will actually sync up with
4121 the other tasks to ensure that all the marking data structures
4122 (local queues, stacks, fingers etc.) are re-initialized so that
4123 when do_marking_step() completes, the marking phase can
4124 immediately restart.
4126 (3) When enough completed SATB buffers are available. The
4127 do_marking_step() method only tries to drain SATB buffers right
4128 at the beginning. So, if enough buffers are available, the
4129 marking step aborts and the SATB buffers are processed at
4130 the beginning of the next invocation.
4132 (4) To yield. When we have to yield, we abort and yield
4133 right at the end of do_marking_step(). This saves us from a lot
4134 of hassle as, by yielding we might allow a Full GC. If this
4135 happens then objects will be compacted underneath our feet, the
4136 heap might shrink, etc. We save checking for this by just
4137 aborting and doing the yield right at the end.
4139 From the above it follows that the do_marking_step() method should
4140 be called in a loop (or, otherwise, regularly) until it completes.
4142 If a marking step completes without its has_aborted() flag being
4143 true, it means it has completed the current marking phase (and
4144 also all other marking tasks have done so and have all synced up).
4146 A method called regular_clock_call() is invoked "regularly" (in
4147 sub-ms intervals) throughout marking. It is this clock method that
4148 checks all the abort conditions which were mentioned above and
4149 decides when the task should abort. A work-based scheme is used to
4150 trigger this clock method: when the number of object words the
4151 marking phase has scanned or the number of references the marking
4152 phase has visited reach a given limit. Additional invocations of
4153 the clock method have been planted in a few other strategic places
4154 too. The initial reason for the clock method was to avoid calling
4155 vtime too regularly, as it is quite expensive. So, once it was in
4156 place, it was natural to piggy-back all the other conditions on it
4157 too and not constantly check them throughout the code.
4159 If do_termination is true then do_marking_step will enter its
4160 termination protocol.
4162 The value of is_serial must be true when do_marking_step is being
4163 called serially (i.e. by the VMThread) and do_marking_step should
4164 skip any synchronization in the termination and overflow code.
4165 Examples include the serial remark code and the serial reference
4166 processing closures.
4168 The value of is_serial must be false when do_marking_step is
4169 being called by any of the worker threads in a work gang.
4170 Examples include the concurrent marking code (CMMarkingTask),
4171 the MT remark code, and the MT reference processing closures.
4173 *****************************************************************************/
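// Illustrative driver sketch (an assumed caller shape, not code taken
// from this changeset): a concurrent marking worker effectively does
//
//   do {
//     task->do_marking_step(mark_step_duration_ms,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // ...yield to the suspendible thread set if requested...
//   } while (!cm->has_aborted() && task->has_aborted());
//
// i.e. a step that aborts (overflow, yield, time-out, pending SATB
// buffers) is simply re-invoked until it completes with its
// has_aborted() flag clear, as described above.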
4175 void CMTask::do_marking_step(double time_target_ms,
4176 bool do_termination,
4177 bool is_serial) {
4178 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4179 assert(concurrent() == _cm->concurrent(), "they should be the same");
4181 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4182 assert(_task_queues != NULL, "invariant");
4183 assert(_task_queue != NULL, "invariant");
4184 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4186 assert(!_claimed,
4187 "only one thread should claim this task at any one time");
4189 // OK, this doesn't safeguard against all possible scenarios, as it is
4190 // possible for two threads to set the _claimed flag at the same
4191 // time. But it is only for debugging purposes anyway and it will
4192 // catch most problems.
4193 _claimed = true;
4195 _start_time_ms = os::elapsedVTime() * 1000.0;
4196 statsOnly( _interval_start_time_ms = _start_time_ms );
4198 // If do_stealing is true then do_marking_step will attempt to
4199 // steal work from the other CMTasks. It only makes sense to
4200 // enable stealing when the termination protocol is enabled
4201 // and do_marking_step() is not being called serially.
4202 bool do_stealing = do_termination && !is_serial;
4204 double diff_prediction_ms =
4205 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4206 _time_target_ms = time_target_ms - diff_prediction_ms;
4208 // set up the variables that are used in the work-based scheme to
4209 // call the regular clock method
4210 _words_scanned = 0;
4211 _refs_reached = 0;
4212 recalculate_limits();
4214 // clear all flags
4215 clear_has_aborted();
4216 _has_timed_out = false;
4217 _draining_satb_buffers = false;
4219 ++_calls;
4221 if (_cm->verbose_low()) {
4222 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4223 "target = %1.2lfms >>>>>>>>>>",
4224 _worker_id, _calls, _time_target_ms);
4225 }
4227 // Set up the bitmap and oop closures. Anything that uses them is
4228 // eventually called from this method, so it is OK to allocate these
4229 // on the stack.
4230 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4231 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4232 set_cm_oop_closure(&cm_oop_closure);
4234 if (_cm->has_overflown()) {
4235 // This can happen if the mark stack overflows during a GC pause
4236 // and this task, after a yield point, restarts. We have to abort
4237 // as we need to get into the overflow protocol which happens
4238 // right at the end of this task.
4239 set_has_aborted();
4240 }
4242 // First drain any available SATB buffers. After this, we will not
4243 // look at SATB buffers before the next invocation of this method.
4244 // If enough completed SATB buffers are queued up, the regular clock
4245 // will abort this task so that it restarts.
4246 drain_satb_buffers();
4247 // ...then partially drain the local queue and the global stack
4248 drain_local_queue(true);
4249 drain_global_stack(true);
4251 do {
4252 if (!has_aborted() && _curr_region != NULL) {
4253 // This means that we're already holding on to a region.
4254 assert(_finger != NULL, "if region is not NULL, then the finger "
4255 "should not be NULL either");
4257 // We might have restarted this task after an evacuation pause
4258 // which might have evacuated the region we're holding on to
4259 // underneath our feet. Let's read its limit again to make sure
4260 // that we do not iterate over a region of the heap that
4261 // contains garbage (update_region_limit() will also move
4262 // _finger to the start of the region if it is found empty).
4263 update_region_limit();
4264 // We will start from _finger not from the start of the region,
4265 // as we might be restarting this task after aborting half-way
4266 // through scanning this region. In this case, _finger points to
4267 // the address where we last found a marked object. If this is a
4268 // fresh region, _finger points to start().
4269 MemRegion mr = MemRegion(_finger, _region_limit);
4271 if (_cm->verbose_low()) {
4272 gclog_or_tty->print_cr("[%u] we're scanning part "
4273 "["PTR_FORMAT", "PTR_FORMAT") "
4274 "of region "HR_FORMAT,
4275 _worker_id, p2i(_finger), p2i(_region_limit),
4276 HR_FORMAT_PARAMS(_curr_region));
4277 }
4279 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4280 "humongous regions should go around loop once only");
4282 // Some special cases:
4283 // If the memory region is empty, we can just give up the region.
4284 // If the current region is humongous then we only need to check
4285 // the bitmap for the bit associated with the start of the object,
4286 // scan the object if it's live, and give up the region.
4287 // Otherwise, let's iterate over the bitmap of the part of the region
4288 // that is left.
4289 // If the iteration is successful, give up the region.
4290 if (mr.is_empty()) {
4291 giveup_current_region();
4292 regular_clock_call();
4293 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4294 if (_nextMarkBitMap->isMarked(mr.start())) {
4295 // The object is marked - apply the closure
4296 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4297 bitmap_closure.do_bit(offset);
4298 }
4299 // Even if this task aborted while scanning the humongous object
4300 // we can (and should) give up the current region.
4301 giveup_current_region();
4302 regular_clock_call();
4303 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4304 giveup_current_region();
4305 regular_clock_call();
4306 } else {
4307 assert(has_aborted(), "currently the only way to do so");
4308 // The only way to abort the bitmap iteration is to return
4309 // false from the do_bit() method. However, inside the
4310 // do_bit() method we move the _finger to point to the
4311 // object currently being looked at. So, if we bail out, we
4312 // have definitely set _finger to something non-null.
4313 assert(_finger != NULL, "invariant");
4315 // Region iteration was actually aborted. So now _finger
4316 // points to the address of the object we last scanned. If we
4317 // leave it there, when we restart this task, we will rescan
4318 // the object. It is easy to avoid this. We move the finger by
4319 // enough to point to the next possible object header (the
4320 // bitmap knows by how much we need to move it as it knows its
4321 // granularity).
4322 assert(_finger < _region_limit, "invariant");
4323 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4324 // Check if bitmap iteration was aborted while scanning the last object
4325 if (new_finger >= _region_limit) {
4326 giveup_current_region();
4327 } else {
4328 move_finger_to(new_finger);
4329 }
4330 }
4331 }
4332 // At this point we have either completed iterating over the
4333 // region we were holding on to, or we have aborted.
4335 // We then partially drain the local queue and the global stack.
4336 // (Do we really need this?)
4337 drain_local_queue(true);
4338 drain_global_stack(true);
4340 // Read the note on the claim_region() method on why it might
4341 // return NULL with potentially more regions available for
4342 // claiming and why we have to check out_of_regions() to determine
4343 // whether we're done or not.
4344 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4345 // We are going to try to claim a new region. We should have
4346 // given up on the previous one.
4347 // Separated the asserts so that we know which one fires.
4348 assert(_curr_region == NULL, "invariant");
4349 assert(_finger == NULL, "invariant");
4350 assert(_region_limit == NULL, "invariant");
4351 if (_cm->verbose_low()) {
4352 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4353 }
4354 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4355 if (claimed_region != NULL) {
4356 // Yes, we managed to claim one
4357 statsOnly( ++_regions_claimed );
4359 if (_cm->verbose_low()) {
4360 gclog_or_tty->print_cr("[%u] we successfully claimed "
4361 "region "PTR_FORMAT,
4362 _worker_id, p2i(claimed_region));
4363 }
4365 setup_for_region(claimed_region);
4366 assert(_curr_region == claimed_region, "invariant");
4367 }
4368 // It is important to call the regular clock here. It might take
4369 // a while to claim a region if, for example, we hit a large
4370 // block of empty regions. So we need to call the regular clock
4371 // method once round the loop to make sure it's called
4372 // frequently enough.
4373 regular_clock_call();
4374 }
4376 if (!has_aborted() && _curr_region == NULL) {
4377 assert(_cm->out_of_regions(),
4378 "at this point we should be out of regions");
4379 }
4380 } while (_curr_region != NULL && !has_aborted());
4382 if (!has_aborted()) {
4383 // We cannot check whether the global stack is empty, since other
4384 // tasks might be pushing objects to it concurrently.
4385 assert(_cm->out_of_regions(),
4386 "at this point we should be out of regions");
4388 if (_cm->verbose_low()) {
4389 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4390 }
4392 // Try to reduce the number of available SATB buffers so that
4393 // remark has less work to do.
4394 drain_satb_buffers();
4395 }
4397 // Since we've done everything else, we can now totally drain the
4398 // local queue and global stack.
4399 drain_local_queue(false);
4400 drain_global_stack(false);
4402 // Attempt at work stealing from other tasks' queues.
4403 if (do_stealing && !has_aborted()) {
4404 // We have not aborted. This means that we have finished all that
4405 // we could. Let's try to do some stealing...
4407 // We cannot check whether the global stack is empty, since other
4408 // tasks might be pushing objects to it concurrently.
4409 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4410 "only way to reach here");
4412 if (_cm->verbose_low()) {
4413 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4414 }
4416 while (!has_aborted()) {
4417 oop obj;
4418 statsOnly( ++_steal_attempts );
4420 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4421 if (_cm->verbose_medium()) {
4422 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4423 _worker_id, p2i((void*) obj));
4424 }
4426 statsOnly( ++_steals );
4428 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4429 "any stolen object should be marked");
4430 scan_object(obj);
4432 // And since we're towards the end, let's totally drain the
4433 // local queue and global stack.
4434 drain_local_queue(false);
4435 drain_global_stack(false);
4436 } else {
4437 break;
4438 }
4439 }
4440 }
4442 // If we are about to wrap up and go into termination, check if we
4443 // should raise the overflow flag.
4444 if (do_termination && !has_aborted()) {
4445 if (_cm->force_overflow()->should_force()) {
4446 _cm->set_has_overflown();
4447 regular_clock_call();
4448 }
4449 }
4451 // We still haven't aborted. Now, let's try to get into the
4452 // termination protocol.
4453 if (do_termination && !has_aborted()) {
4454 // We cannot check whether the global stack is empty, since other
4455 // tasks might be concurrently pushing objects on it.
4456 // Separated the asserts so that we know which one fires.
4457 assert(_cm->out_of_regions(), "only way to reach here");
4458 assert(_task_queue->size() == 0, "only way to reach here");
4460 if (_cm->verbose_low()) {
4461 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4462 }
4464 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4466 // The CMTask class also extends the TerminatorTerminator class,
4467 // hence its should_exit_termination() method will also decide
4468 // whether to exit the termination protocol or not.
4469 bool finished = (is_serial ||
4470 _cm->terminator()->offer_termination(this));
4471 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4472 _termination_time_ms +=
4473 termination_end_time_ms - _termination_start_time_ms;
4475 if (finished) {
4476 // We're all done.
4478 if (_worker_id == 0) {
4479 // let's allow task 0 to do this
4480 if (concurrent()) {
4481 assert(_cm->concurrent_marking_in_progress(), "invariant");
4482 // we need to set this to false before the next
4483 // safepoint. This way we ensure that the marking phase
4484 // doesn't observe any more heap expansions.
4485 _cm->clear_concurrent_marking_in_progress();
4486 }
4487 }
4489 // We can now guarantee that the global stack is empty, since
4490 // all other tasks have finished. We separated the guarantees so
4491 // that, if a condition is false, we can immediately find out
4492 // which one.
4493 guarantee(_cm->out_of_regions(), "only way to reach here");
4494 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4495 guarantee(_task_queue->size() == 0, "only way to reach here");
4496 guarantee(!_cm->has_overflown(), "only way to reach here");
4497 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4499 if (_cm->verbose_low()) {
4500 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4501 }
4502 } else {
4503 // Apparently there's more work to do. Let's abort this task. It
4504 // will be restarted and we can hopefully find more things to do.
4506 if (_cm->verbose_low()) {
4507 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4508 _worker_id);
4509 }
4511 set_has_aborted();
4512 statsOnly( ++_aborted_termination );
4513 }
4514 }
4516 // Mainly for debugging purposes to make sure that a pointer to the
4517 // closure which was statically allocated in this frame doesn't
4518 // escape it by accident.
4519 set_cm_oop_closure(NULL);
4520 double end_time_ms = os::elapsedVTime() * 1000.0;
4521 double elapsed_time_ms = end_time_ms - _start_time_ms;
4522 // Update the step history.
4523 _step_times_ms.add(elapsed_time_ms);
4525 if (has_aborted()) {
4526 // The task was aborted for some reason.
4528 statsOnly( ++_aborted );
4530 if (_has_timed_out) {
4531 double diff_ms = elapsed_time_ms - _time_target_ms;
4532 // Keep statistics of how well we did with respect to hitting
4533 // our target only if we actually timed out (if we aborted for
4534 // other reasons, then the results might get skewed).
4535 _marking_step_diffs_ms.add(diff_ms);
4536 }
4538 if (_cm->has_overflown()) {
4539 // This is the interesting one. We aborted because a global
4540 // overflow was raised. This means we have to restart the
4541 // marking phase and start iterating over regions. However, in
4542 // order to do this we have to make sure that all tasks stop
4543 // what they are doing and re-initialise in a safe manner. We
4544 // will achieve this with the use of two barrier sync points.
4546 if (_cm->verbose_low()) {
4547 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4548 }
4550 if (!is_serial) {
4551 // We only need to enter the sync barrier if being called
4552 // from a parallel context
4553 _cm->enter_first_sync_barrier(_worker_id);
4555 // When we exit this sync barrier we know that all tasks have
4556 // stopped doing marking work. So, it's now safe to
4557 // re-initialise our data structures. At the end of this method,
4558 // task 0 will clear the global data structures.
4559 }
4561 statsOnly( ++_aborted_overflow );
4563 // We clear the local state of this task...
4564 clear_region_fields();
4566 if (!is_serial) {
4567 // ...and enter the second barrier.
4568 _cm->enter_second_sync_barrier(_worker_id);
4569 }
4570 // At this point, if we're in the concurrent phase of
4571 // marking, everything has been re-initialized and we're
4572 // ready to restart.
4573 }
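// Protocol sketch of the overflow path above (an illustrative summary
// of this method, not new behaviour): every parallel task does
//   _cm->enter_first_sync_barrier(id);   // all tasks stop marking;
//                                        // the global data structures
//                                        // are then cleared
//   clear_region_fields();               // reset task-local state
//   _cm->enter_second_sync_barrier(id);  // all resets are complete
// after which do_marking_step() can simply be invoked again.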
4575 if (_cm->verbose_low()) {
4576 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4577 "elapsed = %1.2lfms <<<<<<<<<<",
4578 _worker_id, _time_target_ms, elapsed_time_ms);
4579 if (_cm->has_aborted()) {
4580 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4581 _worker_id);
4582 }
4583 }
4584 } else {
4585 if (_cm->verbose_low()) {
4586 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4587 "elapsed = %1.2lfms <<<<<<<<<<",
4588 _worker_id, _time_target_ms, elapsed_time_ms);
4589 }
4590 }
4592 _claimed = false;
4593 }
4595 CMTask::CMTask(uint worker_id,
4596 ConcurrentMark* cm,
4597 size_t* marked_bytes,
4598 BitMap* card_bm,
4599 CMTaskQueue* task_queue,
4600 CMTaskQueueSet* task_queues)
4601 : _g1h(G1CollectedHeap::heap()),
4602 _worker_id(worker_id), _cm(cm),
4603 _claimed(false),
4604 _nextMarkBitMap(NULL), _hash_seed(17),
4605 _task_queue(task_queue),
4606 _task_queues(task_queues),
4607 _cm_oop_closure(NULL),
4608 _marked_bytes_array(marked_bytes),
4609 _card_bm(card_bm) {
4610 guarantee(task_queue != NULL, "invariant");
4611 guarantee(task_queues != NULL, "invariant");
4613 statsOnly( _clock_due_to_scanning = 0;
4614 _clock_due_to_marking = 0 );
4616 _marking_step_diffs_ms.add(0.5);
4617 }
4619 // These are formatting macros that are used below to ensure
4620 // consistent formatting. The *_H_* versions are used to format the
4621 // header for a particular value and they should be kept consistent
4622 // with the corresponding macro. Also note that most of the macros add
4623 // the necessary white space (as a prefix) which makes them a bit
4624 // easier to compose.
4626 // All the output lines are prefixed with this string to be able to
4627 // identify them easily in a large log file.
4628 #define G1PPRL_LINE_PREFIX "###"
4630 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4631 #ifdef _LP64
4632 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4633 #else // _LP64
4634 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4635 #endif // _LP64
4637 // For per-region info
4638 #define G1PPRL_TYPE_FORMAT " %-4s"
4639 #define G1PPRL_TYPE_H_FORMAT " %4s"
4640 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4641 #define G1PPRL_BYTE_H_FORMAT " %9s"
4642 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4643 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4645 // For summary info
4646 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4647 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4648 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4649 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
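// Illustrative composition (an assumed example): header and value macros
// are kept the same width, e.g. G1PPRL_BYTE_H_FORMAT (" %9s") prints
// "      used" while G1PPRL_BYTE_FORMAT (" "SIZE_FORMAT_W(9)) prints
// " 123456789" directly beneath it, so the columns line up.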
4651 G1PrintRegionLivenessInfoClosure::
4652 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4653 : _out(out),
4654 _total_used_bytes(0), _total_capacity_bytes(0),
4655 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4656 _hum_used_bytes(0), _hum_capacity_bytes(0),
4657 _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4658 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4659 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4660 MemRegion g1_reserved = g1h->g1_reserved();
4661 double now = os::elapsedTime();
4663 // Print the header of the output.
4664 _out->cr();
4665 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4666 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4667 G1PPRL_SUM_ADDR_FORMAT("reserved")
4668 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4669 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4670 HeapRegion::GrainBytes);
4671 _out->print_cr(G1PPRL_LINE_PREFIX);
4672 _out->print_cr(G1PPRL_LINE_PREFIX
4673 G1PPRL_TYPE_H_FORMAT
4674 G1PPRL_ADDR_BASE_H_FORMAT
4675 G1PPRL_BYTE_H_FORMAT
4676 G1PPRL_BYTE_H_FORMAT
4677 G1PPRL_BYTE_H_FORMAT
4678 G1PPRL_DOUBLE_H_FORMAT
4679 G1PPRL_BYTE_H_FORMAT
4680 G1PPRL_BYTE_H_FORMAT,
4681 "type", "address-range",
4682 "used", "prev-live", "next-live", "gc-eff",
4683 "remset", "code-roots");
4684 _out->print_cr(G1PPRL_LINE_PREFIX
4685 G1PPRL_TYPE_H_FORMAT
4686 G1PPRL_ADDR_BASE_H_FORMAT
4687 G1PPRL_BYTE_H_FORMAT
4688 G1PPRL_BYTE_H_FORMAT
4689 G1PPRL_BYTE_H_FORMAT
4690 G1PPRL_DOUBLE_H_FORMAT
4691 G1PPRL_BYTE_H_FORMAT
4692 G1PPRL_BYTE_H_FORMAT,
4693 "", "",
4694 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4695 "(bytes)", "(bytes)");
4696 }
4698 // It takes as a parameter a reference to one of the _hum_* fields. It
4699 // deduces the corresponding value for a region in a humongous region
4700 // series (either the region size, or what's left if the _hum_* field
4701 // is < the region size), and updates the _hum_* field accordingly.
4702 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4703 size_t bytes = 0;
4704 // The > 0 check is to deal with the prev and next live bytes which
4705 // could be 0.
4706 if (*hum_bytes > 0) {
4707 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4708 *hum_bytes -= bytes;
4709 }
4710 return bytes;
4711 }
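// Worked example (illustrative): for a humongous series whose total used
// size is 2.5 regions' worth, successive calls return GrainBytes,
// GrainBytes and then the remaining half region, after which the field
// reads 0 - each region in the series reports at most one region's worth.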
4713 // It deduces the values for a region in a humongous region series
4714 // from the _hum_* fields and updates those accordingly. It assumes
4715 // that the _hum_* fields have already been set up from the "starts
4716 // humongous" region and we visit the regions in address order.
4717 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4718 size_t* capacity_bytes,
4719 size_t* prev_live_bytes,
4720 size_t* next_live_bytes) {
4721 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4722 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4723 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4724 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4725 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4726 }
4728 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4729 const char* type = "";
4730 HeapWord* bottom = r->bottom();
4731 HeapWord* end = r->end();
4732 size_t capacity_bytes = r->capacity();
4733 size_t used_bytes = r->used();
4734 size_t prev_live_bytes = r->live_bytes();
4735 size_t next_live_bytes = r->next_live_bytes();
4736 double gc_eff = r->gc_efficiency();
4737 size_t remset_bytes = r->rem_set()->mem_size();
4738 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4740 if (r->used() == 0) {
4741 type = "FREE";
4742 } else if (r->is_survivor()) {
4743 type = "SURV";
4744 } else if (r->is_young()) {
4745 type = "EDEN";
4746 } else if (r->startsHumongous()) {
4747 type = "HUMS";
4749 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4750 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4751 "they should have been zeroed after the last time we used them");
4752 // Set up the _hum_* fields.
4753 _hum_capacity_bytes = capacity_bytes;
4754 _hum_used_bytes = used_bytes;
4755 _hum_prev_live_bytes = prev_live_bytes;
4756 _hum_next_live_bytes = next_live_bytes;
4757 get_hum_bytes(&used_bytes, &capacity_bytes,
4758 &prev_live_bytes, &next_live_bytes);
4759 end = bottom + HeapRegion::GrainWords;
4760 } else if (r->continuesHumongous()) {
4761 type = "HUMC";
4762 get_hum_bytes(&used_bytes, &capacity_bytes,
4763 &prev_live_bytes, &next_live_bytes);
4764 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4765 } else {
4766 type = "OLD";
4767 }
4769 _total_used_bytes += used_bytes;
4770 _total_capacity_bytes += capacity_bytes;
4771 _total_prev_live_bytes += prev_live_bytes;
4772 _total_next_live_bytes += next_live_bytes;
4773 _total_remset_bytes += remset_bytes;
4774 _total_strong_code_roots_bytes += strong_code_roots_bytes;
4776 // Print a line for this particular region.
4777 _out->print_cr(G1PPRL_LINE_PREFIX
4778 G1PPRL_TYPE_FORMAT
4779 G1PPRL_ADDR_BASE_FORMAT
4780 G1PPRL_BYTE_FORMAT
4781 G1PPRL_BYTE_FORMAT
4782 G1PPRL_BYTE_FORMAT
4783 G1PPRL_DOUBLE_FORMAT
4784 G1PPRL_BYTE_FORMAT
4785 G1PPRL_BYTE_FORMAT,
4786 type, p2i(bottom), p2i(end),
4787 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4788 remset_bytes, strong_code_roots_bytes);
4790 return false;
4791 }
4793 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4794 // Add static memory usage to the remembered set sizes.
4795 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4796 // Print the footer of the output.
4797 _out->print_cr(G1PPRL_LINE_PREFIX);
4798 _out->print_cr(G1PPRL_LINE_PREFIX
4799 " SUMMARY"
4800 G1PPRL_SUM_MB_FORMAT("capacity")
4801 G1PPRL_SUM_MB_PERC_FORMAT("used")
4802 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4803 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4804 G1PPRL_SUM_MB_FORMAT("remset")
4805 G1PPRL_SUM_MB_FORMAT("code-roots"),
4806 bytes_to_mb(_total_capacity_bytes),
4807 bytes_to_mb(_total_used_bytes),
4808 perc(_total_used_bytes, _total_capacity_bytes),
4809 bytes_to_mb(_total_prev_live_bytes),
4810 perc(_total_prev_live_bytes, _total_capacity_bytes),
4811 bytes_to_mb(_total_next_live_bytes),
4812 perc(_total_next_live_bytes, _total_capacity_bytes),
4813 bytes_to_mb(_total_remset_bytes),
4814 bytes_to_mb(_total_strong_code_roots_bytes));
4815 _out->cr();
4816 }