Thu, 26 Sep 2013 10:25:02 -0400
7195622: CheckUnhandledOops has limited usefulness now
Summary: Enable CHECK_UNHANDLED_OOPS in fastdebug builds across all supported platforms.
Reviewed-by: coleenp, hseigel, dholmes, stefank, twisti, ihse, rdurbin
Contributed-by: lois.foltan@oracle.com
1 /*
2 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/symbolTable.hpp"
27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
32 #include "gc_implementation/g1/g1Log.hpp"
33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
34 #include "gc_implementation/g1/g1RemSet.hpp"
35 #include "gc_implementation/g1/heapRegion.inline.hpp"
36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
38 #include "gc_implementation/shared/vmGCOperations.hpp"
39 #include "gc_implementation/shared/gcTimer.hpp"
40 #include "gc_implementation/shared/gcTrace.hpp"
41 #include "gc_implementation/shared/gcTraceTime.hpp"
42 #include "memory/genOopClosures.inline.hpp"
43 #include "memory/referencePolicy.hpp"
44 #include "memory/resourceArea.hpp"
45 #include "oops/oop.inline.hpp"
46 #include "runtime/handles.inline.hpp"
47 #include "runtime/java.hpp"
48 #include "services/memTracker.hpp"
50 // Concurrent marking bit map wrapper
52 CMBitMapRO::CMBitMapRO(int shifter) :
53 _bm(),
54 _shifter(shifter) {
55 _bmStartWord = 0;
56 _bmWordSize = 0;
57 }
// Returns the address of the first marked word in [addr, limit), or
// "limit" itself if no marked word exists in that range. A NULL limit
// means "search to the end of the covered heap".
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    // No explicit limit: search the bitmap's whole coverage.
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}
// Returns the address of the first *unmarked* word in [addr, limit), or
// "limit" if every word in the range is marked. A NULL limit means "the
// end of the covered heap". (Unlike getNextMarkedWordAddress, no
// rounding of addr is performed here.)
HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}
92 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
93 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
94 return (int) (diff >> _shifter);
95 }
#ifndef PRODUCT
// Debug-only sanity check: does this bitmap exactly cover the given
// reserved heap space (same start address and same size in words)?
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif
// Dump the underlying bitmap state to "st" (used for hs_err-style
// error reporting); "prefix" is prepended to each output line.
void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}
// Reserve and commit the backing store for a bitmap covering "heap_rs"
// (one bit per 2^_shifter heap words). Returns false, after issuing a
// warning, if either the reservation or the commit fails.
bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}
136 void CMBitMap::clearAll() {
137 _bm.clear();
138 return;
139 }
141 void CMBitMap::markRange(MemRegion mr) {
142 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
143 assert(!mr.is_empty(), "unexpected empty region");
144 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
145 ((HeapWord *) mr.end())),
146 "markRange memory region end is not card aligned");
147 // convert address range into offset range
148 _bm.at_put_range(heapWordToOffset(mr.start()),
149 heapWordToOffset(mr.end()), true);
150 }
152 void CMBitMap::clearRange(MemRegion mr) {
153 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
154 assert(!mr.is_empty(), "unexpected empty region");
155 // convert address range into offset range
156 _bm.at_put_range(heapWordToOffset(mr.start()),
157 heapWordToOffset(mr.end()), false);
158 }
// Find the first maximal marked run within [addr, end_addr), clear its
// bits and return it as a MemRegion. Returns an empty region if there
// are no marked words in the range.
MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
// The mark stack starts unallocated (allocate() reserves the backing
// store later). "_cm" is kept so set_should_expand() can query the
// global overflow state. The drain bookkeeping only exists in debug
// builds, hence the ASSERT guard.
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}
// Reserve, commit and initialize backing store for a stack holding
// "capacity" oops. Returns false, after a warning, on failure; the
// reservation is released if the commit step fails.
bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}
// Attempt to double the mark stack capacity, up to MarkStackSizeMax.
// Called during remark after the stack overflowed; the stack must be
// empty at this point. Failure to expand is benign: the old stack is
// kept and the failure is only logged (when verbose GC logging is on).
void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    // Already at the maximum permitted size; nothing to do.
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}
void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}
252 CMMarkStack::~CMMarkStack() {
253 if (_base != NULL) {
254 _base = NULL;
255 _virtual_space.release();
256 }
257 }
// Push "ptr" from any number of threads concurrently, claiming a slot
// with a CAS on _index. If the stack is full the push is dropped and
// _overflow is set; callers detect and handle the overflow later.
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      // CAS succeeded: we own slot "index", publish the entry there.
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
// Push "n" entries from "ptr_arr" at once, claiming a contiguous range
// of slots with a single CAS on _index. If the stack cannot hold all
// "n" entries, _overflow is set and the entries are dropped.
void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      // Not enough room for all n entries.
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      // CAS succeeded: we own [index, index + n); copy the entries in.
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
// Push "n" entries from "ptr_arr" while holding ParGCRareEvent_lock —
// the lock-based counterpart of par_adjoin_arr. Sets _overflow and
// drops the entries if they do not fit.
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}
// Pop up to "max" entries into "ptr_arr" under ParGCRareEvent_lock.
// The number actually popped is stored in *n; returns false iff the
// stack was empty.
bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    // Copy out the topmost k entries, deepest of them first.
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
// Pop and iterate entries until the stack is empty, applying "cl" to
// each popped object. If "yield_after" is true, a yield is offered
// after each object and the drain returns false if it was interrupted
// by a yield request. Every entry must be grey, i.e. marked in "bm"
// when a bitmap is supplied.
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      // A yield was requested: stop draining and report the interruption.
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
// Snapshot the stack index at the start of a GC pause so that
// note_end_of_gc() can verify nothing was pushed during the pause.
void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}
// Verify the stack was not modified during the GC pause and clear the
// snapshot taken by note_start_of_gc().
void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue, so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}
384 void CMMarkStack::oops_do(OopClosure* f) {
385 assert(_saved_index == _index,
386 err_msg("saved index: %d index: %d", _saved_index, _index));
387 for (int i = 0; i < _index; i += 1) {
388 f->do_oop(&_base[i]);
389 }
390 }
// Liveness query: delegates to G1CollectedHeap::is_obj_ill() —
// presumably "true" means the object is not yet known to be live
// under the current marking (confirm against is_obj_ill's definition).
bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}
396 CMRootRegions::CMRootRegions() :
397 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
398 _should_abort(false), _next_survivor(NULL) { }
400 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
401 _young_list = g1h->young_list();
402 _cm = cm;
403 }
// Set up root-region claiming for a new marking cycle. Must not be
// called while a previous scan is still in progress.
void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  // There is only something to scan if at least one survivor exists.
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}
// Claim the next root region (currently always a survivor region) for
// scanning. Returns NULL when nothing is left to claim or when an
// abort was requested. Callable from multiple threads: the unlocked
// read is a fast-path check that is re-validated under
// RootRegionScan_lock before the claim is made.
HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
// Mark the root-region scan as complete and wake up any threads
// blocked in wait_until_scan_finished(). Called once the last region
// has been scanned (or the scan was aborted).
void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    // A normal (non-aborted) scan must have consumed every survivor.
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}
// Block until the root-region scan completes. Returns false if no scan
// was in progress (nothing to wait for), true if we actually waited.
bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Loop to guard against spurious wakeups; scan_finished() clears
    // the flag and notifies under the same lock.
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}
474 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
475 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
476 #endif // _MSC_VER
478 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
479 return MAX2((n_par_threads + 2) / 4, 1U);
480 }
// Construct the global concurrent-marking state for the heap covered
// by "heap_rs". On any allocation or configuration failure this
// returns early, leaving _completed_initialization false; callers are
// expected to check completed_initialization() afterwards.
ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(log2_intptr(MinObjAlignment)),
  _markBitMap2(log2_intptr(MinObjAlignment)),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  // Clamp the requested verbosity into the supported range.
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    // NOTE(review): _heap_start / _heap_end are only assigned near the
    // bottom of this constructor, so this trace appears to print their
    // uninitialized values — confirm whether this is intended.
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    // NOTE(review): ConcGCThreads/ParallelGCThreads are uintx flags but
    // are printed with UINT32_FORMAT — confirm the format specifier is
    // correct for LP64 builds.
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    // Size the mark stack from the number of marking threads, clamped
    // into [MarkStackSize, MarkStackSizeMax].
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      // NOTE(review): the literal 1 is passed for a UINTX_FORMAT
      // specifier — confirm whether it should be cast to uintx.
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  // Create the per-worker task queues, counting data and CMTask objects.
  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}
// Grow the recorded heap end (_heap_end) to match the currently
// committed region, but only while concurrent marking is in progress —
// or unconditionally when "force" is true.
void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}
// Reset the global marking state for a new marking cycle: refresh the
// heap bounds, clear marking data structures, reset every task, and
// raise the concurrent-marking-in-progress flag. Must run at a STW
// pause.
void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // will be at the end of evacuation pauses, when tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}
// Empty the global mark stack and every task queue and reset the
// global finger. When "clear_overflow" is false the caller is
// restarting marking after an overflow, so the overflow flag must
// still be set on entry and is left set.
void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}
// Record how many marking tasks will be active in the upcoming phase
// and resize the terminator and both overflow barriers to match.
void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}
// Set the active task count and record whether the coming phase runs
// concurrently (marking) or at a safepoint (remark). The concurrent
// flag is pushed down to every task, active or not.
void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}
826 void ConcurrentMark::set_non_marking_state() {
827 // We set the global marking state to some default values when we're
828 // not doing marking.
829 reset_marking_state();
830 _active_tasks = 0;
831 clear_concurrent_marking_in_progress();
832 }
834 ConcurrentMark::~ConcurrentMark() {
835 // The ConcurrentMark instance is never freed.
836 ShouldNotReachHere();
837 }
// Clear the "next" marking bitmap in 1M chunks, offering to yield
// between chunks, then clear the liveness counting data. Runs while
// the cycle is still considered in progress; no other marking cycle
// may start during the clearing.
void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  // NOTE(review): g1p appears unused in this method — confirm whether
  // it can be removed.
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      // Last, partial chunk.
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}
886 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
887 public:
888 bool doHeapRegion(HeapRegion* r) {
889 if (!r->continuesHumongous()) {
890 r->note_start_of_marking();
891 }
892 return false;
893 }
894 };
// STW work performed *before* the initial-mark root scan: reset the
// marking state and notify every region that marking is starting.
void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  // NOTE(review): g1p appears unused in this method — confirm whether
  // it can be removed.
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}
// STW work done at the end of the initial-mark pause: arms the forced
// overflow machinery, starts concurrent ("weak") reference discovery,
// activates the SATB queues of all Java threads, and prepares the root
// regions for the upcoming concurrent root-region scan.
void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */
// First rendezvous point for marking tasks after a global mark-stack
// overflow. All active tasks meet here; in the concurrent phase, task 0
// then resets the global marking state before everyone proceeds to the
// second barrier.
void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  // Leave the STS around the barrier sync when running concurrently to
  // avoid deadlocking with a pending yield request (see comment above).
  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}
// Second rendezvous point after an overflow: once every task has passed
// this barrier, the global marking state has been re-initialized (by
// task 0 in enter_first_sync_barrier) and each task has reset its own
// local data structures, so marking can proceed.
void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  // Leave / re-join the STS around the sync, but only when operating
  // concurrently (see the block comment above enter_first_sync_barrier).
  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}
1037 #ifndef PRODUCT
1038 void ForceOverflowSettings::init() {
1039 _num_remaining = G1ConcMarkForceOverflow;
1040 _force = false;
1041 update();
1042 }
1044 void ForceOverflowSettings::update() {
1045 if (_num_remaining > 0) {
1046 _num_remaining -= 1;
1047 _force = true;
1048 } else {
1049 _force = false;
1050 }
1051 }
1053 bool ForceOverflowSettings::should_force() {
1054 if (_force) {
1055 _force = false;
1056 return true;
1057 } else {
1058 return false;
1059 }
1060 }
1061 #endif // !PRODUCT
// The concurrent marking gang task: each worker repeatedly invokes
// do_marking_step() on its CMTask until marking completes, is aborted
// (e.g. by a Full GC), or the global mark stack overflows. Between
// steps the worker may sleep to keep the marking overhead near the
// target set by the policy.
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    // Join the STS so that safepoints can suspend us while marking.
    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        // NOTE(review): return value intentionally unused here — the
        // yield itself (if any) happens inside do_yield_check().
        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          // The task aborted because its time slice expired: sleep for a
          // while proportional to the vtime just consumed, leaving the
          // STS so a safepoint can proceed while we sleep.
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};
// Calculates the number of active workers for a concurrent
// phase. Returns 0 when running without parallel GC threads.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      // Dynamic sizing is off, or the user set ConcGCThreads explicitly
      // (and dynamic sizing is not forced): use the configured maximum.
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}
// Scans a single root region, applying G1RootRegionScanClosure to every
// object in [bottom, top).
void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval); // prefetch ahead of the scan cursor
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size; // objects are laid out contiguously up to top()
  }
}
1181 class CMRootRegionScanTask : public AbstractGangTask {
1182 private:
1183 ConcurrentMark* _cm;
1185 public:
1186 CMRootRegionScanTask(ConcurrentMark* cm) :
1187 AbstractGangTask("Root Region Scan"), _cm(cm) { }
1189 void work(uint worker_id) {
1190 assert(Thread::current()->is_ConcurrentGC_thread(),
1191 "this should only be done by a conc GC thread");
1193 CMRootRegions* root_regions = _cm->root_regions();
1194 HeapRegion* hr = root_regions->claim_next();
1195 while (hr != NULL) {
1196 _cm->scanRootRegion(hr, worker_id);
1197 hr = root_regions->claim_next();
1198 }
1199 }
1200 };
// Drives the (possibly parallel) scan of all root regions. Marks the
// scan finished so a subsequent evacuation pause need not wait for it.
void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}
// Runs the bulk of concurrent marking by spawning the marking gang task.
// Returns when marking completes, overflows (restart needed), or is
// aborted.
void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing is made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}
// The remark pause: stop-the-world completion of marking. Finishes the
// marking work, processes weak references, and then either (a) on mark
// stack overflow, resets the marking state so concurrent marking can
// restart, or (b) aggregates the counting data, deactivates the SATB
// queues, and moves to the non-marking state.
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}
// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm; // region liveness bitmap being built/verified
  BitMap* _card_bm;   // card liveness bitmap being built/verified

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};
// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;              // marking bitmap the walk is based on
  size_t _region_marked_bytes;  // result for the most recent region

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    // Walk the marked objects in [bottom, ntams), accumulating their
    // sizes and setting the corresponding bits in the card bitmap.
    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};
// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.
class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm; // Region BM to be verified
  BitMap* _card_bm; // Card BM to be verified
  bool _verbose; // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm; // Expected card BM values

  int _failures; // total mismatches found by this closure so far

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    // Take the lock only when printing, so verbose output from parallel
    // workers does not interleave.
    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};
1620 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1621 protected:
1622 G1CollectedHeap* _g1h;
1623 ConcurrentMark* _cm;
1624 BitMap* _actual_region_bm;
1625 BitMap* _actual_card_bm;
1627 uint _n_workers;
1629 BitMap* _expected_region_bm;
1630 BitMap* _expected_card_bm;
1632 int _failures;
1633 bool _verbose;
1635 public:
1636 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1637 BitMap* region_bm, BitMap* card_bm,
1638 BitMap* expected_region_bm, BitMap* expected_card_bm)
1639 : AbstractGangTask("G1 verify final counting"),
1640 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1641 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1642 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1643 _failures(0), _verbose(false),
1644 _n_workers(0) {
1645 assert(VerifyDuringGC, "don't call this otherwise");
1647 // Use the value already set as the number of active threads
1648 // in the call to run_task().
1649 if (G1CollectedHeap::use_parallel_gc_threads()) {
1650 assert( _g1h->workers()->active_workers() > 0,
1651 "Should have been previously set");
1652 _n_workers = _g1h->workers()->active_workers();
1653 } else {
1654 _n_workers = 1;
1655 }
1657 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1658 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1660 _verbose = _cm->verbose_medium();
1661 }
1663 void work(uint worker_id) {
1664 assert(worker_id < _n_workers, "invariant");
1666 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1667 _actual_region_bm, _actual_card_bm,
1668 _expected_region_bm,
1669 _expected_card_bm,
1670 _verbose);
1672 if (G1CollectedHeap::use_parallel_gc_threads()) {
1673 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1674 worker_id,
1675 _n_workers,
1676 HeapRegion::VerifyCountClaimValue);
1677 } else {
1678 _g1h->heap_region_iterate(&verify_cl);
1679 }
1681 Atomic::add(verify_cl.failures(), &_failures);
1682 }
1684 int failures() const { return _failures; }
1685 };
// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.
class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
    if (ntams < top) {
      // This definitely means the region has live objects.
      set_bit_for_region(hr);

      // Now set the bits in the card bitmap for [ntams, top)
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      assert(end_idx <= _card_bm->size(),
             err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     end_idx, _card_bm->size()));
      assert(start_idx < _card_bm->size(),
             err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
                     start_idx, _card_bm->size()));

      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
    }

    // Set the bit for the region if it contains live data
    if (hr->next_marked_bytes() > 0) {
      set_bit_for_region(hr);
    }

    return false;
  }
};
// Gang task that finalizes the liveness counting data (region and card
// liveness bitmaps) across all heap regions during the cleanup pause.
class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint _n_workers;

public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _n_workers(0) {
    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    FinalCountDataUpdateClosure final_update_cl(_g1h,
                                                _actual_region_bm,
                                                _actual_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      // Regions are claimed with FinalCountClaimValue so each is
      // processed by exactly one worker.
      _g1h->heap_region_par_iterate_chunked(&final_update_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&final_update_cl);
    }
  }
};
1801 class G1ParNoteEndTask;
1803 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1804 G1CollectedHeap* _g1;
1805 int _worker_num;
1806 size_t _max_live_bytes;
1807 uint _regions_claimed;
1808 size_t _freed_bytes;
1809 FreeRegionList* _local_cleanup_list;
1810 OldRegionSet* _old_proxy_set;
1811 HumongousRegionSet* _humongous_proxy_set;
1812 HRRSCleanupTask* _hrrs_cleanup_task;
1813 double _claimed_region_time;
1814 double _max_region_time;
1816 public:
1817 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1818 int worker_num,
1819 FreeRegionList* local_cleanup_list,
1820 OldRegionSet* old_proxy_set,
1821 HumongousRegionSet* humongous_proxy_set,
1822 HRRSCleanupTask* hrrs_cleanup_task) :
1823 _g1(g1), _worker_num(worker_num),
1824 _max_live_bytes(0), _regions_claimed(0),
1825 _freed_bytes(0),
1826 _claimed_region_time(0.0), _max_region_time(0.0),
1827 _local_cleanup_list(local_cleanup_list),
1828 _old_proxy_set(old_proxy_set),
1829 _humongous_proxy_set(humongous_proxy_set),
1830 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1832 size_t freed_bytes() { return _freed_bytes; }
1834 bool doHeapRegion(HeapRegion *hr) {
1835 if (hr->continuesHumongous()) {
1836 return false;
1837 }
1838 // We use a claim value of zero here because all regions
1839 // were claimed with value 1 in the FinalCount task.
1840 _g1->reset_gc_time_stamps(hr);
1841 double start = os::elapsedTime();
1842 _regions_claimed++;
1843 hr->note_end_of_marking();
1844 _max_live_bytes += hr->max_live_bytes();
1845 _g1->free_region_if_empty(hr,
1846 &_freed_bytes,
1847 _local_cleanup_list,
1848 _old_proxy_set,
1849 _humongous_proxy_set,
1850 _hrrs_cleanup_task,
1851 true /* par */);
1852 double region_time = (os::elapsedTime() - start);
1853 _claimed_region_time += region_time;
1854 if (region_time > _max_region_time) {
1855 _max_region_time = region_time;
1856 }
1857 return false;
1858 }
1860 size_t max_live_bytes() { return _max_live_bytes; }
1861 uint regions_claimed() { return _regions_claimed; }
1862 double claimed_region_time_sec() { return _claimed_region_time; }
1863 double max_region_time_sec() { return _max_region_time; }
1864 };
// STW gang task that applies G1NoteEndOfConcMarkClosure to the heap
// regions: notes the end of marking, frees empty regions into per-worker
// local lists, and merges per-worker results (byte counts, printer
// output, cleanup list) under ParGCRareEvent_lock.
class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;   // sum over workers (guarded by ParGCRareEvent_lock)
  size_t _freed_bytes;      // sum over workers (guarded by ParGCRareEvent_lock)
  FreeRegionList* _cleanup_list;   // global list the freed regions end up on

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(uint worker_id) {
    double start = os::elapsedTime();
    // Worker-local accumulators; merged into the shared state below.
    FreeRegionList local_cleanup_list("Local Cleanup List");
    OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
                                           &old_proxy_set,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
                                            _g1h->workers()->active_workers(),
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &old_proxy_set,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      // Serialize the merge of per-worker results into the task-wide
      // totals and the global cleanup list.
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};
1938 class G1ParScrubRemSetTask: public AbstractGangTask {
1939 protected:
1940 G1RemSet* _g1rs;
1941 BitMap* _region_bm;
1942 BitMap* _card_bm;
1943 public:
1944 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1945 BitMap* region_bm, BitMap* card_bm) :
1946 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1947 _region_bm(region_bm), _card_bm(card_bm) { }
1949 void work(uint worker_id) {
1950 if (G1CollectedHeap::use_parallel_gc_threads()) {
1951 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1952 HeapRegion::ScrubRemSetClaimValue);
1953 } else {
1954 _g1rs->scrub(_region_bm, _card_bm);
1955 }
1956 }
1958 };
// The STW Cleanup pause: finalizes liveness counting, installs the new
// "prev" bitmap, notes the end of marking in every region (freeing
// completely empty ones onto _cleanup_list), optionally scrubs the
// remembered sets, and records policy/statistics updates.
void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  HRSPhaseSetter x(HRSPhaseCleanup);
  g1h->verify_region_sets_optional();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  HeapRegionRemSet::reset_for_cleanup_tasks();

  uint n_workers;

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
           "sanity check");

    g1h->set_par_threads();
    n_workers = g1h->n_par_threads();
    assert(g1h->n_par_threads() == n_workers,
           "Should not have been reset");
    g1h->workers()->run_task(&g1_par_count_task);
    // Done with the parallel phase so reset to 0.
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    n_workers = 1;
    g1_par_count_task.work(0);
  }

  if (VerifyDuringGC) {
    // Verify that the counting data accumulated during marking matches
    // that calculated by walking the marking bitmap.

    // Bitmaps to hold expected values
    BitMap expected_region_bm(_region_bm.size(), false);
    BitMap expected_card_bm(_card_bm.size(), false);

    G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                 &_region_bm,
                                                 &_card_bm,
                                                 &expected_region_bm,
                                                 &expected_card_bm);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_verify_task);
      // Done with the parallel phase so reset to 0.
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
             "sanity check");
    } else {
      g1_par_verify_task.work(0);
    }

    guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
  }

  size_t start_used_bytes = g1h->used();
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  _total_counting_time += this_final_counting_time;

  if (G1PrintRegionLivenessInfo) {
    G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
    _g1h->heap_region_iterate(&cl);
  }

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    g1h->set_par_threads((int)n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->check_gc_time_stamps();

  if (!cleanup_list_is_empty()) {
    // The cleanup list is not empty, so we'll have to process it
    // concurrently. Notify anyone else that might be wanting free
    // regions that there will be more free regions coming soon.
    g1h->set_free_regions_coming();
  }

  // Scrub the remembered sets (if enabled) before the
  // record_concurrent_mark_cleanup_end() call below, since it affects
  // the metric by which we sort the heap regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      g1h->set_par_threads((int)n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  if (G1Log::fine()) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // Clean up will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::update_heap_info_at_gc();

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  g1h->g1mm()->update_sizes();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(after)");
  }

  g1h->verify_region_sets_optional();
  g1h->trace_heap_after_concurrent_cycle();
}
// Concurrent follow-up to the Cleanup pause: clears the regions on
// _cleanup_list and moves them, in batches, onto the heap's
// secondary free list (notifying waiters after each batch).
void ConcurrentMark::completeCleanup() {
  if (has_aborted()) return;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  _cleanup_list.verify_optional();
  FreeRegionList tmp_free_list("Tmp Free List");

  if (G1ConcRegionFreeingVerbose) {
    gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                           "cleanup list has %u entries",
                           _cleanup_list.length());
  }

  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks
  while (!_cleanup_list.is_empty()) {
    HeapRegion* hr = _cleanup_list.remove_head();
    assert(hr != NULL, "the list was not empty");
    hr->par_clear();
    tmp_free_list.add_as_tail(hr);

    // Instead of adding one region at a time to the secondary_free_list,
    // we accumulate them in the local list and move them a few at a
    // time. This also cuts down on the number of notify_all() calls
    // we do during this process. We'll also append the local list when
    // _cleanup_list is empty (which means we just removed the last
    // region from the _cleanup_list).
    if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
        _cleanup_list.is_empty()) {
      if (G1ConcRegionFreeingVerbose) {
        gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
                               "appending %u entries to the secondary_free_list, "
                               "cleanup list still has %u entries",
                               tmp_free_list.length(),
                               _cleanup_list.length());
      }

      {
        // The secondary free list IS accessed concurrently (by allocators),
        // so the batch append happens under SecondaryFreeList_lock.
        MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
        g1h->secondary_free_list_add_as_tail(&tmp_free_list);
        SecondaryFreeList_lock->notify_all();
      }

      if (G1StressConcRegionFreeing) {
        // Stress mode: artificially slow down freeing to widen the race
        // window with threads consuming the secondary free list.
        for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
          os::sleep(Thread::current(), (jlong) 1, false);
        }
      }
    }
  }
  assert(tmp_free_list.is_empty(), "post-condition");
}
// Supporting Object and Oop closures for reference discovery
// and processing during marking
2197 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2198 HeapWord* addr = (HeapWord*)obj;
2199 return addr != NULL &&
2200 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2201 }
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
2204 // Uses the CMTask associated with a worker thread (for serial reference
2205 // processing the CMTask for worker 0 is used) to preserve (mark) and
2206 // trace referent objects.
2207 //
2208 // Using the CMTask and embedded local queues avoids having the worker
2209 // threads operating on the global mark stack. This reduces the risk
2210 // of overflowing the stack - which we would rather avoid at this late
2211 // state. Also using the tasks' local queues removes the potential
2212 // of the workers interfering with each other that could occur if
2213 // operating on the global stack.
2215 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2216 ConcurrentMark* _cm;
2217 CMTask* _task;
2218 int _ref_counter_limit;
2219 int _ref_counter;
2220 bool _is_serial;
2221 public:
2222 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2223 _cm(cm), _task(task), _is_serial(is_serial),
2224 _ref_counter_limit(G1RefProcDrainInterval) {
2225 assert(_ref_counter_limit > 0, "sanity");
2226 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2227 _ref_counter = _ref_counter_limit;
2228 }
2230 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2231 virtual void do_oop( oop* p) { do_oop_work(p); }
2233 template <class T> void do_oop_work(T* p) {
2234 if (!_cm->has_overflown()) {
2235 oop obj = oopDesc::load_decode_heap_oop(p);
2236 if (_cm->verbose_high()) {
2237 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2238 "*"PTR_FORMAT" = "PTR_FORMAT,
2239 _task->worker_id(), p, (void*) obj);
2240 }
2242 _task->deal_with_reference(obj);
2243 _ref_counter--;
2245 if (_ref_counter == 0) {
2246 // We have dealt with _ref_counter_limit references, pushing them
2247 // and objects reachable from them on to the local stack (and
2248 // possibly the global stack). Call CMTask::do_marking_step() to
2249 // process these entries.
2250 //
2251 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2252 // there's nothing more to do (i.e. we're done with the entries that
2253 // were pushed as a result of the CMTask::deal_with_reference() calls
2254 // above) or we overflow.
2255 //
2256 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2257 // flag while there may still be some work to do. (See the comment at
2258 // the beginning of CMTask::do_marking_step() for those conditions -
2259 // one of which is reaching the specified time target.) It is only
2260 // when CMTask::do_marking_step() returns without setting the
2261 // has_aborted() flag that the marking step has completed.
2262 do {
2263 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2264 _task->do_marking_step(mark_step_duration_ms,
2265 false /* do_termination */,
2266 _is_serial);
2267 } while (_task->has_aborted() && !_cm->has_overflown());
2268 _ref_counter = _ref_counter_limit;
2269 }
2270 } else {
2271 if (_cm->verbose_high()) {
2272 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2273 }
2274 }
2275 }
2276 };
2278 // 'Drain' oop closure used by both serial and parallel reference processing.
2279 // Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
2281 // do_marking_step routine, with an unbelievably large timeout value,
2282 // to drain the marking data structures of the remaining entries
2283 // added by the 'keep alive' oop closure above.
2285 class G1CMDrainMarkingStackClosure: public VoidClosure {
2286 ConcurrentMark* _cm;
2287 CMTask* _task;
2288 bool _is_serial;
2289 public:
2290 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2291 _cm(cm), _task(task), _is_serial(is_serial) {
2292 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2293 }
2295 void do_void() {
2296 do {
2297 if (_cm->verbose_high()) {
2298 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2299 _task->worker_id(), BOOL_TO_STR(_is_serial));
2300 }
2302 // We call CMTask::do_marking_step() to completely drain the local
2303 // and global marking stacks of entries pushed by the 'keep alive'
2304 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2305 //
2306 // CMTask::do_marking_step() is called in a loop, which we'll exit
2307 // if there's nothing more to do (i.e. we'completely drained the
2308 // entries that were pushed as a a result of applying the 'keep alive'
2309 // closure to the entries on the discovered ref lists) or we overflow
2310 // the global marking stack.
2311 //
2312 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2313 // flag while there may still be some work to do. (See the comment at
2314 // the beginning of CMTask::do_marking_step() for those conditions -
2315 // one of which is reaching the specified time target.) It is only
2316 // when CMTask::do_marking_step() returns without setting the
2317 // has_aborted() flag that the marking step has completed.
2319 _task->do_marking_step(1000000000.0 /* something very large */,
2320 true /* do_termination */,
2321 _is_serial);
2322 } while (_task->has_aborted() && !_cm->has_overflown());
2323 }
2324 };
2326 // Implementation of AbstractRefProcTaskExecutor for parallel
2327 // reference processing at the end of G1 concurrent marking
// AbstractRefProcTaskExecutor implementation that runs reference
// processing/enqueueing tasks on the concurrent marking work gang.
class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
private:
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;
  WorkGang*        _workers;         // gang the proxy tasks run on
  int              _active_workers;  // workers used per execute() call

public:
  G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
                          ConcurrentMark* cm,
                          WorkGang* workers,
                          int n_workers) :
    _g1h(g1h), _cm(cm),
    _workers(workers), _active_workers(n_workers) { }

  // Executes the given task using concurrent marking worker threads.
  virtual void execute(ProcessTask& task);
  virtual void execute(EnqueueTask& task);
};
// Gang task that wraps a reference ProcessTask: each worker builds
// per-worker 'is alive', 'keep alive', and 'drain' closures around its
// own CMTask and runs its share of the reference processing work.
class G1CMRefProcTaskProxy: public AbstractGangTask {
  typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
  ProcessTask&     _proc_task;
  G1CollectedHeap* _g1h;
  ConcurrentMark*  _cm;

public:
  G1CMRefProcTaskProxy(ProcessTask& proc_task,
                       G1CollectedHeap* g1h,
                       ConcurrentMark* cm) :
    AbstractGangTask("Process reference objects in parallel"),
    _proc_task(proc_task), _g1h(g1h), _cm(cm) {
    ReferenceProcessor* rp = _g1h->ref_processor_cm();
    assert(rp->processing_is_mt(), "shouldn't be here otherwise");
  }

  virtual void work(uint worker_id) {
    // Use the CMTask (and its local queues) belonging to this worker.
    CMTask* task = _cm->task(worker_id);
    G1CMIsAliveClosure g1_is_alive(_g1h);
    G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
    G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);

    _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
  }
};
// Runs a reference ProcessTask on the work gang via G1CMRefProcTaskProxy.
void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);

  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&proc_task_proxy);
  // Done with the parallel phase; reset the thread count.
  _g1h->set_par_threads(0);
}
2391 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2392 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2393 EnqueueTask& _enq_task;
2395 public:
2396 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2397 AbstractGangTask("Enqueue reference objects in parallel"),
2398 _enq_task(enq_task) { }
2400 virtual void work(uint worker_id) {
2401 _enq_task.work(worker_id);
2402 }
2403 };
// Runs a reference EnqueueTask on the work gang via G1CMRefEnqueueTaskProxy.
void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
  assert(_workers != NULL, "Need parallel worker threads.");
  assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");

  G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);

  // Not strictly necessary but...
  //
  // We need to reset the concurrency level before each
  // proxy task execution, so that the termination protocol
  // and overflow handling in CMTask::do_marking_step() knows
  // how many workers to wait for.
  _cm->set_concurrency(_active_workers);
  _g1h->set_par_threads(_active_workers);
  _workers->run_task(&enq_task_proxy);
  // Done with the parallel phase; reset the thread count.
  _g1h->set_par_threads(0);
}
// Processes the references discovered during concurrent marking
// (serially or via the work gang), then unlinks stale entries from the
// string and symbol tables. Skipped entirely on mark-stack overflow.
void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  if (has_overflown()) {
    // Skip processing the discovered references if we have
    // overflown the global marking stack. Reference objects
    // only get discovered once so it is OK to not
    // de-populate the discovered reference lists. We could have,
    // but the only benefit would be that, when marking restarts,
    // less reference objects are discovered.
    return;
  }

  ResourceMark rm;
  HandleMark   hm;

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Is alive closure.
  G1CMIsAliveClosure g1_is_alive(g1h);

  // Inner scope to exclude the cleaning of the string and symbol
  // tables from the displayed time.
  {
    if (G1Log::finer()) {
      gclog_or_tty->put(' ');
    }
    GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());

    ReferenceProcessor* rp = g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    // Set the soft reference policy
    rp->setup_policy(clear_all_soft_refs);
    assert(_markStack.isEmpty(), "mark stack should be empty");

    // Instances of the 'Keep Alive' and 'Complete GC' closures used
    // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
    // JNI references during parallel reference processing.
    //
    // These closures do not need to synchronize with the worker
    // threads involved in parallel reference processing as these
    // instances are executed serially by the current thread (e.g.
    // reference processing is not multi-threaded and is thus
    // performed by the current thread instead of a gang worker).
    //
    // The gang tasks involved in parallel reference processing create
    // their own instances of these closures, which do their own
    // synchronization among themselves.
    G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
    G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);

    // We need at least one active thread. If reference processing
    // is not multi-threaded we use the current (VMThread) thread,
    // otherwise we use the work gang from the G1CollectedHeap and
    // we utilize all the worker threads we can.
    bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
    uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
    active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);

    // Parallel processing task executor.
    G1CMRefProcTaskExecutor par_task_executor(g1h, this,
                                              g1h->workers(), active_workers);
    AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);

    // Set the concurrency level. The phase was already set prior to
    // executing the remark task.
    set_concurrency(active_workers);

    // Set the degree of MT processing here. If the discovery was done MT,
    // the number of threads involved during discovery could differ from
    // the number of active workers. This is OK as long as the discovered
    // Reference lists are balanced (see balance_all_queues() and balance_queues()).
    rp->set_active_mt_degree(active_workers);

    // Process the weak references.
    const ReferenceProcessorStats& stats =
        rp->process_discovered_references(&g1_is_alive,
                                          &g1_keep_alive,
                                          &g1_drain_mark_stack,
                                          executor,
                                          g1h->gc_timer_cm());
    g1h->gc_tracer_cm()->report_gc_reference_stats(stats);

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(_markStack.overflow() || _markStack.isEmpty(),
           "mark stack should be empty (unless it overflowed)");

    if (_markStack.overflow()) {
      // This should have been done already when we tried to push an
      // entry on to the global mark stack. But let's do it again.
      set_has_overflown();
    }

    assert(rp->num_q() == active_workers, "why not");

    rp->enqueue_discovered_references(executor);

    rp->verify_no_references_recorded();
    assert(!rp->discovery_enabled(), "Post condition");
  }

  // Now clean up stale oops in StringTable
  StringTable::unlink(&g1_is_alive);
  // Clean up unreferenced symbols in symbol table.
  SymbolTable::unlink();
}
2535 void ConcurrentMark::swapMarkBitMaps() {
2536 CMBitMapRO* temp = _prevMarkBitMap;
2537 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2538 _nextMarkBitMap = (CMBitMap*) temp;
2539 }
// Gang task for the remark phase: each active worker drives its CMTask
// through do_marking_step() until it completes without aborting, or
// until the global mark stack overflows.
class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  bool            _is_serial;  // true when executed by the VMThread (no gang)
public:
  void work(uint worker_id) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if (worker_id < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_id);
      task->record_start_time();
      do {
        // A huge timeout: the step runs to completion rather than
        // being time-sliced during remark.
        task->do_marking_step(1000000000.0 /* something very large */,
                              true         /* do_termination       */,
                              _is_serial);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
    AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
// The work of the remark pause: finishes marking by running CMRemarkTask
// (in parallel on the work gang, or serially on the VMThread), then
// checks the SATB-buffer invariant and prints statistics.
void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all active threads
    uint active_workers = g1h->workers()->active_workers();
    if (active_workers == 0) {
      assert(active_workers > 0, "Should have been set earlier");
      active_workers = (uint) ParallelGCThreads;
      g1h->workers()->set_active_workers(active_workers);
    }
    set_concurrency_and_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
    // value originally calculated in the ConcurrentMark
    // constructor and pass values of the active workers
    // through the gang in the task.

    CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    g1h->set_par_threads(active_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    uint active_workers = 1;
    set_concurrency_and_phase(active_workers, false /* concurrent */);

    // Note - if there's no work gang then the VMThread will be
    // the thread to execute the remark - serially. We have
    // to pass true for the is_serial parameter so that
    // CMTask::do_marking_step() doesn't enter the sync
    // barriers in the event of an overflow. Doing so will
    // cause an assert that the current thread is not a
    // concurrent GC thread.
    CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // After a successful (non-overflowing) remark all SATB buffers must
  // have been consumed.
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            err_msg("Invariant: has_overflown = %s, num buffers = %d",
                    BOOL_TO_STR(has_overflown()),
                    satb_mq_set.completed_buffers_num()));

  print_stats();
}
2623 #ifndef PRODUCT
// Debug-only (non-PRODUCT) closure: for every reference location visited
// it prints the location, the referenced object, and an annotation for
// the object's state (outside heap / above TAMS / marked / not marked).
class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;   // selects which TAMS/mark information to consult
  bool _all;          // carried along for the output options (see callers)

public:
  PrintReachableOopClosure(outputStream* out,
                           VerifyOption vo,
                           bool all) :
    _g1h(G1CollectedHeap::heap()),
    _out(out), _vo(vo), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop( oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      // " O" marks a referent outside the G1 reserved space.
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
      bool marked = _g1h->is_marked(obj, _vo);

      if (over_tams) {
        // " >" marks an object allocated since marking started.
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};
2674 class PrintReachableObjectClosure : public ObjectClosure {
2675 private:
2676 G1CollectedHeap* _g1h;
2677 outputStream* _out;
2678 VerifyOption _vo;
2679 bool _all;
2680 HeapRegion* _hr;
2682 public:
2683 PrintReachableObjectClosure(outputStream* out,
2684 VerifyOption vo,
2685 bool all,
2686 HeapRegion* hr) :
2687 _g1h(G1CollectedHeap::heap()),
2688 _out(out), _vo(vo), _all(all), _hr(hr) { }
2690 void do_object(oop o) {
2691 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2692 bool marked = _g1h->is_marked(o, _vo);
2693 bool print_it = _all || over_tams || marked;
2695 if (print_it) {
2696 _out->print_cr(" "PTR_FORMAT"%s",
2697 (void *)o, (over_tams) ? " >" : (marked) ? " M" : "");
2698 PrintReachableOopClosure oopCl(_out, _vo, _all);
2699 o->oop_iterate_no_header(&oopCl);
2700 }
2701 }
2702 };
// Debug-only (non-PRODUCT) closure: dumps each heap region's bounds,
// top, and TAMS, then the objects it contains (bottom..top) via
// PrintReachableObjectClosure.
class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  G1CollectedHeap* _g1h;
  outputStream* _out;
  VerifyOption _vo;   // selects which TAMS/mark information to consult
  bool _all;          // forwarded to the object closure

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: "PTR_FORMAT, b, e, t, p);
    _out->cr();

    // Only walk the allocated part of the region.
    HeapWord* from = b;
    HeapWord* to = t;

    if (to > from) {
      _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
      _out->cr();
      PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    // Return false to continue iterating over all regions.
    return false;
  }

  PrintReachableRegionClosure(outputStream* out,
                              VerifyOption vo,
                              bool all) :
    _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
};
2741 void ConcurrentMark::print_reachable(const char* str,
2742 VerifyOption vo,
2743 bool all) {
2744 gclog_or_tty->cr();
2745 gclog_or_tty->print_cr("== Doing heap dump... ");
2747 if (G1PrintReachableBaseFile == NULL) {
2748 gclog_or_tty->print_cr(" #### error: no base file defined");
2749 return;
2750 }
2752 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2753 (JVM_MAXPATHLEN - 1)) {
2754 gclog_or_tty->print_cr(" #### error: file name too long");
2755 return;
2756 }
2758 char file_name[JVM_MAXPATHLEN];
2759 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2760 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2762 fileStream fout(file_name);
2763 if (!fout.is_open()) {
2764 gclog_or_tty->print_cr(" #### error: could not open file");
2765 return;
2766 }
2768 outputStream* out = &fout;
2769 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2770 out->cr();
2772 out->print_cr("--- ITERATING OVER REGIONS");
2773 out->cr();
2774 PrintReachableRegionClosure rcl(out, vo, all);
2775 _g1h->heap_region_iterate(&rcl);
2776 out->cr();
2778 gclog_or_tty->print_cr(" done");
2779 gclog_or_tty->flush();
2780 }
2782 #endif // PRODUCT
// Clears the range mr in the previous ("prev") marking bitmap.
void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
  // Note we are overriding the read-only view of the prev map here, via
  // the cast.
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
}
// Clears the range mr in the next marking bitmap.
void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
  _nextMarkBitMap->clearRange(mr);
}
// Clears the range mr in both the prev and the next marking bitmaps.
void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
  clearRangePrevBitmap(mr);
  clearRangeNextBitmap(mr);
}
// Claims a region for worker_id by CAS-ing the global finger from the
// region's start to its end. Returns the claimed region if it has anything
// below NTAMS to scan; returns NULL if the claimed region was empty (the
// caller should retry) or the finger has reached the end of the heap. A
// lost CAS is handled internally by re-reading the finger and looping.
HeapRegion*
ConcurrentMark::claim_region(uint worker_id) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // Note on how this code handles humongous regions. In the
    // normal case the finger will reach the start of a "starts
    // humongous" (SH) region. Its end will either be the end of the
    // last "continues humongous" (CH) region in the sequence, or the
    // standard end of the SH region (if the SH is the only region in
    // the sequence). That way claim_region() will skip over the CH
    // regions. However, there is a subtle race between a CM thread
    // executing this method and a mutator thread doing a humongous
    // object allocation. The two are not mutually exclusive as the CM
    // thread does not need to hold the Heap_lock when it gets
    // here. So there is a chance that claim_region() will come across
    // a free region that's in the progress of becoming a SH or a CH
    // region. In the former case, it will either
    //   a) Miss the update to the region's end, in which case it will
    //      visit every subsequent CH region, will find their bitmaps
    //      empty, and do nothing, or
    //   b) Will observe the update of the region's end (in which case
    //      it will skip the subsequent CH regions).
    // If it comes across a region that suddenly becomes CH, the
    // scenario will be similar to b). So, the race between
    // claim_region() and a humongous object allocation might force us
    // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
    HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
    HeapWord* bottom = curr_region->bottom();
    HeapWord* end = curr_region->end();
    HeapWord* limit = curr_region->next_top_at_mark_start();

    if (verbose_low()) {
      gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             worker_id, curr_region, bottom, end, limit);
    }

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] we were successful with region = "
                               PTR_FORMAT, worker_id, curr_region);
      }

      if (limit > bottom) {
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
                                 "returning it ", worker_id, curr_region);
        }
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low()) {
          gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
                                 "returning NULL", worker_id, curr_region);
        }
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low()) {
        gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               worker_id, _finger, finger);
      }

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}
2892 #ifndef PRODUCT
// Identifies which data structure is being scanned during the no-CSet-oops
// verification below; used only for labelling failure messages.
enum VerifyNoCSetOopsPhase {
  VerifyNoCSetOopsStack,
  VerifyNoCSetOopsQueues,
  VerifyNoCSetOopsSATBCompleted,
  VerifyNoCSetOopsSATBThread
};
2900 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2901 private:
2902 G1CollectedHeap* _g1h;
2903 VerifyNoCSetOopsPhase _phase;
2904 int _info;
2906 const char* phase_str() {
2907 switch (_phase) {
2908 case VerifyNoCSetOopsStack: return "Stack";
2909 case VerifyNoCSetOopsQueues: return "Queue";
2910 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2911 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2912 default: ShouldNotReachHere();
2913 }
2914 return NULL;
2915 }
2917 void do_object_work(oop obj) {
2918 guarantee(!_g1h->obj_in_cs(obj),
2919 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2920 (void*) obj, phase_str(), _info));
2921 }
2923 public:
2924 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2926 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2927 _phase = phase;
2928 _info = info;
2929 }
2931 virtual void do_oop(oop* p) {
2932 oop obj = oopDesc::load_decode_heap_oop(p);
2933 do_object_work(obj);
2934 }
2936 virtual void do_oop(narrowOop* p) {
2937 // We should not come across narrow oops while scanning marking
2938 // stacks and SATB buffers.
2939 ShouldNotReachHere();
2940 }
2942 virtual void do_object(oop obj) {
2943 do_object_work(obj);
2944 }
2945 };
// Verifies that no entry on the global mark stack, the per-task queues, or
// the SATB buffers, and neither the global finger nor any task finger,
// references an object in the collection set. Must run at a safepoint;
// no-op unless marking is in progress. Each boolean enables one check.
void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
                                         bool verify_enqueued_buffers,
                                         bool verify_thread_buffers,
                                         bool verify_fingers) {
  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
  if (!G1CollectedHeap::heap()->mark_in_progress()) {
    return;
  }

  VerifyNoCSetOopsClosure cl;

  if (verify_stacks) {
    // Verify entries on the global mark stack
    cl.set_phase(VerifyNoCSetOopsStack);
    _markStack.oops_do(&cl);

    // Verify entries on the task queues
    for (uint i = 0; i < _max_worker_id; i += 1) {
      cl.set_phase(VerifyNoCSetOopsQueues, i);
      CMTaskQueue* queue = _task_queues->queue(i);
      queue->oops_do(&cl);
    }
  }

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();

  // Verify entries on the enqueued SATB buffers
  if (verify_enqueued_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
    satb_qs.iterate_completed_buffers_read_only(&cl);
  }

  // Verify entries on the per-thread SATB buffers
  if (verify_thread_buffers) {
    cl.set_phase(VerifyNoCSetOopsSATBThread);
    satb_qs.iterate_thread_buffers_read_only(&cl);
  }

  if (verify_fingers) {
    // Verify the global finger
    HeapWord* global_finger = finger();
    if (global_finger != NULL && global_finger < _heap_end) {
      // The global finger always points to a heap region boundary. We
      // use heap_region_containing_raw() to get the containing region
      // given that the global finger could be pointing to a free region
      // which subsequently becomes continues humongous. If that
      // happens, heap_region_containing() will return the bottom of the
      // corresponding starts humongous region and the check below will
      // not hold any more.
      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
      guarantee(global_finger == global_hr->bottom(),
                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
                        global_finger, HR_FORMAT_PARAMS(global_hr)));
    }

    // Verify the task fingers
    assert(parallel_marking_threads() <= _max_worker_id, "sanity");
    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
      CMTask* task = _tasks[i];
      HeapWord* task_finger = task->finger();
      if (task_finger != NULL && task_finger < _heap_end) {
        // See above note on the global finger verification.
        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
        guarantee(task_finger == task_hr->bottom() ||
                  !task_hr->in_collection_set(),
                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
                          task_finger, HR_FORMAT_PARAMS(task_hr)));
      }
    }
  }
}
3018 #endif // PRODUCT
// Aggregate the counting data that was constructed concurrently
// with marking.
//
// For each non-empty, non-"continues humongous" region this closure:
//  - sums the per-worker marked-bytes counts for the region and adds the
//    total to the region's marked-bytes field, and
//  - ORs every worker's card bitmap bits for the region's card range into
//    the global card bitmap.
class AggregateCountDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;
  BitMap* _cm_card_bm;
  uint _max_worker_id;

 public:
  AggregateCountDataHRClosure(G1CollectedHeap* g1h,
                              BitMap* cm_card_bm,
                              uint max_worker_id) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed.
      // Note that we cannot rely on their associated
      // "starts humongous" region to have their bit set to 1
      // since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* start = hr->bottom();
    HeapWord* limit = hr->next_top_at_mark_start();
    HeapWord* end = hr->end();

    assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
                   "top: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, limit, hr->top(), hr->end()));

    assert(hr->next_marked_bytes() == 0, "Precondition");

    if (start == limit) {
      // NTAMS of this region has not been set so nothing to do.
      return false;
    }

    // 'start' should be in the heap.
    assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
    // 'end' *may* be just beyond the end of the heap (if hr is the last region)
    assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
    BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);

    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could be actually just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
    if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
      limit_idx += 1;
    }

    assert(limit_idx <= end_idx, "or else use atomics");

    // Aggregate the "stripe" in the count data associated with hr.
    uint hrs_index = hr->hrs_index();
    size_t marked_bytes = 0;

    for (uint i = 0; i < _max_worker_id; i += 1) {
      size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
      BitMap* task_card_bm = _cm->count_card_bitmap_for(i);

      // Fetch the marked_bytes in this region for task i and
      // add it to the running total for this region.
      marked_bytes += marked_bytes_array[hrs_index];

      // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
      // into the global card bitmap.
      BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);

      while (scan_idx < limit_idx) {
        assert(task_card_bm->at(scan_idx) == true, "should be");
        _cm_card_bm->set_bit(scan_idx);
        assert(_cm_card_bm->at(scan_idx) == true, "should be");

        // BitMap::get_next_one_offset() can handle the case when
        // its left_offset parameter is greater than its right_offset
        // parameter. It does, however, have an early exit if
        // left_offset == right_offset. So let's limit the value
        // passed in for left offset here.
        BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
        scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
      }
    }

    // Update the marked bytes for this region.
    hr->add_to_marked_bytes(marked_bytes);

    // Next heap region
    return false;
  }
};
3127 class G1AggregateCountDataTask: public AbstractGangTask {
3128 protected:
3129 G1CollectedHeap* _g1h;
3130 ConcurrentMark* _cm;
3131 BitMap* _cm_card_bm;
3132 uint _max_worker_id;
3133 int _active_workers;
3135 public:
3136 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3137 ConcurrentMark* cm,
3138 BitMap* cm_card_bm,
3139 uint max_worker_id,
3140 int n_workers) :
3141 AbstractGangTask("Count Aggregation"),
3142 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3143 _max_worker_id(max_worker_id),
3144 _active_workers(n_workers) { }
3146 void work(uint worker_id) {
3147 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3149 if (G1CollectedHeap::use_parallel_gc_threads()) {
3150 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3151 _active_workers,
3152 HeapRegion::AggregateCountClaimValue);
3153 } else {
3154 _g1h->heap_region_iterate(&cl);
3155 }
3156 }
3157 };
3160 void ConcurrentMark::aggregate_count_data() {
3161 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3162 _g1h->workers()->active_workers() :
3163 1);
3165 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3166 _max_worker_id, n_workers);
3168 if (G1CollectedHeap::use_parallel_gc_threads()) {
3169 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3170 "sanity check");
3171 _g1h->set_par_threads(n_workers);
3172 _g1h->workers()->run_task(&g1_par_agg_task);
3173 _g1h->set_par_threads(0);
3175 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3176 "sanity check");
3177 _g1h->reset_heap_region_claim_values();
3178 } else {
3179 g1_par_agg_task.work(0);
3180 }
3181 }
3183 // Clear the per-worker arrays used to store the per-region counting data
3184 void ConcurrentMark::clear_all_count_data() {
3185 // Clear the global card bitmap - it will be filled during
3186 // liveness count aggregation (during remark) and the
3187 // final counting task.
3188 _card_bm.clear();
3190 // Clear the global region bitmap - it will be filled as part
3191 // of the final counting task.
3192 _region_bm.clear();
3194 uint max_regions = _g1h->max_regions();
3195 assert(_max_worker_id > 0, "uninitialized");
3197 for (uint i = 0; i < _max_worker_id; i += 1) {
3198 BitMap* task_card_bm = count_card_bitmap_for(i);
3199 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3201 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3202 assert(marked_bytes_array != NULL, "uninitialized");
3204 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3205 task_card_bm->clear();
3206 }
3207 }
3209 void ConcurrentMark::print_stats() {
3210 if (verbose_stats()) {
3211 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3212 for (size_t i = 0; i < _active_tasks; ++i) {
3213 _tasks[i]->print_stats();
3214 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3215 }
3216 }
3217 }
// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Clear the liveness counting data
  clear_all_count_data();
  // Empty mark stack
  reset_marking_state();
  // Drop any region the tasks are still holding on to.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->clear_region_fields();
  }
  _has_aborted = true;

  // Throw away any SATB work accumulated so far and deactivate the
  // SATB queues on all threads.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);

  _g1h->trace_heap_after_concurrent_cycle();
  _g1h->register_concurrent_cycle_end();
}
// Prints one summary line for the named phase: sample count, total time,
// and average, plus (when there are samples) std. deviation and maximum.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}
// Prints cumulative concurrent-marking timing statistics for the whole VM
// run: per-phase times, counting/RS-scrub totals, and the overall
// stop-the-world and concurrent time.
void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info(" ", "init marks", _init_times);
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    // Sub-phases of the remark pauses.
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
                         "(%8.2f s marking).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum());
}
// Prints the parallel marking worker threads (when in use) to st.
void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  if (use_parallel_marking_threads()) {
    _parallel_workers->print_worker_threads_on(st);
  }
}
// Prints the addresses and contents of both marking bitmaps; part of the
// error-reporting (print_on_error) machinery.
void ConcurrentMark::print_on_error(outputStream* st) const {
  st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
               _prevMarkBitMap, _nextMarkBitMap);
  _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
  _nextMarkBitMap->print_on_error(st, " Next Bits: ");
}
3298 // We take a break if someone is trying to stop the world.
3299 bool ConcurrentMark::do_yield_check(uint worker_id) {
3300 if (should_yield()) {
3301 if (worker_id == 0) {
3302 _g1h->g1_policy()->record_concurrent_pause();
3303 }
3304 cmThread()->yield();
3305 return true;
3306 } else {
3307 return false;
3308 }
3309 }
// Delegates to the concurrent mark thread's yield-request flag.
bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}
// Returns whether the card covering address p is set in the global
// liveness card bitmap.
bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}
// Returns whether the cards covering both start and last are marked.
bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return containing_card_is_marked(start) &&
         containing_card_is_marked(last);
}
3326 #ifndef PRODUCT
// for debugging purposes
// Prints the heap bounds, the global finger, and every task's local finger.
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (uint i = 0; i < _max_worker_id; ++i) {
    gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
3336 #endif
// Scans a single (already marked) object: visits all its references with
// the task's marking closure and accounts the scanned words towards the
// clock limits.
void CMTask::scan_object(oop obj) {
  assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");

  if (_cm->verbose_high()) {
    gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
                           _worker_id, (void*) obj);
  }

  size_t obj_size = obj->size();
  _words_scanned += obj_size;

  obj->oop_iterate(_cm_oop_closure);
  statsOnly( ++_objs_scanned );
  check_limits();
}
3354 // Closure for iteration over bitmaps
3355 class CMBitMapClosure : public BitMapClosure {
3356 private:
3357 // the bitmap that is being iterated over
3358 CMBitMap* _nextMarkBitMap;
3359 ConcurrentMark* _cm;
3360 CMTask* _task;
3362 public:
3363 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3364 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3366 bool do_bit(size_t offset) {
3367 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3368 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3369 assert( addr < _cm->finger(), "invariant");
3371 statsOnly( _task->increase_objs_found_on_bitmap() );
3372 assert(addr >= _task->finger(), "invariant");
3374 // We move that task's local finger along.
3375 _task->move_finger_to(addr);
3377 _task->scan_object(oop(addr));
3378 // we only partially drain the local queue and global stack
3379 _task->drain_local_queue(true);
3380 _task->drain_global_stack(true);
3382 // if the has_aborted flag has been raised, we need to bail out of
3383 // the iteration
3384 return !_task->has_aborted();
3385 }
3386 };
3388 // Closure for iterating over objects, currently only used for
3389 // processing SATB buffers.
3390 class CMObjectClosure : public ObjectClosure {
3391 private:
3392 CMTask* _task;
3394 public:
3395 void do_object(oop obj) {
3396 _task->deal_with_reference(obj);
3397 }
3399 CMObjectClosure(CMTask* task) : _task(task) { }
3400 };
// Constructs the marking oop closure for a task; wires up the concurrent
// marking reference processor when concurrent reference processing is on.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  // _ref_processor is not declared here — presumably inherited from the
  // closure base class; verify against g1OopClosures.hpp.
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}
3414 void CMTask::setup_for_region(HeapRegion* hr) {
3415 // Separated the asserts so that we know which one fires.
3416 assert(hr != NULL,
3417 "claim_region() should have filtered out continues humongous regions");
3418 assert(!hr->continuesHumongous(),
3419 "claim_region() should have filtered out continues humongous regions");
3421 if (_cm->verbose_low()) {
3422 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3423 _worker_id, hr);
3424 }
3426 _curr_region = hr;
3427 _finger = hr->bottom();
3428 update_region_limit();
3429 }
// Re-reads the current region's NTAMS into _region_limit, adjusting the
// local finger when the region has changed underneath us so that the
// following bitmap iteration does the right amount of work.
void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _worker_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
// Drops the task's claim on its current region (e.g. when aborting mid-region).
void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
                           _worker_id, _curr_region);
  }
  clear_region_fields();
}
3477 void CMTask::clear_region_fields() {
3478 // Values for these three fields that indicate that we're not
3479 // holding on to a region.
3480 _curr_region = NULL;
3481 _finger = NULL;
3482 _region_limit = NULL;
3483 }
// Installs or clears the task's marking oop closure. The asserts enforce
// strict alternation: a closure may only be installed when none is set,
// and only cleared when one is.
void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
  if (cm_oop_closure == NULL) {
    assert(_cm_oop_closure != NULL, "invariant");
  } else {
    assert(_cm_oop_closure == NULL, "invariant");
  }
  _cm_oop_closure = cm_oop_closure;
}
// Prepares the task for a new marking cycle over nextMarkBitMap: drops any
// claimed region and zeroes the per-cycle counters (and, in
// _MARKING_STATS_ builds, all the statistics counters).
void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] resetting", _worker_id);
  }

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  _local_pushes = 0;
  _local_pops = 0;
  _local_max_size = 0;
  _objs_scanned = 0;
  _global_pushes = 0;
  _global_pops = 0;
  _global_max_size = 0;
  _global_transfers_to = 0;
  _global_transfers_from = 0;
  _regions_claimed = 0;
  _objs_found_on_bitmap = 0;
  _satb_buffers_processed = 0;
  _steal_attempts = 0;
  _steals = 0;
  _aborted = 0;
  _aborted_overflow = 0;
  _aborted_cm_aborted = 0;
  _aborted_yield = 0;
  _aborted_timed_out = 0;
  _aborted_satb = 0;
  _aborted_termination = 0;
#endif // _MARKING_STATS_
}
// Polled while the task sits in the termination protocol; keeps the
// marking clock ticking while waiting.
bool CMTask::should_exit_termination() {
  regular_clock_call();
  // This is called when we are in the termination protocol. We should
  // quit if, for some reason, this task wants to abort or the global
  // stack is not empty (this means that we can get work from it).
  return !_cm->mark_stack_empty() || has_aborted();
}
// Invoked when either the words-scanned or the refs-reached soft limit has
// been hit; funnels into the periodic clock routine.
void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  regular_clock_call();
}
// Periodic housekeeping, reached via reached_limit(). Checks, in order,
// the conditions that require this task to abort its current step: mark
// stack overflow, marking aborted for Full GC, a pending yield request,
// an exhausted time quota, and pending completed SATB buffers.
void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _worker_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
                             _worker_id);
    }
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}
3637 void CMTask::recalculate_limits() {
3638 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3639 _words_scanned_limit = _real_words_scanned_limit;
3641 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3642 _refs_reached_limit = _real_refs_reached_limit;
3643 }
// Pulls the soft limits three quarters of a period closer so that the
// clock fires sooner after an expensive operation.
void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}
3661 void CMTask::move_entries_to_global_stack() {
3662 // local array where we'll store the entries that will be popped
3663 // from the local queue
3664 oop buffer[global_stack_transfer_size];
3666 int n = 0;
3667 oop obj;
3668 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3669 buffer[n] = obj;
3670 ++n;
3671 }
3673 if (n > 0) {
3674 // we popped at least one entry from the local queue
3676 statsOnly( ++_global_transfers_to; _local_pops += n );
3678 if (!_cm->mark_stack_push(buffer, n)) {
3679 if (_cm->verbose_low()) {
3680 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3681 _worker_id);
3682 }
3683 set_has_aborted();
3684 } else {
3685 // the transfer was successful
3687 if (_cm->verbose_medium()) {
3688 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3689 _worker_id, n);
3690 }
3691 statsOnly( int tmp_size = _cm->mark_stack_size();
3692 if (tmp_size > _global_max_size) {
3693 _global_max_size = tmp_size;
3694 }
3695 _global_pushes += n );
3696 }
3697 }
3699 // this operation was quite expensive, so decrease the limits
3700 decrease_limits();
3701 }
3703 void CMTask::get_entries_from_global_stack() {
3704 // local array where we'll store the entries that will be popped
3705 // from the global stack.
3706 oop buffer[global_stack_transfer_size];
3707 int n;
3708 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3709 assert(n <= global_stack_transfer_size,
3710 "we should not pop more than the given limit");
3711 if (n > 0) {
3712 // yes, we did actually pop at least one entry
3714 statsOnly( ++_global_transfers_from; _global_pops += n );
3715 if (_cm->verbose_medium()) {
3716 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3717 _worker_id, n);
3718 }
3719 for (int i = 0; i < n; ++i) {
3720 bool success = _task_queue->push(buffer[i]);
3721 // We only call this when the local queue is empty or under a
3722 // given target limit. So, we do not expect this push to fail.
3723 assert(success, "invariant");
3724 }
3726 statsOnly( int tmp_size = _task_queue->size();
3727 if (tmp_size > _local_max_size) {
3728 _local_max_size = tmp_size;
3729 }
3730 _local_pushes += n );
3731 }
3733 // this operation was quite expensive, so decrease the limits
3734 decrease_limits();
3735 }
3737 void CMTask::drain_local_queue(bool partially) {
3738 if (has_aborted()) return;
3740 // Decide what the target size is, depending whether we're going to
3741 // drain it partially (so that other tasks can steal if they run out
3742 // of things to do) or totally (at the very end).
3743 size_t target_size;
3744 if (partially) {
3745 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3746 } else {
3747 target_size = 0;
3748 }
3750 if (_task_queue->size() > target_size) {
3751 if (_cm->verbose_high()) {
3752 gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3753 _worker_id, target_size);
3754 }
3756 oop obj;
3757 bool ret = _task_queue->pop_local(obj);
3758 while (ret) {
3759 statsOnly( ++_local_pops );
3761 if (_cm->verbose_high()) {
3762 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3763 (void*) obj);
3764 }
3766 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3767 assert(!_g1h->is_on_master_free_list(
3768 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3770 scan_object(obj);
3772 if (_task_queue->size() <= target_size || has_aborted()) {
3773 ret = false;
3774 } else {
3775 ret = _task_queue->pop_local(obj);
3776 }
3777 }
3779 if (_cm->verbose_high()) {
3780 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3781 _worker_id, _task_queue->size());
3782 }
3783 }
3784 }
3786 void CMTask::drain_global_stack(bool partially) {
3787 if (has_aborted()) return;
3789 // We have a policy to drain the local queue before we attempt to
3790 // drain the global stack.
3791 assert(partially || _task_queue->size() == 0, "invariant");
3793 // Decide what the target size is, depending whether we're going to
3794 // drain it partially (so that other tasks can steal if they run out
3795 // of things to do) or totally (at the very end). Notice that,
3796 // because we move entries from the global stack in chunks or
3797 // because another task might be doing the same, we might in fact
3798 // drop below the target. But, this is not a problem.
3799 size_t target_size;
3800 if (partially) {
3801 target_size = _cm->partial_mark_stack_size_target();
3802 } else {
3803 target_size = 0;
3804 }
3806 if (_cm->mark_stack_size() > target_size) {
3807 if (_cm->verbose_low()) {
3808 gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3809 _worker_id, target_size);
3810 }
3812 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3813 get_entries_from_global_stack();
3814 drain_local_queue(partially);
3815 }
3817 if (_cm->verbose_low()) {
3818 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3819 _worker_id, _cm->mark_stack_size());
3820 }
3821 }
3822 }
3824 // SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
3826 // replicated. We should really get rid of the single-threaded version
3827 // of the code to simplify things.
// Processes completed SATB buffers by registering a CMObjectClosure
// with the SATB queue set, applying it to completed buffers until we
// run out or the task aborts, and (during remark only) to the
// partially-filled per-thread buffers as well. The closure is
// deregistered before returning.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // Register the closure with the queue set: the par variant attaches
  // it per worker id, the non-par variant globally.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_worker_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
      }
      statsOnly( ++_satb_buffers_processed );
      // Check the abort conditions between buffers so we react to
      // yields/timeouts promptly.
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
      }
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    // Apply the closure to the filled-in parts of the per-thread
    // buffers that have not yet been enqueued as completed.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_worker_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  // Unless we aborted (or are still concurrent, in which case mutators
  // may be enqueueing new buffers), the completed-buffer list must now
  // be empty.
  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // Deregister the (stack-allocated) closure before it goes out of scope.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_worker_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
// Prints this task's accumulated marking statistics. The first few
// lines are always available; the detailed counters below are only
// compiled in when _MARKING_STATS_ is enabled (they are the fields
// updated via the statsOnly() macro).
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
                         _worker_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to,_global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}
3933 /*****************************************************************************
3935 The do_marking_step(time_target_ms, ...) method is the building
3936 block of the parallel marking framework. It can be called in parallel
3937 with other invocations of do_marking_step() on different tasks
3938 (but only one per task, obviously) and concurrently with the
3939 mutator threads, or during remark, hence it eliminates the need
3940 for two versions of the code. When called during remark, it will
3941 pick up from where the task left off during the concurrent marking
3942 phase. Interestingly, tasks are also claimable during evacuation
3943 pauses too, since do_marking_step() ensures that it aborts before
3944 it needs to yield.
3946 The data structures that it uses to do marking work are the
3947 following:
3949 (1) Marking Bitmap. If there are gray objects that appear only
3950 on the bitmap (this happens either when dealing with an overflow
3951 or when the initial marking phase has simply marked the roots
3952 and didn't push them on the stack), then tasks claim heap
3953 regions whose bitmap they then scan to find gray objects. A
3954 global finger indicates where the end of the last claimed region
3955 is. A local finger indicates how far into the region a task has
3956 scanned. The two fingers are used to determine how to gray an
3957 object (i.e. whether simply marking it is OK, as it will be
3958 visited by a task in the future, or whether it needs to be also
3959 pushed on a stack).
3961 (2) Local Queue. The local queue of the task which is accessed
3962 reasonably efficiently by the task. Other tasks can steal from
3963 it when they run out of work. Throughout the marking phase, a
3964 task attempts to keep its local queue short but not totally
3965 empty, so that entries are available for stealing by other
3966 tasks. Only when there is no more work, a task will totally
3967 drain its local queue.
3969 (3) Global Mark Stack. This handles local queue overflow. During
3970 marking only sets of entries are moved between it and the local
3971 queues, as access to it requires a mutex and more fine-grain
3972 interaction with it which might cause contention. If it
3973 overflows, then the marking phase should restart and iterate
3974 over the bitmap to identify gray objects. Throughout the marking
3975 phase, tasks attempt to keep the global mark stack at a small
3976 length but not totally empty, so that entries are available for
3977 popping by other tasks. Only when there is no more work, tasks
3978 will totally drain the global mark stack.
3980 (4) SATB Buffer Queue. This is where completed SATB buffers are
3981 made available. Buffers are regularly removed from this queue
3982 and scanned for roots, so that the queue doesn't get too
3983 long. During remark, all completed buffers are processed, as
3984 well as the filled in parts of any uncompleted buffers.
3986 The do_marking_step() method tries to abort when the time target
3987 has been reached. There are a few other cases when the
3988 do_marking_step() method also aborts:
3990 (1) When the marking phase has been aborted (after a Full GC).
3992 (2) When a global overflow (on the global stack) has been
3993 triggered. Before the task aborts, it will actually sync up with
3994 the other tasks to ensure that all the marking data structures
3995 (local queues, stacks, fingers etc.) are re-initialized so that
3996 when do_marking_step() completes, the marking phase can
3997 immediately restart.
3999 (3) When enough completed SATB buffers are available. The
4000 do_marking_step() method only tries to drain SATB buffers right
4001 at the beginning. So, if enough buffers are available, the
4002 marking step aborts and the SATB buffers are processed at
4003 the beginning of the next invocation.
   (4) To yield. When we have to yield then we abort and yield
4006 right at the end of do_marking_step(). This saves us from a lot
4007 of hassle as, by yielding we might allow a Full GC. If this
4008 happens then objects will be compacted underneath our feet, the
4009 heap might shrink, etc. We save checking for this by just
4010 aborting and doing the yield right at the end.
4012 From the above it follows that the do_marking_step() method should
4013 be called in a loop (or, otherwise, regularly) until it completes.
4015 If a marking step completes without its has_aborted() flag being
4016 true, it means it has completed the current marking phase (and
4017 also all other marking tasks have done so and have all synced up).
4019 A method called regular_clock_call() is invoked "regularly" (in
4020 sub ms intervals) throughout marking. It is this clock method that
4021 checks all the abort conditions which were mentioned above and
4022 decides when the task should abort. A work-based scheme is used to
4023 trigger this clock method: when the number of object words the
4024 marking phase has scanned or the number of references the marking
4025 phase has visited reach a given limit. Additional invocations to
4026 the method clock have been planted in a few other strategic places
4027 too. The initial reason for the clock method was to avoid calling
4028 vtime too regularly, as it is quite expensive. So, once it was in
4029 place, it was natural to piggy-back all the other conditions on it
4030 too and not constantly check them throughout the code.
4032 If do_termination is true then do_marking_step will enter its
4033 termination protocol.
4035 The value of is_serial must be true when do_marking_step is being
4036 called serially (i.e. by the VMThread) and do_marking_step should
4037 skip any synchronization in the termination and overflow code.
4038 Examples include the serial remark code and the serial reference
4039 processing closures.
4041 The value of is_serial must be false when do_marking_step is
4042 being called by any of the worker threads in a work gang.
4043 Examples include the concurrent marking code (CMMarkingTask),
4044 the MT remark code, and the MT reference processing closures.
4046 *****************************************************************************/
// The building block of the parallel marking framework; see the large
// block comment above for the full description of the protocol.
// Scans claimed regions, drains local/global/SATB work sources, and
// (when do_termination is set) runs the termination protocol. On a
// global-stack overflow it synchronizes all tasks through two barriers
// before returning, so the whole marking phase can restart cleanly.
void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard again all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shave the predicted scheduling error off the time target so that,
  // on average, we finish close to the requested target.
  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _worker_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main loop: finish scanning the region we hold (if any), then keep
  // claiming and scanning new regions until we run out or abort.
  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "HR_FORMAT,
                               _worker_id, _finger, _region_limit,
                               HR_FORMAT_PARAMS(_curr_region));
      }

      assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
             "humongous regions should go around loop once only");

      // Some special cases:
      // If the memory region is empty, we can just give up the region.
      // If the current region is humongous then we only need to check
      // the bitmap for the bit associated with the start of the object,
      // scan the object if it's live, and give up the region.
      // Otherwise, let's iterate over the bitmap of the part of the region
      // that is left.
      // If the iteration is successful, give up the region.
      if (mr.is_empty()) {
        giveup_current_region();
        regular_clock_call();
      } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
        if (_nextMarkBitMap->isMarked(mr.start())) {
          // The object is marked - apply the closure
          BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
          bitmap_closure.do_bit(offset);
        }
        // Even if this task aborted while scanning the humongous object
        // we can (and should) give up the current region.
        giveup_current_region();
        regular_clock_call();
      } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
      }
      HeapRegion* claimed_region = _cm->claim_region(_worker_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low()) {
          gclog_or_tty->print_cr("[%u] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _worker_id, claimed_region);
        }

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
                                 _worker_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    // In the serial case there is nobody to rendezvous with, so we are
    // trivially "finished".
    bool finished = (is_serial ||
                     _cm->terminator()->offer_termination(this));
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_worker_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
      }
    } else {
      // Apparently there's more work to do. Let's abort this task. It
      // will restart it and we can hopefully find more things to do.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] apparently there is more work to do",
                               _worker_id);
      }

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
      }

      if (!is_serial) {
        // We only need to enter the sync barrier if being called
        // from a parallel context
        _cm->enter_first_sync_barrier(_worker_id);

        // When we exit this sync barrier we know that all tasks have
        // stopped doing marking work. So, it's now safe to
        // re-initialise our data structures. At the end of this method,
        // task 0 will clear the global data structures.
      }

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      if (!is_serial) {
        // ...and enter the second barrier.
        _cm->enter_second_sync_barrier(_worker_id);
      }
      // At this point, if we're during the concurrent phase of
      // marking, everything has been re-initialized and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted()) {
        gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
                               _worker_id);
      }
    }
  } else {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _worker_id, _time_target_ms, elapsed_time_ms);
    }
  }

  _claimed = false;
}
// CMTask constructor. Creates the per-worker marking task in an
// unclaimed, inactive state: the next mark bitmap and the CM oop
// closure are deliberately left NULL here and are installed later
// (before the task participates in a marking cycle).
//
// worker_id    - index of the worker thread that owns this task
// cm           - the ConcurrentMark instance this task belongs to
// marked_bytes - per-region marked-bytes accounting array for this worker
// card_bm      - this worker's card bitmap used for liveness accounting
// task_queue   - this task's local mark stack
// task_queues  - the set of all tasks' queues (enables work stealing)
CMTask::CMTask(uint worker_id,
               ConcurrentMark* cm,
               size_t* marked_bytes,
               BitMap* card_bm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _worker_id(worker_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _cm_oop_closure(NULL),
    _marked_bytes_array(marked_bytes),
    _card_bm(card_bm) {
  // A task without queues can never be executed; fail fast.
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking = 0 );

  // Seed the marking step time-diff statistics with a small non-zero
  // value so the first prediction is not wildly off.
  _marking_step_diffs_ms.add(0.5);
}
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX "###"

// Address range "start-end"; the header width depends on pointer size
// (two pointers plus the '-' separator and leading spaces).
#define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT " %-4s"
#define G1PPRL_TYPE_H_FORMAT " %4s"
#define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT " %9s"
#define G1PPRL_DOUBLE_FORMAT " %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT " %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
// Constructor: zeroes all the running totals and the humongous-series
// accumulators, then prints the liveness table header (heap summary
// line followed by the per-region column titles and their units) to
// the given stream.
//
// out        - stream that all G1PPRL_LINE_PREFIX-tagged lines go to
// phase_name - label identifying which pause/phase this dump is for
G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
    _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  // Timestamp lets dumps from the same run be correlated in the log.
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  // First header row: column names. Uses the *_H_* macros so the
  // widths line up with the per-region rows printed by doHeapRegion().
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff",
                 "remset", "code-roots");
  // Second header row: units for each column.
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
                 "(bytes)", "(bytes)");
}
4574 // It takes as a parameter a reference to one of the _hum_* fields, it
4575 // deduces the corresponding value for a region in a humongous region
4576 // series (either the region size, or what's left if the _hum_* field
4577 // is < the region size), and updates the _hum_* field accordingly.
4578 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4579 size_t bytes = 0;
4580 // The > 0 check is to deal with the prev and next live bytes which
4581 // could be 0.
4582 if (*hum_bytes > 0) {
4583 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4584 *hum_bytes -= bytes;
4585 }
4586 return bytes;
4587 }
4589 // It deduces the values for a region in a humongous region series
4590 // from the _hum_* fields and updates those accordingly. It assumes
4591 // that that _hum_* fields have already been set up from the "starts
4592 // humongous" region and we visit the regions in address order.
4593 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4594 size_t* capacity_bytes,
4595 size_t* prev_live_bytes,
4596 size_t* next_live_bytes) {
4597 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4598 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4599 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4600 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4601 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4602 }
4604 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4605 const char* type = "";
4606 HeapWord* bottom = r->bottom();
4607 HeapWord* end = r->end();
4608 size_t capacity_bytes = r->capacity();
4609 size_t used_bytes = r->used();
4610 size_t prev_live_bytes = r->live_bytes();
4611 size_t next_live_bytes = r->next_live_bytes();
4612 double gc_eff = r->gc_efficiency();
4613 size_t remset_bytes = r->rem_set()->mem_size();
4614 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4616 if (r->used() == 0) {
4617 type = "FREE";
4618 } else if (r->is_survivor()) {
4619 type = "SURV";
4620 } else if (r->is_young()) {
4621 type = "EDEN";
4622 } else if (r->startsHumongous()) {
4623 type = "HUMS";
4625 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4626 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4627 "they should have been zeroed after the last time we used them");
4628 // Set up the _hum_* fields.
4629 _hum_capacity_bytes = capacity_bytes;
4630 _hum_used_bytes = used_bytes;
4631 _hum_prev_live_bytes = prev_live_bytes;
4632 _hum_next_live_bytes = next_live_bytes;
4633 get_hum_bytes(&used_bytes, &capacity_bytes,
4634 &prev_live_bytes, &next_live_bytes);
4635 end = bottom + HeapRegion::GrainWords;
4636 } else if (r->continuesHumongous()) {
4637 type = "HUMC";
4638 get_hum_bytes(&used_bytes, &capacity_bytes,
4639 &prev_live_bytes, &next_live_bytes);
4640 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4641 } else {
4642 type = "OLD";
4643 }
4645 _total_used_bytes += used_bytes;
4646 _total_capacity_bytes += capacity_bytes;
4647 _total_prev_live_bytes += prev_live_bytes;
4648 _total_next_live_bytes += next_live_bytes;
4649 _total_remset_bytes += remset_bytes;
4650 _total_strong_code_roots_bytes += strong_code_roots_bytes;
4652 // Print a line for this particular region.
4653 _out->print_cr(G1PPRL_LINE_PREFIX
4654 G1PPRL_TYPE_FORMAT
4655 G1PPRL_ADDR_BASE_FORMAT
4656 G1PPRL_BYTE_FORMAT
4657 G1PPRL_BYTE_FORMAT
4658 G1PPRL_BYTE_FORMAT
4659 G1PPRL_DOUBLE_FORMAT
4660 G1PPRL_BYTE_FORMAT
4661 G1PPRL_BYTE_FORMAT,
4662 type, bottom, end,
4663 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4664 remset_bytes, strong_code_roots_bytes);
4666 return false;
4667 }
// Destructor: prints the table footer — a single SUMMARY line with the
// accumulated totals (capacities in MB, live/used also as a percentage
// of total capacity) — after adding the remembered set's static/free-list
// overhead, which is global rather than per-region, to the remset total.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  // add static memory usages to remembered set sizes
  _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
  // Print the footer of the output.
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 " SUMMARY"
                 G1PPRL_SUM_MB_FORMAT("capacity")
                 G1PPRL_SUM_MB_PERC_FORMAT("used")
                 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
                 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
                 G1PPRL_SUM_MB_FORMAT("remset")
                 G1PPRL_SUM_MB_FORMAT("code-roots"),
                 bytes_to_mb(_total_capacity_bytes),
                 bytes_to_mb(_total_used_bytes),
                 perc(_total_used_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_prev_live_bytes),
                 perc(_total_prev_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_next_live_bytes),
                 perc(_total_next_live_bytes, _total_capacity_bytes),
                 bytes_to_mb(_total_remset_bytes),
                 bytes_to_mb(_total_strong_code_roots_bytes));
  _out->cr();
}