Thu, 28 Mar 2013 10:27:28 +0100
7014552: gc/lock/jni/jnilockXXX works too slow on 1-processor machine
Summary: Keep a counter of how many times we were stalled by the GC locker, add a diagnostic flag which sets the limit.
Reviewed-by: brutisso, ehelin, johnc
/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}
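
// Note: each bit in _bm covers 2^_shifter heap words; judging by the size
// checks in covers() below, heapWordToOffset(addr) is effectively
// (addr - _bmStartWord) >> _shifter and offsetToHeapWord() its inverse.
// With _shifter == 0 the map is one bit per heap word.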

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size()>>LogHeapWordSize;
}
#endif

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size()/HeapWordSize; // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}
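
// Sizing sketch (illustrative, assuming _shifter == 0 and LogBitsPerByte == 3):
// on a 64-bit VM an 8 GB heap has _bmWordSize = 8 GB / 8 = 1G heap words,
// needing 1G bits = 1G >> 3 = 128 MB of backing store (plus one byte for
// rounding), i.e. roughly heap_size / 64 bytes per bitmap.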

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}
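
// Size sketch (illustrative): each entry is one oop, so a capacity of
// 4M entries reserves 4M * 8 = 32 MB on a 64-bit VM, before any doubling
// by expand() below.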

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
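
// The CAS above claims a slot: if _index still equals the value we read,
// it is advanced to index+1 and this thread owns _base[index]; otherwise
// another pusher won the race and we retry from the top of the loop.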

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
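
// Note for callers: when yield_after is true a false return means the
// drain yielded before the stack was empty, so the caller must be prepared
// to resume; a true return means the stack was fully drained.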

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
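
// claim_next() uses a double-checked pattern: _next_survivor is read
// optimistically without the lock, then re-read under RootRegionScan_lock
// before being advanced, so concurrent scanners hand out each survivor
// region exactly once.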

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}
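
// For example: with n_par_threads == 8 this yields (8 + 2) / 4 = 2 marking
// threads; the MAX2 keeps the result at a minimum of 1 (e.g. for a single
// parallel thread, where (1 + 2) / 4 == 0).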

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(MinObjAlignment - 1),
  _markBitMap2(MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;
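
      // Worked example (illustrative values, not defaults): with
      // G1MarkingOverheadPercent == 10, MaxGCPauseMillis == 200,
      // GCPauseIntervalMillis == 1000 and 8 processors:
      //   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
      //   cpu_ratio             = 1 / 8              = 0.125
      //   marking_thread_num    = ceil(0.02 / 0.125) = 1
      //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
      //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
      // i.e. one marking thread that sleeps 5.25x as long as it runs.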

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
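
    // Sizing sketch (illustrative; TASKQUEUE_SIZE is 128K entries on a
    // typical 64-bit build): with 4 marking threads this requests
    // MAX2(MarkStackSize, 4 * 128K) entries, clipped to MarkStackSizeMax.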
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);
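
  // For example: with the heap reserved at address 0x80000000 and
  // 512-byte cards (card_shift == 9), _heap_bottom_card_num is
  // 0x80000000 >> 9 == 0x400000; card bitmap indices are biased by this
  // value so that index 0 corresponds to the first card of the heap.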

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions;
  // these happen at the end of evacuation pauses, when tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
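  // Clearing proceeds in M-word chunks (8 MB of heap on a 64-bit VM) so
  // that do_yield_check() below can honour a pending safepoint request
  // between chunks rather than stalling it for the whole bitmap.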
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
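
// Taken together: init() arms G1ConcMarkForceOverflow forced overflows for
// the cycle, update() re-arms _force while that budget lasts, and
// should_force() consumes one forced overflow per query. This is a
// non-product testing knob for exercising the overflow/restart machinery.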
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(overflow)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UsePrevMarking);
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
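
      // Illustrative example (assuming the [start_idx, end_idx) range is
      // half-open, per set_card_bitmap_range below): with 512-byte cards,
      // a 700-byte object starting exactly on a card boundary at card k
      // gives start_idx == k and end_idx == k+1; obj_end lands mid-card,
      // so end_idx is bumped to k+2 and both cards k and k+1 get set.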

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};
1614 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1615 protected:
1616 G1CollectedHeap* _g1h;
1617 ConcurrentMark* _cm;
1618 BitMap* _actual_region_bm;
1619 BitMap* _actual_card_bm;
1621 uint _n_workers;
1623 BitMap* _expected_region_bm;
1624 BitMap* _expected_card_bm;
1626 int _failures;
1627 bool _verbose;
1629 public:
1630 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1631 BitMap* region_bm, BitMap* card_bm,
1632 BitMap* expected_region_bm, BitMap* expected_card_bm)
1633 : AbstractGangTask("G1 verify final counting"),
1634 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1635 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1636 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1637 _failures(0), _verbose(false),
1638 _n_workers(0) {
1639 assert(VerifyDuringGC, "don't call this otherwise");
1641 // Use the value already set as the number of active threads
1642 // in the call to run_task().
1643 if (G1CollectedHeap::use_parallel_gc_threads()) {
1644 assert( _g1h->workers()->active_workers() > 0,
1645 "Should have been previously set");
1646 _n_workers = _g1h->workers()->active_workers();
1647 } else {
1648 _n_workers = 1;
1649 }
1651 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1652 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1654 _verbose = _cm->verbose_medium();
1655 }
1657 void work(uint worker_id) {
1658 assert(worker_id < _n_workers, "invariant");
1660 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1661 _actual_region_bm, _actual_card_bm,
1662 _expected_region_bm,
1663 _expected_card_bm,
1664 _verbose);
1666 if (G1CollectedHeap::use_parallel_gc_threads()) {
1667 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1668 worker_id,
1669 _n_workers,
1670 HeapRegion::VerifyCountClaimValue);
1671 } else {
1672 _g1h->heap_region_iterate(&verify_cl);
1673 }
1675 Atomic::add(verify_cl.failures(), &_failures);
1676 }
1678 int failures() const { return _failures; }
1679 };
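// A stand-alone sketch of the failure-accumulation idiom used in
// work() above: each worker folds its local failure count into the
// shared total with an atomic add, so no lock is needed. Plain C++11
// atomics stand in for HotSpot's Atomic::add; all names below are
// illustrative only.

#include <atomic>

static void sketch_accumulate_failures(std::atomic<int>& total,
                                       int local_failures) {
  // Analogous to Atomic::add(verify_cl.failures(), &_failures) above.
  total.fetch_add(local_failures);
}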
1681 // Closure that finalizes the liveness counting data.
1682 // Used during the cleanup pause.
1683 // Sets the bits corresponding to the interval [NTAMS, top]
1684 // (which contains the implicitly live objects) in the
1685 // card liveness bitmap. Also sets the bit for each region,
1686 // containing live data, in the region liveness bitmap.
1688 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1689 public:
1690 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1691 BitMap* region_bm,
1692 BitMap* card_bm) :
1693 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1695 bool doHeapRegion(HeapRegion* hr) {
1697 if (hr->continuesHumongous()) {
1698 // We will ignore these here and process them when their
1699 // associated "starts humongous" region is processed (see
1700 // set_bit_for_heap_region()). Note that we cannot rely on their
1701 // associated "starts humongous" region to have its bit set to
1702 // 1 since, due to the region chunking in the parallel region
1703 // iteration, a "continues humongous" region might be visited
1704 // before its associated "starts humongous".
1705 return false;
1706 }
1708 HeapWord* ntams = hr->next_top_at_mark_start();
1709 HeapWord* top = hr->top();
1711 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1713 // Mark the allocated-since-marking portion...
1714 if (ntams < top) {
1715 // This definitely means the region has live objects.
1716 set_bit_for_region(hr);
1718 // Now set the bits in the card bitmap for [ntams, top)
1719 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1720 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1722 // Note: if we're looking at the last region in the heap - top
1723 // could actually be just beyond the end of the heap; end_idx
1724 // will then correspond to a (non-existent) card that is also
1725 // just beyond the heap.
1726 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1727 // end of object is not card aligned - increment to cover
1728 // all the cards spanned by the object
1729 end_idx += 1;
1730 }
1732 assert(end_idx <= _card_bm->size(),
1733 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1734 end_idx, _card_bm->size()));
1735 assert(start_idx < _card_bm->size(),
1736 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1737 start_idx, _card_bm->size()));
1739 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1740 }
1742 // Set the bit for the region if it contains live data
1743 if (hr->next_marked_bytes() > 0) {
1744 set_bit_for_region(hr);
1745 }
1747 return false;
1748 }
1749 };
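// A minimal sketch of the card-interval arithmetic performed in
// doHeapRegion() above: map a live word range [ntams, top) to a
// half-open card index range, bumping the end index when 'top' is not
// card aligned so that the final, partially covered card is included.
// The card size and helper names are illustrative stand-ins (assuming
// a card-aligned heap base), not the real CardTableModRefBS interface.

static const size_t sketch_card_size = 512;  // bytes per card (typical)

static size_t sketch_card_index_for(const char* heap_base, const char* addr) {
  return (size_t)(addr - heap_base) / sketch_card_size;
}

static void sketch_card_range(const char* heap_base,
                              const char* ntams, const char* top,
                              size_t* start_idx, size_t* end_idx) {
  *start_idx = sketch_card_index_for(heap_base, ntams);
  *end_idx   = sketch_card_index_for(heap_base, top);
  if (((size_t)(top - heap_base)) % sketch_card_size != 0) {
    *end_idx += 1;  // 'top' is mid-card: cover that last card too
  }
}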
1751 class G1ParFinalCountTask: public AbstractGangTask {
1752 protected:
1753 G1CollectedHeap* _g1h;
1754 ConcurrentMark* _cm;
1755 BitMap* _actual_region_bm;
1756 BitMap* _actual_card_bm;
1758 uint _n_workers;
1760 public:
1761 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1762 : AbstractGangTask("G1 final counting"),
1763 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1764 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1765 _n_workers(0) {
1766 // Use the value already set as the number of active threads
1767 // in the call to run_task().
1768 if (G1CollectedHeap::use_parallel_gc_threads()) {
1769 assert( _g1h->workers()->active_workers() > 0,
1770 "Should have been previously set");
1771 _n_workers = _g1h->workers()->active_workers();
1772 } else {
1773 _n_workers = 1;
1774 }
1775 }
1777 void work(uint worker_id) {
1778 assert(worker_id < _n_workers, "invariant");
1780 FinalCountDataUpdateClosure final_update_cl(_g1h,
1781 _actual_region_bm,
1782 _actual_card_bm);
1784 if (G1CollectedHeap::use_parallel_gc_threads()) {
1785 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1786 worker_id,
1787 _n_workers,
1788 HeapRegion::FinalCountClaimValue);
1789 } else {
1790 _g1h->heap_region_iterate(&final_update_cl);
1791 }
1792 }
1793 };
1795 class G1ParNoteEndTask;
1797 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1798 G1CollectedHeap* _g1;
1799 int _worker_num;
1800 size_t _max_live_bytes;
1801 uint _regions_claimed;
1802 size_t _freed_bytes;
1803 FreeRegionList* _local_cleanup_list;
1804 OldRegionSet* _old_proxy_set;
1805 HumongousRegionSet* _humongous_proxy_set;
1806 HRRSCleanupTask* _hrrs_cleanup_task;
1807 double _claimed_region_time;
1808 double _max_region_time;
1810 public:
1811 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1812 int worker_num,
1813 FreeRegionList* local_cleanup_list,
1814 OldRegionSet* old_proxy_set,
1815 HumongousRegionSet* humongous_proxy_set,
1816 HRRSCleanupTask* hrrs_cleanup_task) :
1817 _g1(g1), _worker_num(worker_num),
1818 _max_live_bytes(0), _regions_claimed(0),
1819 _freed_bytes(0),
1820 _claimed_region_time(0.0), _max_region_time(0.0),
1821 _local_cleanup_list(local_cleanup_list),
1822 _old_proxy_set(old_proxy_set),
1823 _humongous_proxy_set(humongous_proxy_set),
1824 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1826 size_t freed_bytes() { return _freed_bytes; }
1828 bool doHeapRegion(HeapRegion *hr) {
1829 if (hr->continuesHumongous()) {
1830 return false;
1831 }
1832 // We use a claim value of zero here because all regions
1833 // were claimed with value 1 in the FinalCount task.
1834 _g1->reset_gc_time_stamps(hr);
1835 double start = os::elapsedTime();
1836 _regions_claimed++;
1837 hr->note_end_of_marking();
1838 _max_live_bytes += hr->max_live_bytes();
1839 _g1->free_region_if_empty(hr,
1840 &_freed_bytes,
1841 _local_cleanup_list,
1842 _old_proxy_set,
1843 _humongous_proxy_set,
1844 _hrrs_cleanup_task,
1845 true /* par */);
1846 double region_time = (os::elapsedTime() - start);
1847 _claimed_region_time += region_time;
1848 if (region_time > _max_region_time) {
1849 _max_region_time = region_time;
1850 }
1851 return false;
1852 }
1854 size_t max_live_bytes() { return _max_live_bytes; }
1855 uint regions_claimed() { return _regions_claimed; }
1856 double claimed_region_time_sec() { return _claimed_region_time; }
1857 double max_region_time_sec() { return _max_region_time; }
1858 };
1860 class G1ParNoteEndTask: public AbstractGangTask {
1861 friend class G1NoteEndOfConcMarkClosure;
1863 protected:
1864 G1CollectedHeap* _g1h;
1865 size_t _max_live_bytes;
1866 size_t _freed_bytes;
1867 FreeRegionList* _cleanup_list;
1869 public:
1870 G1ParNoteEndTask(G1CollectedHeap* g1h,
1871 FreeRegionList* cleanup_list) :
1872 AbstractGangTask("G1 note end"), _g1h(g1h),
1873 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1875 void work(uint worker_id) {
1876 double start = os::elapsedTime();
1877 FreeRegionList local_cleanup_list("Local Cleanup List");
1878 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1879 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1880 HRRSCleanupTask hrrs_cleanup_task;
1881 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1882 &old_proxy_set,
1883 &humongous_proxy_set,
1884 &hrrs_cleanup_task);
1885 if (G1CollectedHeap::use_parallel_gc_threads()) {
1886 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1887 _g1h->workers()->active_workers(),
1888 HeapRegion::NoteEndClaimValue);
1889 } else {
1890 _g1h->heap_region_iterate(&g1_note_end);
1891 }
1892 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1894 // Now update the lists
1895 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1896 NULL /* free_list */,
1897 &old_proxy_set,
1898 &humongous_proxy_set,
1899 true /* par */);
1900 {
1901 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1902 _max_live_bytes += g1_note_end.max_live_bytes();
1903 _freed_bytes += g1_note_end.freed_bytes();
1905 // If we iterate over the global cleanup list at the end of
1906 // cleanup to do this printing we cannot guarantee that we will only
1907 // generate output for the newly-reclaimed regions (the list
1908 // might not be empty at the beginning of cleanup; we might
1909 // still be working on its previous contents). So we do the
1910 // printing here, before we append the new regions to the global
1911 // cleanup list.
1913 G1HRPrinter* hr_printer = _g1h->hr_printer();
1914 if (hr_printer->is_active()) {
1915 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1916 while (iter.more_available()) {
1917 HeapRegion* hr = iter.get_next();
1918 hr_printer->cleanup(hr);
1919 }
1920 }
1922 _cleanup_list->add_as_tail(&local_cleanup_list);
1923 assert(local_cleanup_list.is_empty(), "post-condition");
1925 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1926 }
1927 }
1928 size_t max_live_bytes() { return _max_live_bytes; }
1929 size_t freed_bytes() { return _freed_bytes; }
1930 };
1932 class G1ParScrubRemSetTask: public AbstractGangTask {
1933 protected:
1934 G1RemSet* _g1rs;
1935 BitMap* _region_bm;
1936 BitMap* _card_bm;
1937 public:
1938 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1939 BitMap* region_bm, BitMap* card_bm) :
1940 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1941 _region_bm(region_bm), _card_bm(card_bm) { }
1943 void work(uint worker_id) {
1944 if (G1CollectedHeap::use_parallel_gc_threads()) {
1945 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1946 HeapRegion::ScrubRemSetClaimValue);
1947 } else {
1948 _g1rs->scrub(_region_bm, _card_bm);
1949 }
1950 }
1952 };
1954 void ConcurrentMark::cleanup() {
1955 // world is stopped at this checkpoint
1956 assert(SafepointSynchronize::is_at_safepoint(),
1957 "world should be stopped");
1958 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1960 // If a full collection has happened, we shouldn't do this.
1961 if (has_aborted()) {
1962 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1963 return;
1964 }
1966 HRSPhaseSetter x(HRSPhaseCleanup);
1967 g1h->verify_region_sets_optional();
1969 if (VerifyDuringGC) {
1970 HandleMark hm; // handle scope
1971 gclog_or_tty->print(" VerifyDuringGC:(before)");
1972 Universe::heap()->prepare_for_verify();
1973 Universe::verify(/* silent */ false,
1974 /* option */ VerifyOption_G1UsePrevMarking);
1975 }
1977 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1978 g1p->record_concurrent_mark_cleanup_start();
1980 double start = os::elapsedTime();
1982 HeapRegionRemSet::reset_for_cleanup_tasks();
1984 uint n_workers;
1986 // Do counting once more with the world stopped for good measure.
1987 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1989 if (G1CollectedHeap::use_parallel_gc_threads()) {
1990 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1991 "sanity check");
1993 g1h->set_par_threads();
1994 n_workers = g1h->n_par_threads();
1995 assert(g1h->n_par_threads() == n_workers,
1996 "Should not have been reset");
1997 g1h->workers()->run_task(&g1_par_count_task);
1998 // Done with the parallel phase so reset to 0.
1999 g1h->set_par_threads(0);
2001 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2002 "sanity check");
2003 } else {
2004 n_workers = 1;
2005 g1_par_count_task.work(0);
2006 }
2008 if (VerifyDuringGC) {
2009 // Verify that the counting data accumulated during marking matches
2010 // that calculated by walking the marking bitmap.
2012 // Bitmaps to hold expected values
2013 BitMap expected_region_bm(_region_bm.size(), false);
2014 BitMap expected_card_bm(_card_bm.size(), false);
2016 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2017 &_region_bm,
2018 &_card_bm,
2019 &expected_region_bm,
2020 &expected_card_bm);
2022 if (G1CollectedHeap::use_parallel_gc_threads()) {
2023 g1h->set_par_threads((int)n_workers);
2024 g1h->workers()->run_task(&g1_par_verify_task);
2025 // Done with the parallel phase so reset to 0.
2026 g1h->set_par_threads(0);
2028 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2029 "sanity check");
2030 } else {
2031 g1_par_verify_task.work(0);
2032 }
2034 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2035 }
2037 size_t start_used_bytes = g1h->used();
2038 g1h->set_marking_complete();
2040 double count_end = os::elapsedTime();
2041 double this_final_counting_time = (count_end - start);
2042 _total_counting_time += this_final_counting_time;
2044 if (G1PrintRegionLivenessInfo) {
2045 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2046 _g1h->heap_region_iterate(&cl);
2047 }
2049 // Install newly created mark bitMap as "prev".
2050 swapMarkBitMaps();
2052 g1h->reset_gc_time_stamp();
2054 // Note end of marking in all heap regions.
2055 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2056 if (G1CollectedHeap::use_parallel_gc_threads()) {
2057 g1h->set_par_threads((int)n_workers);
2058 g1h->workers()->run_task(&g1_par_note_end_task);
2059 g1h->set_par_threads(0);
2061 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2062 "sanity check");
2063 } else {
2064 g1_par_note_end_task.work(0);
2065 }
2066 g1h->check_gc_time_stamps();
2068 if (!cleanup_list_is_empty()) {
2069 // The cleanup list is not empty, so we'll have to process it
2070 // concurrently. Notify anyone else that might be wanting free
2071 // regions that there will be more free regions coming soon.
2072 g1h->set_free_regions_coming();
2073 }
2075 // Scrub the remembered sets, if enabled, before the record_concurrent_mark_cleanup_end()
2076 // call below, since scrubbing affects the metric by which we sort the heap regions.
2077 if (G1ScrubRemSets) {
2078 double rs_scrub_start = os::elapsedTime();
2079 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2080 if (G1CollectedHeap::use_parallel_gc_threads()) {
2081 g1h->set_par_threads((int)n_workers);
2082 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2083 g1h->set_par_threads(0);
2085 assert(g1h->check_heap_region_claim_values(
2086 HeapRegion::ScrubRemSetClaimValue),
2087 "sanity check");
2088 } else {
2089 g1_par_scrub_rs_task.work(0);
2090 }
2092 double rs_scrub_end = os::elapsedTime();
2093 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2094 _total_rs_scrub_time += this_rs_scrub_time;
2095 }
2097 // this will also free any regions totally full of garbage objects,
2098 // and sort the regions.
2099 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2101 // Statistics.
2102 double end = os::elapsedTime();
2103 _cleanup_times.add((end - start) * 1000.0);
2105 if (G1Log::fine()) {
2106 g1h->print_size_transition(gclog_or_tty,
2107 start_used_bytes,
2108 g1h->used(),
2109 g1h->capacity());
2110 }
2112 // Clean up will have freed any regions completely full of garbage.
2113 // Update the soft reference policy with the new heap occupancy.
2114 Universe::update_heap_info_at_gc();
2116 // We need to make this a "collection" so any collection pause that
2117 // races with it goes around and waits for completeCleanup to finish.
2118 g1h->increment_total_collections();
2120 // We reclaimed old regions so we should calculate the sizes to make
2121 // sure we update the old gen/space data.
2122 g1h->g1mm()->update_sizes();
2124 if (VerifyDuringGC) {
2125 HandleMark hm; // handle scope
2126 gclog_or_tty->print(" VerifyDuringGC:(after)");
2127 Universe::heap()->prepare_for_verify();
2128 Universe::verify(/* silent */ false,
2129 /* option */ VerifyOption_G1UsePrevMarking);
2130 }
2132 g1h->verify_region_sets_optional();
2133 }
2135 void ConcurrentMark::completeCleanup() {
2136 if (has_aborted()) return;
2138 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2140 _cleanup_list.verify_optional();
2141 FreeRegionList tmp_free_list("Tmp Free List");
2143 if (G1ConcRegionFreeingVerbose) {
2144 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2145 "cleanup list has %u entries",
2146 _cleanup_list.length());
2147 }
2149 // No one else should be accessing the _cleanup_list at this point,
2150 // so it's not necessary to take any locks
2151 while (!_cleanup_list.is_empty()) {
2152 HeapRegion* hr = _cleanup_list.remove_head();
2153 assert(hr != NULL, "the list was not empty");
2154 hr->par_clear();
2155 tmp_free_list.add_as_tail(hr);
2157 // Instead of adding one region at a time to the secondary_free_list,
2158 // we accumulate them in the local list and move them a few at a
2159 // time. This also cuts down on the number of notify_all() calls
2160 // we do during this process. We'll also append the local list when
2161 // _cleanup_list is empty (which means we just removed the last
2162 // region from the _cleanup_list).
2163 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2164 _cleanup_list.is_empty()) {
2165 if (G1ConcRegionFreeingVerbose) {
2166 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2167 "appending %u entries to the secondary_free_list, "
2168 "cleanup list still has %u entries",
2169 tmp_free_list.length(),
2170 _cleanup_list.length());
2171 }
2173 {
2174 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2175 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2176 SecondaryFreeList_lock->notify_all();
2177 }
2179 if (G1StressConcRegionFreeing) {
2180 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2181 os::sleep(Thread::current(), (jlong) 1, false);
2182 }
2183 }
2184 }
2185 }
2186 assert(tmp_free_list.is_empty(), "post-condition");
2187 }
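// A minimal sketch of the batching idiom in completeCleanup(): work
// items accumulate in a thread-local list and are only moved to the
// shared list (under the lock, with a notify_all()) every 'batch'
// items, or when the producer is finished. std:: types stand in for
// the HotSpot lists and locks; all names are illustrative.

#include <condition_variable>
#include <mutex>
#include <vector>

static void sketch_flush_batch(std::vector<int>& local,
                               std::vector<int>& shared,
                               std::mutex& lock,
                               std::condition_variable& cv,
                               size_t batch, bool done) {
  if (local.empty() || (local.size() % batch != 0 && !done)) {
    return;  // keep accumulating locally; avoids frequent lock traffic
  }
  {
    std::lock_guard<std::mutex> g(lock);
    shared.insert(shared.end(), local.begin(), local.end());
  }
  local.clear();
  cv.notify_all();  // wake anyone waiting for newly freed items
}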
2189 // Supporting Object and Oop closures for reference discovery
2190 // and processing during marking
2192 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2193 HeapWord* addr = (HeapWord*)obj;
2194 return addr != NULL &&
2195 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2196 }
2198 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2199 // Uses the CMTask associated with a worker thread (for serial reference
2200 // processing the CMTask for worker 0 is used) to preserve (mark) and
2201 // trace referent objects.
2202 //
2203 // Using the CMTask and embedded local queues avoids having the worker
2204 // threads operating on the global mark stack. This reduces the risk
2205 // of overflowing the stack - which we would rather avoid at this late
2206 // stage. Also, using the tasks' local queues removes the potential
2207 // for the workers to interfere with each other, which could occur if
2208 // they operated on the global stack.
2210 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2211 ConcurrentMark* _cm;
2212 CMTask* _task;
2213 int _ref_counter_limit;
2214 int _ref_counter;
2215 bool _is_serial;
2216 public:
2217 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2218 _cm(cm), _task(task), _is_serial(is_serial),
2219 _ref_counter_limit(G1RefProcDrainInterval) {
2220 assert(_ref_counter_limit > 0, "sanity");
2221 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2222 _ref_counter = _ref_counter_limit;
2223 }
2225 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2226 virtual void do_oop( oop* p) { do_oop_work(p); }
2228 template <class T> void do_oop_work(T* p) {
2229 if (!_cm->has_overflown()) {
2230 oop obj = oopDesc::load_decode_heap_oop(p);
2231 if (_cm->verbose_high()) {
2232 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2233 "*"PTR_FORMAT" = "PTR_FORMAT,
2234 _task->worker_id(), p, (void*) obj);
2235 }
2237 _task->deal_with_reference(obj);
2238 _ref_counter--;
2240 if (_ref_counter == 0) {
2241 // We have dealt with _ref_counter_limit references, pushing them
2242 // and objects reachable from them on to the local stack (and
2243 // possibly the global stack). Call CMTask::do_marking_step() to
2244 // process these entries.
2245 //
2246 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2247 // there's nothing more to do (i.e. we're done with the entries that
2248 // were pushed as a result of the CMTask::deal_with_reference() calls
2249 // above) or we overflow.
2250 //
2251 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2252 // flag while there may still be some work to do. (See the comment at
2253 // the beginning of CMTask::do_marking_step() for those conditions -
2254 // one of which is reaching the specified time target.) It is only
2255 // when CMTask::do_marking_step() returns without setting the
2256 // has_aborted() flag that the marking step has completed.
2257 do {
2258 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2259 _task->do_marking_step(mark_step_duration_ms,
2260 false /* do_termination */,
2261 _is_serial);
2262 } while (_task->has_aborted() && !_cm->has_overflown());
2263 _ref_counter = _ref_counter_limit;
2264 }
2265 } else {
2266 if (_cm->verbose_high()) {
2267 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2268 }
2269 }
2270 }
2271 };
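// A stand-alone sketch of the drain-interval pattern implemented by
// do_oop_work() above: process references one at a time, but pause to
// drain buffered work every 'limit' items so that local queues cannot
// grow without bound. In the real closure the drain step is a loop
// over CMTask::do_marking_step(); everything below is illustrative.

template <typename Ref, typename ProcessFn, typename DrainFn>
static void sketch_process_with_drain(Ref* refs, int n, int limit,
                                      ProcessFn process, DrainFn drain) {
  int counter = limit;
  for (int i = 0; i < n; i++) {
    process(refs[i]);
    if (--counter == 0) {
      drain();           // we have dealt with 'limit' references
      counter = limit;   // reset for the next interval
    }
  }
}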
2273 // 'Drain' oop closure used by both serial and parallel reference processing.
2274 // Uses the CMTask associated with a given worker thread (for serial
2275 // reference processing the CMTask for worker 0 is used). Calls the
2276 // do_marking_step routine, with an unbelievably large timeout value,
2277 // to drain the marking data structures of the remaining entries
2278 // added by the 'keep alive' oop closure above.
2280 class G1CMDrainMarkingStackClosure: public VoidClosure {
2281 ConcurrentMark* _cm;
2282 CMTask* _task;
2283 bool _is_serial;
2284 public:
2285 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2286 _cm(cm), _task(task), _is_serial(is_serial) {
2287 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2288 }
2290 void do_void() {
2291 do {
2292 if (_cm->verbose_high()) {
2293 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2294 _task->worker_id(), BOOL_TO_STR(_is_serial));
2295 }
2297 // We call CMTask::do_marking_step() to completely drain the local
2298 // and global marking stacks of entries pushed by the 'keep alive'
2299 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2300 //
2301 // CMTask::do_marking_step() is called in a loop, which we'll exit
2302 // if there's nothing more to do (i.e. we've completely drained the
2303 // entries that were pushed as a result of applying the 'keep alive'
2304 // closure to the entries on the discovered ref lists) or we overflow
2305 // the global marking stack.
2306 //
2307 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2308 // flag while there may still be some work to do. (See the comment at
2309 // the beginning of CMTask::do_marking_step() for those conditions -
2310 // one of which is reaching the specified time target.) It is only
2311 // when CMTask::do_marking_step() returns without setting the
2312 // has_aborted() flag that the marking step has completed.
2314 _task->do_marking_step(1000000000.0 /* something very large */,
2315 true /* do_termination */,
2316 _is_serial);
2317 } while (_task->has_aborted() && !_cm->has_overflown());
2318 }
2319 };
2321 // Implementation of AbstractRefProcTaskExecutor for parallel
2322 // reference processing at the end of G1 concurrent marking
2324 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2325 private:
2326 G1CollectedHeap* _g1h;
2327 ConcurrentMark* _cm;
2328 WorkGang* _workers;
2329 int _active_workers;
2331 public:
2332 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2333 ConcurrentMark* cm,
2334 WorkGang* workers,
2335 int n_workers) :
2336 _g1h(g1h), _cm(cm),
2337 _workers(workers), _active_workers(n_workers) { }
2339 // Executes the given task using concurrent marking worker threads.
2340 virtual void execute(ProcessTask& task);
2341 virtual void execute(EnqueueTask& task);
2342 };
2344 class G1CMRefProcTaskProxy: public AbstractGangTask {
2345 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2346 ProcessTask& _proc_task;
2347 G1CollectedHeap* _g1h;
2348 ConcurrentMark* _cm;
2350 public:
2351 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2352 G1CollectedHeap* g1h,
2353 ConcurrentMark* cm) :
2354 AbstractGangTask("Process reference objects in parallel"),
2355 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2356 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2357 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2358 }
2360 virtual void work(uint worker_id) {
2361 CMTask* task = _cm->task(worker_id);
2362 G1CMIsAliveClosure g1_is_alive(_g1h);
2363 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2364 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2366 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2367 }
2368 };
2370 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2371 assert(_workers != NULL, "Need parallel worker threads.");
2372 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2374 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2376 // We need to reset the concurrency level before each
2377 // proxy task execution, so that the termination protocol
2378 // and overflow handling in CMTask::do_marking_step() know
2379 // how many workers to wait for.
2380 _cm->set_concurrency(_active_workers);
2381 _g1h->set_par_threads(_active_workers);
2382 _workers->run_task(&proc_task_proxy);
2383 _g1h->set_par_threads(0);
2384 }
2386 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2387 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2388 EnqueueTask& _enq_task;
2390 public:
2391 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2392 AbstractGangTask("Enqueue reference objects in parallel"),
2393 _enq_task(enq_task) { }
2395 virtual void work(uint worker_id) {
2396 _enq_task.work(worker_id);
2397 }
2398 };
2400 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2401 assert(_workers != NULL, "Need parallel worker threads.");
2402 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2404 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2406 // Not strictly necessary but...
2407 //
2408 // We need to reset the concurrency level before each
2409 // proxy task execution, so that the termination protocol
2410 // and overflow handling in CMTask::do_marking_step() know
2411 // how many workers to wait for.
2412 _cm->set_concurrency(_active_workers);
2413 _g1h->set_par_threads(_active_workers);
2414 _workers->run_task(&enq_task_proxy);
2415 _g1h->set_par_threads(0);
2416 }
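// A minimal sketch of the proxy idiom used by both executor methods
// above: adapt a per-worker task to the gang-task interface by holding
// a reference and forwarding work(). 'SketchGangTask' is an assumed
// stand-in for AbstractGangTask, not a real HotSpot type.

class SketchGangTask {
 public:
  virtual void work(unsigned worker_id) = 0;
  virtual ~SketchGangTask() { }
};

template <typename Task>
class SketchTaskProxy : public SketchGangTask {
  Task& _task;
 public:
  explicit SketchTaskProxy(Task& task) : _task(task) { }
  virtual void work(unsigned worker_id) { _task.work(worker_id); }
};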
2418 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2419 if (has_overflown()) {
2420 // Skip processing the discovered references if we have
2421 // overflown the global marking stack. Reference objects
2422 // only get discovered once so it is OK to not
2423 // de-populate the discovered reference lists. We could have,
2424 // but the only benefit would be that, when marking restarts,
2425 // fewer reference objects are discovered.
2426 return;
2427 }
2429 ResourceMark rm;
2430 HandleMark hm;
2432 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2434 // Is alive closure.
2435 G1CMIsAliveClosure g1_is_alive(g1h);
2437 // Inner scope to exclude the cleaning of the string and symbol
2438 // tables from the displayed time.
2439 {
2440 if (G1Log::finer()) {
2441 gclog_or_tty->put(' ');
2442 }
2443 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2445 ReferenceProcessor* rp = g1h->ref_processor_cm();
2447 // See the comment in G1CollectedHeap::ref_processing_init()
2448 // about how reference processing currently works in G1.
2450 // Set the soft reference policy
2451 rp->setup_policy(clear_all_soft_refs);
2452 assert(_markStack.isEmpty(), "mark stack should be empty");
2454 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2455 // in serial reference processing. Note these closures are also
2456 // used for serially processing (by the current thread) the
2457 // JNI references during parallel reference processing.
2458 //
2459 // These closures do not need to synchronize with the worker
2460 // threads involved in parallel reference processing as these
2461 // instances are executed serially by the current thread (e.g.
2462 // reference processing is not multi-threaded and is thus
2463 // performed by the current thread instead of a gang worker).
2464 //
2465 // The gang tasks involved in parallel reference processing create
2466 // their own instances of these closures, which do their own
2467 // synchronization among themselves.
2468 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2469 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2471 // We need at least one active thread. If reference processing
2472 // is not multi-threaded we use the current (VMThread) thread,
2473 // otherwise we use the work gang from the G1CollectedHeap and
2474 // we utilize all the worker threads we can.
2475 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2476 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2477 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2479 // Parallel processing task executor.
2480 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2481 g1h->workers(), active_workers);
2482 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2484 // Set the concurrency level. The phase was already set prior to
2485 // executing the remark task.
2486 set_concurrency(active_workers);
2488 // Set the degree of MT processing here. If the discovery was done MT,
2489 // the number of threads involved during discovery could differ from
2490 // the number of active workers. This is OK as long as the discovered
2491 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2492 rp->set_active_mt_degree(active_workers);
2494 // Process the weak references.
2495 rp->process_discovered_references(&g1_is_alive,
2496 &g1_keep_alive,
2497 &g1_drain_mark_stack,
2498 executor);
2500 // The do_oop work routines of the keep_alive and drain_marking_stack
2501 // oop closures will set the has_overflown flag if we overflow the
2502 // global marking stack.
2504 assert(_markStack.overflow() || _markStack.isEmpty(),
2505 "mark stack should be empty (unless it overflowed)");
2507 if (_markStack.overflow()) {
2508 // This should have been done already when we tried to push an
2509 // entry on to the global mark stack. But let's do it again.
2510 set_has_overflown();
2511 }
2513 assert(rp->num_q() == active_workers, "why not");
2515 rp->enqueue_discovered_references(executor);
2517 rp->verify_no_references_recorded();
2518 assert(!rp->discovery_enabled(), "Post condition");
2519 }
2521 // Now clean up stale oops in StringTable
2522 StringTable::unlink(&g1_is_alive);
2523 // Clean up unreferenced symbols in symbol table.
2524 SymbolTable::unlink();
2525 }
2527 void ConcurrentMark::swapMarkBitMaps() {
2528 CMBitMapRO* temp = _prevMarkBitMap;
2529 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2530 _nextMarkBitMap = (CMBitMap*) temp;
2531 }
2533 class CMRemarkTask: public AbstractGangTask {
2534 private:
2535 ConcurrentMark* _cm;
2536 bool _is_serial;
2537 public:
2538 void work(uint worker_id) {
2539 // Since all available tasks are actually started, we should
2540 // only proceed if we're supposed to be active.
2541 if (worker_id < _cm->active_tasks()) {
2542 CMTask* task = _cm->task(worker_id);
2543 task->record_start_time();
2544 do {
2545 task->do_marking_step(1000000000.0 /* something very large */,
2546 true /* do_termination */,
2547 _is_serial);
2548 } while (task->has_aborted() && !_cm->has_overflown());
2549 // If we overflow, then we do not want to restart. We instead
2550 // want to abort remark and do concurrent marking again.
2551 task->record_end_time();
2552 }
2553 }
2555 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2556 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2557 _cm->terminator()->reset_for_reuse(active_workers);
2558 }
2559 };
2561 void ConcurrentMark::checkpointRootsFinalWork() {
2562 ResourceMark rm;
2563 HandleMark hm;
2564 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2566 g1h->ensure_parsability(false);
2568 if (G1CollectedHeap::use_parallel_gc_threads()) {
2569 G1CollectedHeap::StrongRootsScope srs(g1h);
2570 // this is remark, so we'll use up all active threads
2571 uint active_workers = g1h->workers()->active_workers();
2572 if (active_workers == 0) {
2573 assert(active_workers > 0, "Should have been set earlier");
2574 active_workers = (uint) ParallelGCThreads;
2575 g1h->workers()->set_active_workers(active_workers);
2576 }
2577 set_concurrency_and_phase(active_workers, false /* concurrent */);
2578 // Leave _parallel_marking_threads at its
2579 // value originally calculated in the ConcurrentMark
2580 // constructor and pass values of the active workers
2581 // through the gang in the task.
2583 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2584 // We will start all available threads, even if we decide that the
2585 // active_workers will be fewer. The extra ones will just bail out
2586 // immediately.
2587 g1h->set_par_threads(active_workers);
2588 g1h->workers()->run_task(&remarkTask);
2589 g1h->set_par_threads(0);
2590 } else {
2591 G1CollectedHeap::StrongRootsScope srs(g1h);
2592 uint active_workers = 1;
2593 set_concurrency_and_phase(active_workers, false /* concurrent */);
2595 // Note - if there's no work gang then the VMThread will be
2596 // the thread to execute the remark - serially. We have
2597 // to pass true for the is_serial parameter so that
2598 // CMTask::do_marking_step() doesn't enter the sync
2599 // barriers in the event of an overflow. Doing so will
2600 // cause an assert that the current thread is not a
2601 // concurrent GC thread.
2602 CMRemarkTask remarkTask(this, active_workers, true /* is_serial */);
2603 remarkTask.work(0);
2604 }
2605 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2606 guarantee(has_overflown() ||
2607 satb_mq_set.completed_buffers_num() == 0,
2608 err_msg("Invariant: has_overflown = %s, num buffers = %d",
2609 BOOL_TO_STR(has_overflown()),
2610 satb_mq_set.completed_buffers_num()));
2612 print_stats();
2613 }
2615 #ifndef PRODUCT
2617 class PrintReachableOopClosure: public OopClosure {
2618 private:
2619 G1CollectedHeap* _g1h;
2620 outputStream* _out;
2621 VerifyOption _vo;
2622 bool _all;
2624 public:
2625 PrintReachableOopClosure(outputStream* out,
2626 VerifyOption vo,
2627 bool all) :
2628 _g1h(G1CollectedHeap::heap()),
2629 _out(out), _vo(vo), _all(all) { }
2631 void do_oop(narrowOop* p) { do_oop_work(p); }
2632 void do_oop( oop* p) { do_oop_work(p); }
2634 template <class T> void do_oop_work(T* p) {
2635 oop obj = oopDesc::load_decode_heap_oop(p);
2636 const char* str = NULL;
2637 const char* str2 = "";
2639 if (obj == NULL) {
2640 str = "";
2641 } else if (!_g1h->is_in_g1_reserved(obj)) {
2642 str = " O";
2643 } else {
2644 HeapRegion* hr = _g1h->heap_region_containing(obj);
2645 guarantee(hr != NULL, "invariant");
2646 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2647 bool marked = _g1h->is_marked(obj, _vo);
2649 if (over_tams) {
2650 str = " >";
2651 if (marked) {
2652 str2 = " AND MARKED";
2653 }
2654 } else if (marked) {
2655 str = " M";
2656 } else {
2657 str = " NOT";
2658 }
2659 }
2661 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2662 p, (void*) obj, str, str2);
2663 }
2664 };
2666 class PrintReachableObjectClosure : public ObjectClosure {
2667 private:
2668 G1CollectedHeap* _g1h;
2669 outputStream* _out;
2670 VerifyOption _vo;
2671 bool _all;
2672 HeapRegion* _hr;
2674 public:
2675 PrintReachableObjectClosure(outputStream* out,
2676 VerifyOption vo,
2677 bool all,
2678 HeapRegion* hr) :
2679 _g1h(G1CollectedHeap::heap()),
2680 _out(out), _vo(vo), _all(all), _hr(hr) { }
2682 void do_object(oop o) {
2683 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2684 bool marked = _g1h->is_marked(o, _vo);
2685 bool print_it = _all || over_tams || marked;
2687 if (print_it) {
2688 _out->print_cr(" "PTR_FORMAT"%s",
2689 o, (over_tams) ? " >" : (marked) ? " M" : "");
2690 PrintReachableOopClosure oopCl(_out, _vo, _all);
2691 o->oop_iterate_no_header(&oopCl);
2692 }
2693 }
2694 };
2696 class PrintReachableRegionClosure : public HeapRegionClosure {
2697 private:
2698 G1CollectedHeap* _g1h;
2699 outputStream* _out;
2700 VerifyOption _vo;
2701 bool _all;
2703 public:
2704 bool doHeapRegion(HeapRegion* hr) {
2705 HeapWord* b = hr->bottom();
2706 HeapWord* e = hr->end();
2707 HeapWord* t = hr->top();
2708 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2709 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2710 "TAMS: "PTR_FORMAT, b, e, t, p);
2711 _out->cr();
2713 HeapWord* from = b;
2714 HeapWord* to = t;
2716 if (to > from) {
2717 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2718 _out->cr();
2719 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2720 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2721 _out->cr();
2722 }
2724 return false;
2725 }
2727 PrintReachableRegionClosure(outputStream* out,
2728 VerifyOption vo,
2729 bool all) :
2730 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2731 };
2733 void ConcurrentMark::print_reachable(const char* str,
2734 VerifyOption vo,
2735 bool all) {
2736 gclog_or_tty->cr();
2737 gclog_or_tty->print_cr("== Doing heap dump... ");
2739 if (G1PrintReachableBaseFile == NULL) {
2740 gclog_or_tty->print_cr(" #### error: no base file defined");
2741 return;
2742 }
2744 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2745 (JVM_MAXPATHLEN - 1)) {
2746 gclog_or_tty->print_cr(" #### error: file name too long");
2747 return;
2748 }
2750 char file_name[JVM_MAXPATHLEN];
2751 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2752 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2754 fileStream fout(file_name);
2755 if (!fout.is_open()) {
2756 gclog_or_tty->print_cr(" #### error: could not open file");
2757 return;
2758 }
2760 outputStream* out = &fout;
2761 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2762 out->cr();
2764 out->print_cr("--- ITERATING OVER REGIONS");
2765 out->cr();
2766 PrintReachableRegionClosure rcl(out, vo, all);
2767 _g1h->heap_region_iterate(&rcl);
2768 out->cr();
2770 gclog_or_tty->print_cr(" done");
2771 gclog_or_tty->flush();
2772 }
2774 #endif // PRODUCT
2776 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2777 // Note we are overriding the read-only view of the prev map here, via
2778 // the cast.
2779 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2780 }
2782 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2783 _nextMarkBitMap->clearRange(mr);
2784 }
2786 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2787 clearRangePrevBitmap(mr);
2788 clearRangeNextBitmap(mr);
2789 }
2791 HeapRegion*
2792 ConcurrentMark::claim_region(uint worker_id) {
2793 // "checkpoint" the finger
2794 HeapWord* finger = _finger;
2796 // _heap_end will not change underneath our feet; it only changes at
2797 // yield points.
2798 while (finger < _heap_end) {
2799 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2801 // Note on how this code handles humongous regions. In the
2802 // normal case the finger will reach the start of a "starts
2803 // humongous" (SH) region. Its end will either be the end of the
2804 // last "continues humongous" (CH) region in the sequence, or the
2805 // standard end of the SH region (if the SH is the only region in
2806 // the sequence). That way claim_region() will skip over the CH
2807 // regions. However, there is a subtle race between a CM thread
2808 // executing this method and a mutator thread doing a humongous
2809 // object allocation. The two are not mutually exclusive as the CM
2810 // thread does not need to hold the Heap_lock when it gets
2811 // here. So there is a chance that claim_region() will come across
2812 // a free region that's in the process of becoming a SH or a CH
2813 // region. In the former case, it will either
2814 // a) Miss the update to the region's end, in which case it will
2815 // visit every subsequent CH region, will find their bitmaps
2816 // empty, and do nothing, or
2817 // b) Observe the update of the region's end (in which case
2818 // it will skip the subsequent CH regions).
2819 // If it comes across a region that suddenly becomes CH, the
2820 // scenario will be similar to b). So, the race between
2821 // claim_region() and a humongous object allocation might force us
2822 // to do a bit of unnecessary work (due to some unnecessary bitmap
2823 // iterations) but it should not introduce any correctness issues.
2824 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2825 HeapWord* bottom = curr_region->bottom();
2826 HeapWord* end = curr_region->end();
2827 HeapWord* limit = curr_region->next_top_at_mark_start();
2829 if (verbose_low()) {
2830 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2831 "["PTR_FORMAT", "PTR_FORMAT"), "
2832 "limit = "PTR_FORMAT,
2833 worker_id, curr_region, bottom, end, limit);
2834 }
2836 // Is the gap between reading the finger and doing the CAS too long?
2837 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2838 if (res == finger) {
2839 // we succeeded
2841 // notice that _finger == end cannot be guaranteed here since
2842 // someone else might have moved the finger even further
2843 assert(_finger >= end, "the finger should have moved forward");
2845 if (verbose_low()) {
2846 gclog_or_tty->print_cr("[%u] we were successful with region = "
2847 PTR_FORMAT, worker_id, curr_region);
2848 }
2850 if (limit > bottom) {
2851 if (verbose_low()) {
2852 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2853 "returning it ", worker_id, curr_region);
2854 }
2855 return curr_region;
2856 } else {
2857 assert(limit == bottom,
2858 "the region limit should be at bottom");
2859 if (verbose_low()) {
2860 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2861 "returning NULL", worker_id, curr_region);
2862 }
2863 // we return NULL and the caller should try calling
2864 // claim_region() again.
2865 return NULL;
2866 }
2867 } else {
2868 assert(_finger > finger, "the finger should have moved forward");
2869 if (verbose_low()) {
2870 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2871 "global finger = "PTR_FORMAT", "
2872 "our finger = "PTR_FORMAT,
2873 worker_id, _finger, finger);
2874 }
2876 // read it again
2877 finger = _finger;
2878 }
2879 }
2881 return NULL;
2882 }
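// A minimal sketch of the lock-free claiming protocol in claim_region()
// above: read the shared finger, compute the claim's end, and publish
// it with a compare-and-swap; on failure re-read and retry. C++11
// atomics stand in for Atomic::cmpxchg_ptr, and a fixed region size
// replaces the real region lookup; names are illustrative only.

#include <atomic>
#include <cstddef>

static char* sketch_claim(std::atomic<char*>& finger, char* heap_end,
                          size_t region_bytes) {
  char* f = finger.load();
  while (f < heap_end) {
    char* end = f + region_bytes;
    if (finger.compare_exchange_strong(f, end)) {
      return f;  // success: we own [f, end)
    }
    // CAS failed: 'f' now holds the current finger value; retry.
  }
  return NULL;  // the whole heap has been claimed
}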
2884 #ifndef PRODUCT
2885 enum VerifyNoCSetOopsPhase {
2886 VerifyNoCSetOopsStack,
2887 VerifyNoCSetOopsQueues,
2888 VerifyNoCSetOopsSATBCompleted,
2889 VerifyNoCSetOopsSATBThread
2890 };
2892 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2893 private:
2894 G1CollectedHeap* _g1h;
2895 VerifyNoCSetOopsPhase _phase;
2896 int _info;
2898 const char* phase_str() {
2899 switch (_phase) {
2900 case VerifyNoCSetOopsStack: return "Stack";
2901 case VerifyNoCSetOopsQueues: return "Queue";
2902 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2903 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2904 default: ShouldNotReachHere();
2905 }
2906 return NULL;
2907 }
2909 void do_object_work(oop obj) {
2910 guarantee(!_g1h->obj_in_cs(obj),
2911 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2912 (void*) obj, phase_str(), _info));
2913 }
2915 public:
2916 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2918 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2919 _phase = phase;
2920 _info = info;
2921 }
2923 virtual void do_oop(oop* p) {
2924 oop obj = oopDesc::load_decode_heap_oop(p);
2925 do_object_work(obj);
2926 }
2928 virtual void do_oop(narrowOop* p) {
2929 // We should not come across narrow oops while scanning marking
2930 // stacks and SATB buffers.
2931 ShouldNotReachHere();
2932 }
2934 virtual void do_object(oop obj) {
2935 do_object_work(obj);
2936 }
2937 };
2939 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2940 bool verify_enqueued_buffers,
2941 bool verify_thread_buffers,
2942 bool verify_fingers) {
2943 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2944 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2945 return;
2946 }
2948 VerifyNoCSetOopsClosure cl;
2950 if (verify_stacks) {
2951 // Verify entries on the global mark stack
2952 cl.set_phase(VerifyNoCSetOopsStack);
2953 _markStack.oops_do(&cl);
2955 // Verify entries on the task queues
2956 for (uint i = 0; i < _max_worker_id; i += 1) {
2957 cl.set_phase(VerifyNoCSetOopsQueues, i);
2958 CMTaskQueue* queue = _task_queues->queue(i);
2959 queue->oops_do(&cl);
2960 }
2961 }
2963 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2965 // Verify entries on the enqueued SATB buffers
2966 if (verify_enqueued_buffers) {
2967 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2968 satb_qs.iterate_completed_buffers_read_only(&cl);
2969 }
2971 // Verify entries on the per-thread SATB buffers
2972 if (verify_thread_buffers) {
2973 cl.set_phase(VerifyNoCSetOopsSATBThread);
2974 satb_qs.iterate_thread_buffers_read_only(&cl);
2975 }
2977 if (verify_fingers) {
2978 // Verify the global finger
2979 HeapWord* global_finger = finger();
2980 if (global_finger != NULL && global_finger < _heap_end) {
2981 // The global finger always points to a heap region boundary. We
2982 // use heap_region_containing_raw() to get the containing region
2983 // given that the global finger could be pointing to a free region
2984 // which subsequently becomes continues humongous. If that
2985 // happens, heap_region_containing() will return the bottom of the
2986 // corresponding starts humongous region and the check below will
2987 // not hold any more.
2988 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2989 guarantee(global_finger == global_hr->bottom(),
2990 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2991 global_finger, HR_FORMAT_PARAMS(global_hr)));
2992 }
2994 // Verify the task fingers
2995 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2996 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2997 CMTask* task = _tasks[i];
2998 HeapWord* task_finger = task->finger();
2999 if (task_finger != NULL && task_finger < _heap_end) {
3000 // See above note on the global finger verification.
3001 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3002 guarantee(task_finger == task_hr->bottom() ||
3003 !task_hr->in_collection_set(),
3004 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3005 task_finger, HR_FORMAT_PARAMS(task_hr)));
3006 }
3007 }
3008 }
3009 }
3010 #endif // PRODUCT
3012 // Aggregate the counting data that was constructed concurrently
3013 // with marking.
3014 class AggregateCountDataHRClosure: public HeapRegionClosure {
3015 G1CollectedHeap* _g1h;
3016 ConcurrentMark* _cm;
3017 CardTableModRefBS* _ct_bs;
3018 BitMap* _cm_card_bm;
3019 uint _max_worker_id;
3021 public:
3022 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3023 BitMap* cm_card_bm,
3024 uint max_worker_id) :
3025 _g1h(g1h), _cm(g1h->concurrent_mark()),
3026 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3027 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3029 bool doHeapRegion(HeapRegion* hr) {
3030 if (hr->continuesHumongous()) {
3031 // We will ignore these here and process them when their
3032 // associated "starts humongous" region is processed.
3033 // Note that we cannot rely on their associated
3034 // "starts humongous" region to have their bit set to 1
3035 // since, due to the region chunking in the parallel region
3036 // iteration, a "continues humongous" region might be visited
3037 // before its associated "starts humongous".
3038 return false;
3039 }
3041 HeapWord* start = hr->bottom();
3042 HeapWord* limit = hr->next_top_at_mark_start();
3043 HeapWord* end = hr->end();
3045 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3046 err_msg("Preconditions not met - "
3047 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3048 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3049 start, limit, hr->top(), hr->end()));
3051 assert(hr->next_marked_bytes() == 0, "Precondition");
3053 if (start == limit) {
3054 // NTAMS of this region has not been set so nothing to do.
3055 return false;
3056 }
3058 // 'start' should be in the heap.
3059 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3060 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3061 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3063 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3064 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3065 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3067 // If ntams is not card aligned then we bump the card bitmap index
3068 // for limit so that we get all the cards spanned by
3069 // the object ending at ntams.
3070 // Note: if this is the last region in the heap then ntams
3071 // could actually be just beyond the end of the heap;
3072 // limit_idx will then correspond to a (non-existent) card
3073 // that is also outside the heap.
3074 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3075 limit_idx += 1;
3076 }
3078 assert(limit_idx <= end_idx, "or else use atomics");
3080 // Aggregate the "stripe" in the count data associated with hr.
3081 uint hrs_index = hr->hrs_index();
3082 size_t marked_bytes = 0;
3084 for (uint i = 0; i < _max_worker_id; i += 1) {
3085 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3086 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3088 // Fetch the marked_bytes in this region for task i and
3089 // add it to the running total for this region.
3090 marked_bytes += marked_bytes_array[hrs_index];
3092 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3093 // into the global card bitmap.
3094 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3096 while (scan_idx < limit_idx) {
3097 assert(task_card_bm->at(scan_idx) == true, "should be");
3098 _cm_card_bm->set_bit(scan_idx);
3099 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3101 // BitMap::get_next_one_offset() can handle the case when
3102 // its left_offset parameter is greater than its right_offset
3103 // parameter. It does, however, have an early exit if
3104 // left_offset == right_offset. So let's limit the value
3105 // passed in for left offset here.
3106 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3107 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3108 }
3109 }
3111 // Update the marked bytes for this region.
3112 hr->add_to_marked_bytes(marked_bytes);
3114 // Next heap region
3115 return false;
3116 }
3117 };
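// A minimal sketch of the aggregation step in doHeapRegion() above:
// OR each worker's private card bitmap into the global bitmap over the
// region's card index range. A plain word array stands in for the
// HotSpot BitMap; no atomics are needed because each region's cards
// are aggregated by exactly one worker. All names are illustrative.

static void sketch_union_cards(const unsigned long* task_bm,
                               unsigned long* global_bm,
                               size_t start_word, size_t end_word) {
  for (size_t w = start_word; w < end_word; w++) {
    global_bm[w] |= task_bm[w];  // set-bit union of one worker's stripe
  }
}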
3119 class G1AggregateCountDataTask: public AbstractGangTask {
3120 protected:
3121 G1CollectedHeap* _g1h;
3122 ConcurrentMark* _cm;
3123 BitMap* _cm_card_bm;
3124 uint _max_worker_id;
3125 int _active_workers;
3127 public:
3128 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3129 ConcurrentMark* cm,
3130 BitMap* cm_card_bm,
3131 uint max_worker_id,
3132 int n_workers) :
3133 AbstractGangTask("Count Aggregation"),
3134 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3135 _max_worker_id(max_worker_id),
3136 _active_workers(n_workers) { }
3138 void work(uint worker_id) {
3139 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3141 if (G1CollectedHeap::use_parallel_gc_threads()) {
3142 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3143 _active_workers,
3144 HeapRegion::AggregateCountClaimValue);
3145 } else {
3146 _g1h->heap_region_iterate(&cl);
3147 }
3148 }
3149 };
3152 void ConcurrentMark::aggregate_count_data() {
3153 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3154 _g1h->workers()->active_workers() :
3155 1);
3157 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3158 _max_worker_id, n_workers);
3160 if (G1CollectedHeap::use_parallel_gc_threads()) {
3161 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3162 "sanity check");
3163 _g1h->set_par_threads(n_workers);
3164 _g1h->workers()->run_task(&g1_par_agg_task);
3165 _g1h->set_par_threads(0);
3167 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3168 "sanity check");
3169 _g1h->reset_heap_region_claim_values();
3170 } else {
3171 g1_par_agg_task.work(0);
3172 }
3173 }
3175 // Clear the per-worker arrays used to store the per-region counting data
3176 void ConcurrentMark::clear_all_count_data() {
3177 // Clear the global card bitmap - it will be filled during
3178 // liveness count aggregation (during remark) and the
3179 // final counting task.
3180 _card_bm.clear();
3182 // Clear the global region bitmap - it will be filled as part
3183 // of the final counting task.
3184 _region_bm.clear();
3186 uint max_regions = _g1h->max_regions();
3187 assert(_max_worker_id > 0, "uninitialized");
3189 for (uint i = 0; i < _max_worker_id; i += 1) {
3190 BitMap* task_card_bm = count_card_bitmap_for(i);
3191 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3193 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3194 assert(marked_bytes_array != NULL, "uninitialized");
3196 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3197 task_card_bm->clear();
3198 }
3199 }
3201 void ConcurrentMark::print_stats() {
3202 if (verbose_stats()) {
3203 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3204 for (size_t i = 0; i < _active_tasks; ++i) {
3205 _tasks[i]->print_stats();
3206 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3207 }
3208 }
3209 }
3211 // abandon current marking iteration due to a Full GC
3212 void ConcurrentMark::abort() {
3213 // Clear all marks to force marking thread to do nothing
3214 _nextMarkBitMap->clearAll();
3215 // Clear the liveness counting data
3216 clear_all_count_data();
3217 // Empty mark stack
3218 reset_marking_state();
3219 for (uint i = 0; i < _max_worker_id; ++i) {
3220 _tasks[i]->clear_region_fields();
3221 }
3222 _has_aborted = true;
3224 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3225 satb_mq_set.abandon_partial_marking();
3226 // This can be called either during or outside marking, we'll read
3227 // the expected_active value from the SATB queue set.
3228 satb_mq_set.set_active_all_threads(
3229 false, /* new active value */
3230 satb_mq_set.is_active() /* expected_active */);
3231 }
3233 static void print_ms_time_info(const char* prefix, const char* name,
3234 NumberSeq& ns) {
3235 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3236 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3237 if (ns.num() > 0) {
3238 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3239 prefix, ns.sd(), ns.maximum());
3240 }
3241 }
3243 void ConcurrentMark::print_summary_info() {
3244 gclog_or_tty->print_cr(" Concurrent marking:");
3245 print_ms_time_info(" ", "init marks", _init_times);
3246 print_ms_time_info(" ", "remarks", _remark_times);
3247 {
3248 print_ms_time_info(" ", "final marks", _remark_mark_times);
3249 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3251 }
3252 print_ms_time_info(" ", "cleanups", _cleanup_times);
3253 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3254 _total_counting_time,
3255 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3256 (double)_cleanup_times.num()
3257 : 0.0));
3258 if (G1ScrubRemSets) {
3259 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3260 _total_rs_scrub_time,
3261 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3262 (double)_cleanup_times.num()
3263 : 0.0));
3264 }
3265 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3266 (_init_times.sum() + _remark_times.sum() +
3267 _cleanup_times.sum())/1000.0);
3268 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3269 "(%8.2f s marking).",
3270 cmThread()->vtime_accum(),
3271 cmThread()->vtime_mark_accum());
3272 }
3274 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3275 if (use_parallel_marking_threads()) {
3276 _parallel_workers->print_worker_threads_on(st);
3277 }
3278 }
3280 // We take a break if someone is trying to stop the world.
3281 bool ConcurrentMark::do_yield_check(uint worker_id) {
3282 if (should_yield()) {
3283 if (worker_id == 0) {
3284 _g1h->g1_policy()->record_concurrent_pause();
3285 }
3286 cmThread()->yield();
3287 return true;
3288 } else {
3289 return false;
3290 }
3291 }
3293 bool ConcurrentMark::should_yield() {
3294 return cmThread()->should_yield();
3295 }
3297 bool ConcurrentMark::containing_card_is_marked(void* p) {
3298 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3299 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3300 }
3302 bool ConcurrentMark::containing_cards_are_marked(void* start,
3303 void* last) {
3304 return containing_card_is_marked(start) &&
3305 containing_card_is_marked(last);
3306 }
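// ---------------------------------------------------------------------------
// Editor's sketch, not from the original source: the card-index arithmetic
// used above. A HotSpot card covers 512 bytes, so CardTableModRefBS::
// card_shift is 9: the byte offset of p from the start of the reserved
// region, shifted right by 9, is the index of the covering card in
// _card_bm. The addresses below are illustrative only.
#include <cassert>
#include <cstddef>
#include <stdint.h>

static const int toy_card_shift = 9;                // 2^9 = 512-byte cards

static size_t toy_card_index(uintptr_t heap_start, uintptr_t p) {
  size_t byte_offset = (size_t)(p - heap_start);    // pointer_delta(p, start, 1)
  return byte_offset >> toy_card_shift;             // card covering p
}

static void toy_card_demo() {
  const uintptr_t heap_start = 0x10000000;
  assert(toy_card_index(heap_start, heap_start + 511) == 0);  // same card
  assert(toy_card_index(heap_start, heap_start + 512) == 1);  // next card
}
// ---------------------------------------------------------------------------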
3308 #ifndef PRODUCT
3309 // for debugging purposes
3310 void ConcurrentMark::print_finger() {
3311 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3312 _heap_start, _heap_end, _finger);
3313 for (uint i = 0; i < _max_worker_id; ++i) {
3314 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger());
3315 }
3316 gclog_or_tty->print_cr("");
3317 }
3318 #endif
3320 void CMTask::scan_object(oop obj) {
3321 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3323 if (_cm->verbose_high()) {
3324 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3325 _worker_id, (void*) obj);
3326 }
3328 size_t obj_size = obj->size();
3329 _words_scanned += obj_size;
3331 obj->oop_iterate(_cm_oop_closure);
3332 statsOnly( ++_objs_scanned );
3333 check_limits();
3334 }
3336 // Closure for iteration over bitmaps
3337 class CMBitMapClosure : public BitMapClosure {
3338 private:
3339 // the bitmap that is being iterated over
3340 CMBitMap* _nextMarkBitMap;
3341 ConcurrentMark* _cm;
3342 CMTask* _task;
3344 public:
3345 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3346 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3348 bool do_bit(size_t offset) {
3349 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3350 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3351 assert( addr < _cm->finger(), "invariant");
3353 statsOnly( _task->increase_objs_found_on_bitmap() );
3354 assert(addr >= _task->finger(), "invariant");
3356     // We move the task's local finger along.
3357 _task->move_finger_to(addr);
3359 _task->scan_object(oop(addr));
3360 // we only partially drain the local queue and global stack
3361 _task->drain_local_queue(true);
3362 _task->drain_global_stack(true);
3364 // if the has_aborted flag has been raised, we need to bail out of
3365 // the iteration
3366 return !_task->has_aborted();
3367 }
3368 };
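// ---------------------------------------------------------------------------
// Editor's sketch, not from the original source: the abort protocol of
// BitMapClosure iteration, reduced to a toy. iterate() visits set bits in
// order and stops the first time do_bit() returns false -- which is how a
// raised has_aborted flag terminates the scan driven by CMBitMapClosure.
#include <cstddef>
#include <vector>

struct ToyBitMapClosure {
  virtual bool do_bit(size_t offset) = 0;  // false == abort the iteration
  virtual ~ToyBitMapClosure() {}
};

static bool toy_iterate(const std::vector<bool>& bm, ToyBitMapClosure* cl) {
  for (size_t i = 0; i < bm.size(); ++i) {
    if (bm[i] && !cl->do_bit(i)) {
      return false;  // aborted part-way; caller can tell this from completion
    }
  }
  return true;       // ran to completion
}
// ---------------------------------------------------------------------------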
3370 // Closure for iterating over objects, currently only used for
3371 // processing SATB buffers.
3372 class CMObjectClosure : public ObjectClosure {
3373 private:
3374 CMTask* _task;
3376 public:
3377 void do_object(oop obj) {
3378 _task->deal_with_reference(obj);
3379 }
3381 CMObjectClosure(CMTask* task) : _task(task) { }
3382 };
3384 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3385 ConcurrentMark* cm,
3386 CMTask* task)
3387 : _g1h(g1h), _cm(cm), _task(task) {
3388 assert(_ref_processor == NULL, "should be initialized to NULL");
3390 if (G1UseConcMarkReferenceProcessing) {
3391 _ref_processor = g1h->ref_processor_cm();
3392 assert(_ref_processor != NULL, "should not be NULL");
3393 }
3394 }
3396 void CMTask::setup_for_region(HeapRegion* hr) {
3397 // Separated the asserts so that we know which one fires.
3398 assert(hr != NULL,
3399 "claim_region() should have filtered out continues humongous regions");
3400 assert(!hr->continuesHumongous(),
3401 "claim_region() should have filtered out continues humongous regions");
3403 if (_cm->verbose_low()) {
3404 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3405 _worker_id, hr);
3406 }
3408 _curr_region = hr;
3409 _finger = hr->bottom();
3410 update_region_limit();
3411 }
3413 void CMTask::update_region_limit() {
3414 HeapRegion* hr = _curr_region;
3415 HeapWord* bottom = hr->bottom();
3416 HeapWord* limit = hr->next_top_at_mark_start();
3418 if (limit == bottom) {
3419 if (_cm->verbose_low()) {
3420 gclog_or_tty->print_cr("[%u] found an empty region "
3421 "["PTR_FORMAT", "PTR_FORMAT")",
3422 _worker_id, bottom, limit);
3423 }
3424 // The region was collected underneath our feet.
3425 // We set the finger to bottom to ensure that the bitmap
3426 // iteration that will follow this will not do anything.
3427 // (this is not a condition that holds when we set the region up,
3428 // as the region is not supposed to be empty in the first place)
3429 _finger = bottom;
3430 } else if (limit >= _region_limit) {
3431 assert(limit >= _finger, "peace of mind");
3432 } else {
3433 assert(limit < _region_limit, "only way to get here");
3434 // This can happen under some pretty unusual circumstances. An
3435 // evacuation pause empties the region underneath our feet (NTAMS
3436 // at bottom). We then do some allocation in the region (NTAMS
3437 // stays at bottom), followed by the region being used as a GC
3438 // alloc region (NTAMS will move to top() and the objects
3439 // originally below it will be grayed). All objects now marked in
3440 // the region are explicitly grayed, if below the global finger,
3441     // and in fact we do not need to scan anything else. So, we simply
3442 // set _finger to be limit to ensure that the bitmap iteration
3443 // doesn't do anything.
3444 _finger = limit;
3445 }
3447 _region_limit = limit;
3448 }
3450 void CMTask::giveup_current_region() {
3451 assert(_curr_region != NULL, "invariant");
3452 if (_cm->verbose_low()) {
3453 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3454 _worker_id, _curr_region);
3455 }
3456 clear_region_fields();
3457 }
3459 void CMTask::clear_region_fields() {
3460 // Values for these three fields that indicate that we're not
3461 // holding on to a region.
3462 _curr_region = NULL;
3463 _finger = NULL;
3464 _region_limit = NULL;
3465 }
3467 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3468 if (cm_oop_closure == NULL) {
3469 assert(_cm_oop_closure != NULL, "invariant");
3470 } else {
3471 assert(_cm_oop_closure == NULL, "invariant");
3472 }
3473 _cm_oop_closure = cm_oop_closure;
3474 }
3476 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3477 guarantee(nextMarkBitMap != NULL, "invariant");
3479 if (_cm->verbose_low()) {
3480 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3481 }
3483 _nextMarkBitMap = nextMarkBitMap;
3484 clear_region_fields();
3486 _calls = 0;
3487 _elapsed_time_ms = 0.0;
3488 _termination_time_ms = 0.0;
3489 _termination_start_time_ms = 0.0;
3491 #if _MARKING_STATS_
3492 _local_pushes = 0;
3493 _local_pops = 0;
3494 _local_max_size = 0;
3495 _objs_scanned = 0;
3496 _global_pushes = 0;
3497 _global_pops = 0;
3498 _global_max_size = 0;
3499 _global_transfers_to = 0;
3500 _global_transfers_from = 0;
3501 _regions_claimed = 0;
3502 _objs_found_on_bitmap = 0;
3503 _satb_buffers_processed = 0;
3504 _steal_attempts = 0;
3505 _steals = 0;
3506 _aborted = 0;
3507 _aborted_overflow = 0;
3508 _aborted_cm_aborted = 0;
3509 _aborted_yield = 0;
3510 _aborted_timed_out = 0;
3511 _aborted_satb = 0;
3512 _aborted_termination = 0;
3513 #endif // _MARKING_STATS_
3514 }
3516 bool CMTask::should_exit_termination() {
3517 regular_clock_call();
3518 // This is called when we are in the termination protocol. We should
3519 // quit if, for some reason, this task wants to abort or the global
3520 // stack is not empty (this means that we can get work from it).
3521 return !_cm->mark_stack_empty() || has_aborted();
3522 }
3524 void CMTask::reached_limit() {
3525 assert(_words_scanned >= _words_scanned_limit ||
3526          _refs_reached >= _refs_reached_limit,
3527 "shouldn't have been called otherwise");
3528 regular_clock_call();
3529 }
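// ---------------------------------------------------------------------------
// Editor's sketch, not from the original source: the work-based clock scheme
// above, as a toy. The hot scanning path only bumps two counters and
// compares them against two limits; the more expensive time and abort checks
// run only when a limit trips. The period values are illustrative, not
// authoritative.
#include <cstddef>

struct ToyWorkClock {
  size_t words_scanned, words_limit;
  size_t refs_reached,  refs_limit;
  static const size_t words_period = 12 * 1024;
  static const size_t refs_period  = 384;

  ToyWorkClock()
    : words_scanned(0), words_limit(words_period),
      refs_reached(0),  refs_limit(refs_period) { }

  void on_object_scanned(size_t words) { words_scanned += words; check(); }
  void on_reference_visited()          { refs_reached  += 1;     check(); }

  void check() {                                   // cf. CMTask::check_limits()
    if (words_scanned >= words_limit || refs_reached >= refs_limit) {
      regular_clock_call();                        // rare, expensive checks
      words_limit = words_scanned + words_period;  // recalculate_limits()
      refs_limit  = refs_reached  + refs_period;
    }
  }
  void regular_clock_call() { /* timestamps + abort conditions (1)-(6) */ }
};
// ---------------------------------------------------------------------------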
3531 void CMTask::regular_clock_call() {
3532 if (has_aborted()) return;
3534 // First, we need to recalculate the words scanned and refs reached
3535 // limits for the next clock call.
3536 recalculate_limits();
3538 // During the regular clock call we do the following
3540 // (1) If an overflow has been flagged, then we abort.
3541 if (_cm->has_overflown()) {
3542 set_has_aborted();
3543 return;
3544 }
3546 // If we are not concurrent (i.e. we're doing remark) we don't need
3547 // to check anything else. The other steps are only needed during
3548 // the concurrent marking phase.
3549 if (!concurrent()) return;
3551 // (2) If marking has been aborted for Full GC, then we also abort.
3552 if (_cm->has_aborted()) {
3553 set_has_aborted();
3554 statsOnly( ++_aborted_cm_aborted );
3555 return;
3556 }
3558 double curr_time_ms = os::elapsedVTime() * 1000.0;
3560 // (3) If marking stats are enabled, then we update the step history.
3561 #if _MARKING_STATS_
3562 if (_words_scanned >= _words_scanned_limit) {
3563 ++_clock_due_to_scanning;
3564 }
3565 if (_refs_reached >= _refs_reached_limit) {
3566 ++_clock_due_to_marking;
3567 }
3569 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3570 _interval_start_time_ms = curr_time_ms;
3571 _all_clock_intervals_ms.add(last_interval_ms);
3573 if (_cm->verbose_medium()) {
3574 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3575 "scanned = %d%s, refs reached = %d%s",
3576 _worker_id, last_interval_ms,
3577 _words_scanned,
3578 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3579 _refs_reached,
3580 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3581 }
3582 #endif // _MARKING_STATS_
3584 // (4) We check whether we should yield. If we have to, then we abort.
3585 if (_cm->should_yield()) {
3586 // We should yield. To do this we abort the task. The caller is
3587 // responsible for yielding.
3588 set_has_aborted();
3589 statsOnly( ++_aborted_yield );
3590 return;
3591 }
3593 // (5) We check whether we've reached our time quota. If we have,
3594 // then we abort.
3595 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3596 if (elapsed_time_ms > _time_target_ms) {
3597 set_has_aborted();
3598 _has_timed_out = true;
3599 statsOnly( ++_aborted_timed_out );
3600 return;
3601 }
3603   // (6) Finally, we check whether there are enough completed SATB
3604 // buffers available for processing. If there are, we abort.
3605 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3606 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3607 if (_cm->verbose_low()) {
3608 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3609 _worker_id);
3610 }
3611 // we do need to process SATB buffers, we'll abort and restart
3612 // the marking task to do so
3613 set_has_aborted();
3614 statsOnly( ++_aborted_satb );
3615 return;
3616 }
3617 }
3619 void CMTask::recalculate_limits() {
3620 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3621 _words_scanned_limit = _real_words_scanned_limit;
3623 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3624 _refs_reached_limit = _real_refs_reached_limit;
3625 }
3627 void CMTask::decrease_limits() {
3628 // This is called when we believe that we're going to do an infrequent
3629   // operation which will increase the per-byte scanned cost (i.e. move
3630 // entries to/from the global stack). It basically tries to decrease the
3631 // scanning limit so that the clock is called earlier.
3633 if (_cm->verbose_medium()) {
3634 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3635 }
3637 _words_scanned_limit = _real_words_scanned_limit -
3638 3 * words_scanned_period / 4;
3639 _refs_reached_limit = _real_refs_reached_limit -
3640 3 * refs_reached_period / 4;
3641 }
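// ---------------------------------------------------------------------------
// Editor's sketch of the arithmetic above: subtracting three quarters of a
// period from the "real" limit leaves only a quarter of the usual work
// budget before reached_limit() fires, so the next clock call comes roughly
// four times sooner. The period value is illustrative.
#include <cassert>
#include <cstddef>

static void toy_decrease_limits_demo() {
  const size_t period    = 12 * 1024;          // hypothetical scan period
  size_t words_scanned   = 100000;             // progress just after a reset
  size_t real_limit      = words_scanned + period;
  size_t decreased_limit = real_limit - 3 * period / 4;
  assert(decreased_limit - words_scanned == period / 4);  // 1/4 budget left
}
// ---------------------------------------------------------------------------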
3643 void CMTask::move_entries_to_global_stack() {
3644 // local array where we'll store the entries that will be popped
3645 // from the local queue
3646 oop buffer[global_stack_transfer_size];
3648 int n = 0;
3649 oop obj;
3650 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3651 buffer[n] = obj;
3652 ++n;
3653 }
3655 if (n > 0) {
3656 // we popped at least one entry from the local queue
3658 statsOnly( ++_global_transfers_to; _local_pops += n );
3660 if (!_cm->mark_stack_push(buffer, n)) {
3661 if (_cm->verbose_low()) {
3662 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3663 _worker_id);
3664 }
3665 set_has_aborted();
3666 } else {
3667 // the transfer was successful
3669 if (_cm->verbose_medium()) {
3670 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3671 _worker_id, n);
3672 }
3673 statsOnly( int tmp_size = _cm->mark_stack_size();
3674 if (tmp_size > _global_max_size) {
3675 _global_max_size = tmp_size;
3676 }
3677 _global_pushes += n );
3678 }
3679 }
3681 // this operation was quite expensive, so decrease the limits
3682 decrease_limits();
3683 }
3685 void CMTask::get_entries_from_global_stack() {
3686 // local array where we'll store the entries that will be popped
3687 // from the global stack.
3688 oop buffer[global_stack_transfer_size];
3689 int n;
3690 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3691 assert(n <= global_stack_transfer_size,
3692 "we should not pop more than the given limit");
3693 if (n > 0) {
3694 // yes, we did actually pop at least one entry
3696 statsOnly( ++_global_transfers_from; _global_pops += n );
3697 if (_cm->verbose_medium()) {
3698 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3699 _worker_id, n);
3700 }
3701 for (int i = 0; i < n; ++i) {
3702 bool success = _task_queue->push(buffer[i]);
3703 // We only call this when the local queue is empty or under a
3704 // given target limit. So, we do not expect this push to fail.
3705 assert(success, "invariant");
3706 }
3708 statsOnly( int tmp_size = _task_queue->size();
3709 if (tmp_size > _local_max_size) {
3710 _local_max_size = tmp_size;
3711 }
3712 _local_pushes += n );
3713 }
3715 // this operation was quite expensive, so decrease the limits
3716 decrease_limits();
3717 }
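// ---------------------------------------------------------------------------
// Editor's sketch, not from the original source: the chunked local<->global
// transfer implemented by the two methods above, as a toy. Entries move in
// fixed-size batches so the global stack's lock is taken once per batch
// rather than once per entry; ToyGlobalStack is a hypothetical stand-in for
// CMMarkStack, and the batch size is illustrative.
#include <mutex>
#include <vector>

static const int toy_transfer_size = 16;          // illustrative batch size

struct ToyGlobalStack {
  std::mutex lock;
  std::vector<int> data;
  // Push a whole batch under a single lock acquisition, like mark_stack_push.
  bool push_batch(const int* buf, int n) {
    std::lock_guard<std::mutex> g(lock);
    data.insert(data.end(), buf, buf + n);
    return true;                                  // the toy never overflows
  }
};

static void toy_move_to_global(std::vector<int>& local, ToyGlobalStack& global) {
  int buffer[toy_transfer_size];
  int n = 0;
  while (n < toy_transfer_size && !local.empty()) {  // the pop_local() loop
    buffer[n++] = local.back();
    local.pop_back();
  }
  if (n > 0) {
    global.push_batch(buffer, n);                 // one lock per batch
  }
}
// ---------------------------------------------------------------------------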
3719 void CMTask::drain_local_queue(bool partially) {
3720 if (has_aborted()) return;
3722   // Decide what the target size is, depending on whether we're going to
3723 // drain it partially (so that other tasks can steal if they run out
3724 // of things to do) or totally (at the very end).
3725 size_t target_size;
3726 if (partially) {
3727 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3728 } else {
3729 target_size = 0;
3730 }
3732 if (_task_queue->size() > target_size) {
3733 if (_cm->verbose_high()) {
3734       gclog_or_tty->print_cr("[%u] draining local queue, target size = "SIZE_FORMAT,
3735 _worker_id, target_size);
3736 }
3738 oop obj;
3739 bool ret = _task_queue->pop_local(obj);
3740 while (ret) {
3741 statsOnly( ++_local_pops );
3743 if (_cm->verbose_high()) {
3744 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3745 (void*) obj);
3746 }
3748 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3749 assert(!_g1h->is_on_master_free_list(
3750 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3752 scan_object(obj);
3754 if (_task_queue->size() <= target_size || has_aborted()) {
3755 ret = false;
3756 } else {
3757 ret = _task_queue->pop_local(obj);
3758 }
3759 }
3761 if (_cm->verbose_high()) {
3762 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3763 _worker_id, _task_queue->size());
3764 }
3765 }
3766 }
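// ---------------------------------------------------------------------------
// Editor's sketch of the partial-drain target computed above. Draining only
// down to a third of the queue's capacity (capped by GCDrainStackTargetSize)
// deliberately leaves entries behind for other tasks to steal; a full drain
// uses a target of zero. The cap value passed in is an assumption here.
#include <algorithm>
#include <cstddef>

static size_t toy_drain_target(bool partially, size_t max_elems,
                               size_t cap /* GCDrainStackTargetSize */) {
  return partially ? std::min(max_elems / 3, cap) : 0;
}
// ---------------------------------------------------------------------------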
3768 void CMTask::drain_global_stack(bool partially) {
3769 if (has_aborted()) return;
3771 // We have a policy to drain the local queue before we attempt to
3772 // drain the global stack.
3773 assert(partially || _task_queue->size() == 0, "invariant");
3775   // Decide what the target size is, depending on whether we're going to
3776 // drain it partially (so that other tasks can steal if they run out
3777 // of things to do) or totally (at the very end). Notice that,
3778 // because we move entries from the global stack in chunks or
3779 // because another task might be doing the same, we might in fact
3780   // drop below the target. But this is not a problem.
3781 size_t target_size;
3782 if (partially) {
3783 target_size = _cm->partial_mark_stack_size_target();
3784 } else {
3785 target_size = 0;
3786 }
3788 if (_cm->mark_stack_size() > target_size) {
3789 if (_cm->verbose_low()) {
3790       gclog_or_tty->print_cr("[%u] draining global_stack, target size "SIZE_FORMAT,
3791 _worker_id, target_size);
3792 }
3794 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3795 get_entries_from_global_stack();
3796 drain_local_queue(partially);
3797 }
3799 if (_cm->verbose_low()) {
3800 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3801 _worker_id, _cm->mark_stack_size());
3802 }
3803 }
3804 }
3806 // The SATB queue set makes several assumptions about whether to call the
3807 // par or non-par versions of its methods. This is why some of the code is
3808 // replicated. We should really get rid of the single-threaded version
3809 // of the code to simplify things.
3810 void CMTask::drain_satb_buffers() {
3811 if (has_aborted()) return;
3813 // We set this so that the regular clock knows that we're in the
3814 // middle of draining buffers and doesn't set the abort flag when it
3815 // notices that SATB buffers are available for draining. It'd be
3816   // very counterproductive if it did that. :-)
3817 _draining_satb_buffers = true;
3819 CMObjectClosure oc(this);
3820 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3821 if (G1CollectedHeap::use_parallel_gc_threads()) {
3822 satb_mq_set.set_par_closure(_worker_id, &oc);
3823 } else {
3824 satb_mq_set.set_closure(&oc);
3825 }
3827 // This keeps claiming and applying the closure to completed buffers
3828 // until we run out of buffers or we need to abort.
3829 if (G1CollectedHeap::use_parallel_gc_threads()) {
3830 while (!has_aborted() &&
3831 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3832 if (_cm->verbose_medium()) {
3833 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3834 }
3835 statsOnly( ++_satb_buffers_processed );
3836 regular_clock_call();
3837 }
3838 } else {
3839 while (!has_aborted() &&
3840 satb_mq_set.apply_closure_to_completed_buffer()) {
3841 if (_cm->verbose_medium()) {
3842 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3843 }
3844 statsOnly( ++_satb_buffers_processed );
3845 regular_clock_call();
3846 }
3847 }
3849 if (!concurrent() && !has_aborted()) {
3850 // We should only do this during remark.
3851 if (G1CollectedHeap::use_parallel_gc_threads()) {
3852 satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3853 } else {
3854 satb_mq_set.iterate_closure_all_threads();
3855 }
3856 }
3858 _draining_satb_buffers = false;
3860 assert(has_aborted() ||
3861 concurrent() ||
3862 satb_mq_set.completed_buffers_num() == 0, "invariant");
3864 if (G1CollectedHeap::use_parallel_gc_threads()) {
3865 satb_mq_set.set_par_closure(_worker_id, NULL);
3866 } else {
3867 satb_mq_set.set_closure(NULL);
3868 }
3870   // again, this was a potentially expensive operation, so decrease the
3871 // limits to get the regular clock call early
3872 decrease_limits();
3873 }
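// ---------------------------------------------------------------------------
// Editor's sketch: the re-entrancy guard pattern used above. While
// _draining_satb_buffers is set, the regular clock skips abort condition
// (6), so a task that is already draining SATB buffers is never told to
// abort in order to drain them. The original sets and clears the flag
// manually; an equivalent RAII rendering (an assumption, not the original
// style) would be:
struct ToyDrainGuard {
  bool& flag;
  explicit ToyDrainGuard(bool& f) : flag(f) { flag = true; }
  ~ToyDrainGuard() { flag = false; }  // cleared on every exit path
};
// ---------------------------------------------------------------------------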
3875 void CMTask::print_stats() {
3876 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3877 _worker_id, _calls);
3878 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3879 _elapsed_time_ms, _termination_time_ms);
3880 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3881 _step_times_ms.num(), _step_times_ms.avg(),
3882 _step_times_ms.sd());
3883 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3884 _step_times_ms.maximum(), _step_times_ms.sum());
3886 #if _MARKING_STATS_
3887 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3888 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3889 _all_clock_intervals_ms.sd());
3890 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3891 _all_clock_intervals_ms.maximum(),
3892 _all_clock_intervals_ms.sum());
3893 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3894 _clock_due_to_scanning, _clock_due_to_marking);
3895 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3896 _objs_scanned, _objs_found_on_bitmap);
3897 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3898 _local_pushes, _local_pops, _local_max_size);
3899 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3900 _global_pushes, _global_pops, _global_max_size);
3901 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3902 _global_transfers_to,_global_transfers_from);
3903 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3904 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3905 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3906 _steal_attempts, _steals);
3907 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3908 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3909 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3910 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3911 _aborted_timed_out, _aborted_satb, _aborted_termination);
3912 #endif // _MARKING_STATS_
3913 }
3915 /*****************************************************************************
3917 The do_marking_step(time_target_ms, ...) method is the building
3918 block of the parallel marking framework. It can be called in parallel
3919 with other invocations of do_marking_step() on different tasks
3920 (but only one per task, obviously) and concurrently with the
3921    mutator threads, or during remark; hence it eliminates the need
3922 for two versions of the code. When called during remark, it will
3923 pick up from where the task left off during the concurrent marking
3924 phase. Interestingly, tasks are also claimable during evacuation
3925    pauses, since do_marking_step() ensures that it aborts before
3926 it needs to yield.
3928 The data structures that it uses to do marking work are the
3929 following:
3931 (1) Marking Bitmap. If there are gray objects that appear only
3932 on the bitmap (this happens either when dealing with an overflow
3933 or when the initial marking phase has simply marked the roots
3934 and didn't push them on the stack), then tasks claim heap
3935 regions whose bitmap they then scan to find gray objects. A
3936 global finger indicates where the end of the last claimed region
3937 is. A local finger indicates how far into the region a task has
3938 scanned. The two fingers are used to determine how to gray an
3939 object (i.e. whether simply marking it is OK, as it will be
3940 visited by a task in the future, or whether it needs to be also
3941 pushed on a stack).
3943 (2) Local Queue. The local queue of the task which is accessed
3944 reasonably efficiently by the task. Other tasks can steal from
3945 it when they run out of work. Throughout the marking phase, a
3946 task attempts to keep its local queue short but not totally
3947 empty, so that entries are available for stealing by other
3948    tasks. Only when there is no more work will a task totally
3949 drain its local queue.
3951 (3) Global Mark Stack. This handles local queue overflow. During
3952 marking only sets of entries are moved between it and the local
3953 queues, as access to it requires a mutex and more fine-grain
3954 interaction with it which might cause contention. If it
3955 overflows, then the marking phase should restart and iterate
3956 over the bitmap to identify gray objects. Throughout the marking
3957 phase, tasks attempt to keep the global mark stack at a small
3958 length but not totally empty, so that entries are available for
3959    popping by other tasks. Only when there is no more work will
3960    tasks totally drain the global mark stack.
3962 (4) SATB Buffer Queue. This is where completed SATB buffers are
3963 made available. Buffers are regularly removed from this queue
3964 and scanned for roots, so that the queue doesn't get too
3965 long. During remark, all completed buffers are processed, as
3966    well as the filled-in parts of any uncompleted buffers.
3968 The do_marking_step() method tries to abort when the time target
3969 has been reached. There are a few other cases when the
3970 do_marking_step() method also aborts:
3972 (1) When the marking phase has been aborted (after a Full GC).
3974 (2) When a global overflow (on the global stack) has been
3975 triggered. Before the task aborts, it will actually sync up with
3976 the other tasks to ensure that all the marking data structures
3977 (local queues, stacks, fingers etc.) are re-initialized so that
3978 when do_marking_step() completes, the marking phase can
3979 immediately restart.
3981 (3) When enough completed SATB buffers are available. The
3982 do_marking_step() method only tries to drain SATB buffers right
3983 at the beginning. So, if enough buffers are available, the
3984 marking step aborts and the SATB buffers are processed at
3985 the beginning of the next invocation.
3987    (4) To yield. When we have to yield, we abort and yield
3988    right at the end of do_marking_step(). This saves us from a lot
3989    of hassle as, by yielding, we might allow a Full GC. If this
3990 happens then objects will be compacted underneath our feet, the
3991 heap might shrink, etc. We save checking for this by just
3992 aborting and doing the yield right at the end.
3994 From the above it follows that the do_marking_step() method should
3995 be called in a loop (or, otherwise, regularly) until it completes.
3997 If a marking step completes without its has_aborted() flag being
3998 true, it means it has completed the current marking phase (and
3999 also all other marking tasks have done so and have all synced up).
4001 A method called regular_clock_call() is invoked "regularly" (in
4002 sub ms intervals) throughout marking. It is this clock method that
4003 checks all the abort conditions which were mentioned above and
4004 decides when the task should abort. A work-based scheme is used to
4005 trigger this clock method: when the number of object words the
4006 marking phase has scanned or the number of references the marking
4007    phase has visited reach a given limit. Additional invocations of
4008    the clock method have been planted in a few other strategic places
4009 too. The initial reason for the clock method was to avoid calling
4010 vtime too regularly, as it is quite expensive. So, once it was in
4011 place, it was natural to piggy-back all the other conditions on it
4012 too and not constantly check them throughout the code.
4014 If do_termination is true then do_marking_step will enter its
4015 termination protocol.
4017 The value of is_serial must be true when do_marking_step is being
4018 called serially (i.e. by the VMThread) and do_marking_step should
4019 skip any synchronization in the termination and overflow code.
4020 Examples include the serial remark code and the serial reference
4021 processing closures.
4023 The value of is_serial must be false when do_marking_step is
4024 being called by any of the worker threads in a work gang.
4025 Examples include the concurrent marking code (CMMarkingTask),
4026 the MT remark code, and the MT reference processing closures.
4028 *****************************************************************************/
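// ---------------------------------------------------------------------------
// Editor's sketch: the calling convention the comment block above
// prescribes -- keep re-invoking the step until one invocation completes
// without its abort flag set. ToyMarkTask is a hypothetical stand-in; a real
// driver (e.g. CMConcurrentMarkingTask) also yields or enters the overflow
// protocol between retries.
#include <atomic>

struct ToyMarkTask {
  std::atomic<bool> aborted;
  ToyMarkTask() : aborted(false) { }
  void do_marking_step(double /* time_target_ms */,
                       bool   /* do_termination */,
                       bool   /* is_serial */) {
    // ... do a bounded chunk of work; set aborted to bail out early ...
    aborted.store(false);  // the toy always completes
  }
  bool has_aborted() const { return aborted.load(); }
};

static void toy_drive(ToyMarkTask& task) {
  do {
    task.do_marking_step(10.0 /* ms */, true, false);
    // on abort a real driver would yield / sync up before retrying
  } while (task.has_aborted());
}
// ---------------------------------------------------------------------------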
4030 void CMTask::do_marking_step(double time_target_ms,
4031 bool do_termination,
4032 bool is_serial) {
4033 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4034 assert(concurrent() == _cm->concurrent(), "they should be the same");
4036 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4037 assert(_task_queues != NULL, "invariant");
4038 assert(_task_queue != NULL, "invariant");
4039 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4041 assert(!_claimed,
4042 "only one thread should claim this task at any one time");
4044   // OK, this doesn't safeguard against all possible scenarios, as it is
4045 // possible for two threads to set the _claimed flag at the same
4046 // time. But it is only for debugging purposes anyway and it will
4047 // catch most problems.
4048 _claimed = true;
4050 _start_time_ms = os::elapsedVTime() * 1000.0;
4051 statsOnly( _interval_start_time_ms = _start_time_ms );
4053 // If do_stealing is true then do_marking_step will attempt to
4054 // steal work from the other CMTasks. It only makes sense to
4055 // enable stealing when the termination protocol is enabled
4056 // and do_marking_step() is not being called serially.
4057 bool do_stealing = do_termination && !is_serial;
4059 double diff_prediction_ms =
4060 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4061 _time_target_ms = time_target_ms - diff_prediction_ms;
4063 // set up the variables that are used in the work-based scheme to
4064 // call the regular clock method
4065 _words_scanned = 0;
4066 _refs_reached = 0;
4067 recalculate_limits();
4069 // clear all flags
4070 clear_has_aborted();
4071 _has_timed_out = false;
4072 _draining_satb_buffers = false;
4074 ++_calls;
4076 if (_cm->verbose_low()) {
4077 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4078 "target = %1.2lfms >>>>>>>>>>",
4079 _worker_id, _calls, _time_target_ms);
4080 }
4082 // Set up the bitmap and oop closures. Anything that uses them is
4083 // eventually called from this method, so it is OK to allocate these
4084 // statically.
4085 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4086 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4087 set_cm_oop_closure(&cm_oop_closure);
4089 if (_cm->has_overflown()) {
4090 // This can happen if the mark stack overflows during a GC pause
4091 // and this task, after a yield point, restarts. We have to abort
4092 // as we need to get into the overflow protocol which happens
4093 // right at the end of this task.
4094 set_has_aborted();
4095 }
4097 // First drain any available SATB buffers. After this, we will not
4098 // look at SATB buffers before the next invocation of this method.
4099 // If enough completed SATB buffers are queued up, the regular clock
4100 // will abort this task so that it restarts.
4101 drain_satb_buffers();
4102 // ...then partially drain the local queue and the global stack
4103 drain_local_queue(true);
4104 drain_global_stack(true);
4106 do {
4107 if (!has_aborted() && _curr_region != NULL) {
4108 // This means that we're already holding on to a region.
4109 assert(_finger != NULL, "if region is not NULL, then the finger "
4110 "should not be NULL either");
4112 // We might have restarted this task after an evacuation pause
4113 // which might have evacuated the region we're holding on to
4114 // underneath our feet. Let's read its limit again to make sure
4115 // that we do not iterate over a region of the heap that
4116 // contains garbage (update_region_limit() will also move
4117 // _finger to the start of the region if it is found empty).
4118 update_region_limit();
4119 // We will start from _finger not from the start of the region,
4120 // as we might be restarting this task after aborting half-way
4121 // through scanning this region. In this case, _finger points to
4122 // the address where we last found a marked object. If this is a
4123 // fresh region, _finger points to start().
4124 MemRegion mr = MemRegion(_finger, _region_limit);
4126 if (_cm->verbose_low()) {
4127 gclog_or_tty->print_cr("[%u] we're scanning part "
4128 "["PTR_FORMAT", "PTR_FORMAT") "
4129 "of region "HR_FORMAT,
4130 _worker_id, _finger, _region_limit,
4131 HR_FORMAT_PARAMS(_curr_region));
4132 }
4134 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4135 "humongous regions should go around loop once only");
4137 // Some special cases:
4138 // If the memory region is empty, we can just give up the region.
4139 // If the current region is humongous then we only need to check
4140 // the bitmap for the bit associated with the start of the object,
4141 // scan the object if it's live, and give up the region.
4142 // Otherwise, let's iterate over the bitmap of the part of the region
4143 // that is left.
4144 // If the iteration is successful, give up the region.
4145 if (mr.is_empty()) {
4146 giveup_current_region();
4147 regular_clock_call();
4148 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4149 if (_nextMarkBitMap->isMarked(mr.start())) {
4150 // The object is marked - apply the closure
4151 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4152 bitmap_closure.do_bit(offset);
4153 }
4154 // Even if this task aborted while scanning the humongous object
4155 // we can (and should) give up the current region.
4156 giveup_current_region();
4157 regular_clock_call();
4158 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4159 giveup_current_region();
4160 regular_clock_call();
4161 } else {
4162 assert(has_aborted(), "currently the only way to do so");
4163 // The only way to abort the bitmap iteration is to return
4164 // false from the do_bit() method. However, inside the
4165 // do_bit() method we move the _finger to point to the
4166 // object currently being looked at. So, if we bail out, we
4167 // have definitely set _finger to something non-null.
4168 assert(_finger != NULL, "invariant");
4170 // Region iteration was actually aborted. So now _finger
4171 // points to the address of the object we last scanned. If we
4172 // leave it there, when we restart this task, we will rescan
4173 // the object. It is easy to avoid this. We move the finger by
4174 // enough to point to the next possible object header (the
4175 // bitmap knows by how much we need to move it as it knows its
4176 // granularity).
4177 assert(_finger < _region_limit, "invariant");
4178 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4179 // Check if bitmap iteration was aborted while scanning the last object
4180 if (new_finger >= _region_limit) {
4181 giveup_current_region();
4182 } else {
4183 move_finger_to(new_finger);
4184 }
4185 }
4186 }
4187 // At this point we have either completed iterating over the
4188 // region we were holding on to, or we have aborted.
4190 // We then partially drain the local queue and the global stack.
4191 // (Do we really need this?)
4192 drain_local_queue(true);
4193 drain_global_stack(true);
4195 // Read the note on the claim_region() method on why it might
4196 // return NULL with potentially more regions available for
4197 // claiming and why we have to check out_of_regions() to determine
4198 // whether we're done or not.
4199 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4200 // We are going to try to claim a new region. We should have
4201 // given up on the previous one.
4202 // Separated the asserts so that we know which one fires.
4203 assert(_curr_region == NULL, "invariant");
4204 assert(_finger == NULL, "invariant");
4205 assert(_region_limit == NULL, "invariant");
4206 if (_cm->verbose_low()) {
4207 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4208 }
4209 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4210 if (claimed_region != NULL) {
4211 // Yes, we managed to claim one
4212 statsOnly( ++_regions_claimed );
4214 if (_cm->verbose_low()) {
4215 gclog_or_tty->print_cr("[%u] we successfully claimed "
4216 "region "PTR_FORMAT,
4217 _worker_id, claimed_region);
4218 }
4220 setup_for_region(claimed_region);
4221 assert(_curr_region == claimed_region, "invariant");
4222 }
4223 // It is important to call the regular clock here. It might take
4224 // a while to claim a region if, for example, we hit a large
4225 // block of empty regions. So we need to call the regular clock
4226 // method once round the loop to make sure it's called
4227 // frequently enough.
4228 regular_clock_call();
4229 }
4231 if (!has_aborted() && _curr_region == NULL) {
4232 assert(_cm->out_of_regions(),
4233 "at this point we should be out of regions");
4234 }
4235 } while ( _curr_region != NULL && !has_aborted());
4237 if (!has_aborted()) {
4238 // We cannot check whether the global stack is empty, since other
4239 // tasks might be pushing objects to it concurrently.
4240 assert(_cm->out_of_regions(),
4241 "at this point we should be out of regions");
4243 if (_cm->verbose_low()) {
4244 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4245 }
4247 // Try to reduce the number of available SATB buffers so that
4248 // remark has less work to do.
4249 drain_satb_buffers();
4250 }
4252 // Since we've done everything else, we can now totally drain the
4253 // local queue and global stack.
4254 drain_local_queue(false);
4255 drain_global_stack(false);
4257   // Attempt work stealing from other tasks' queues.
4258 if (do_stealing && !has_aborted()) {
4259 // We have not aborted. This means that we have finished all that
4260 // we could. Let's try to do some stealing...
4262 // We cannot check whether the global stack is empty, since other
4263 // tasks might be pushing objects to it concurrently.
4264 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4265 "only way to reach here");
4267 if (_cm->verbose_low()) {
4268 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4269 }
4271 while (!has_aborted()) {
4272 oop obj;
4273 statsOnly( ++_steal_attempts );
4275 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4276 if (_cm->verbose_medium()) {
4277 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4278 _worker_id, (void*) obj);
4279 }
4281 statsOnly( ++_steals );
4283 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4284 "any stolen object should be marked");
4285 scan_object(obj);
4287 // And since we're towards the end, let's totally drain the
4288 // local queue and global stack.
4289 drain_local_queue(false);
4290 drain_global_stack(false);
4291 } else {
4292 break;
4293 }
4294 }
4295 }
4297 // If we are about to wrap up and go into termination, check if we
4298 // should raise the overflow flag.
4299 if (do_termination && !has_aborted()) {
4300 if (_cm->force_overflow()->should_force()) {
4301 _cm->set_has_overflown();
4302 regular_clock_call();
4303 }
4304 }
4306 // We still haven't aborted. Now, let's try to get into the
4307 // termination protocol.
4308 if (do_termination && !has_aborted()) {
4309 // We cannot check whether the global stack is empty, since other
4310 // tasks might be concurrently pushing objects on it.
4311 // Separated the asserts so that we know which one fires.
4312 assert(_cm->out_of_regions(), "only way to reach here");
4313 assert(_task_queue->size() == 0, "only way to reach here");
4315 if (_cm->verbose_low()) {
4316 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4317 }
4319 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4321 // The CMTask class also extends the TerminatorTerminator class,
4322 // hence its should_exit_termination() method will also decide
4323 // whether to exit the termination protocol or not.
4324 bool finished = (is_serial ||
4325 _cm->terminator()->offer_termination(this));
4326 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4327 _termination_time_ms +=
4328 termination_end_time_ms - _termination_start_time_ms;
4330 if (finished) {
4331 // We're all done.
4333 if (_worker_id == 0) {
4334 // let's allow task 0 to do this
4335 if (concurrent()) {
4336 assert(_cm->concurrent_marking_in_progress(), "invariant");
4337 // we need to set this to false before the next
4338 // safepoint. This way we ensure that the marking phase
4339 // doesn't observe any more heap expansions.
4340 _cm->clear_concurrent_marking_in_progress();
4341 }
4342 }
4344 // We can now guarantee that the global stack is empty, since
4345 // all other tasks have finished. We separated the guarantees so
4346 // that, if a condition is false, we can immediately find out
4347 // which one.
4348 guarantee(_cm->out_of_regions(), "only way to reach here");
4349 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4350 guarantee(_task_queue->size() == 0, "only way to reach here");
4351 guarantee(!_cm->has_overflown(), "only way to reach here");
4352 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4354 if (_cm->verbose_low()) {
4355 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4356 }
4357 } else {
4358       // Apparently there's more work to do. Let's abort this task. The
4359       // caller will restart it and we can hopefully find more things to do.
4361 if (_cm->verbose_low()) {
4362 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4363 _worker_id);
4364 }
4366 set_has_aborted();
4367 statsOnly( ++_aborted_termination );
4368 }
4369 }
4371 // Mainly for debugging purposes to make sure that a pointer to the
4372   // closure which was stack-allocated in this frame doesn't
4373 // escape it by accident.
4374 set_cm_oop_closure(NULL);
4375 double end_time_ms = os::elapsedVTime() * 1000.0;
4376 double elapsed_time_ms = end_time_ms - _start_time_ms;
4377 // Update the step history.
4378 _step_times_ms.add(elapsed_time_ms);
4380 if (has_aborted()) {
4381 // The task was aborted for some reason.
4383 statsOnly( ++_aborted );
4385 if (_has_timed_out) {
4386 double diff_ms = elapsed_time_ms - _time_target_ms;
4387 // Keep statistics of how well we did with respect to hitting
4388 // our target only if we actually timed out (if we aborted for
4389 // other reasons, then the results might get skewed).
4390 _marking_step_diffs_ms.add(diff_ms);
4391 }
4393 if (_cm->has_overflown()) {
4394 // This is the interesting one. We aborted because a global
4395 // overflow was raised. This means we have to restart the
4396 // marking phase and start iterating over regions. However, in
4397 // order to do this we have to make sure that all tasks stop
4398 // what they are doing and re-initialise in a safe manner. We
4399 // will achieve this with the use of two barrier sync points.
4401 if (_cm->verbose_low()) {
4402 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4403 }
4405 if (!is_serial) {
4406 // We only need to enter the sync barrier if being called
4407 // from a parallel context
4408 _cm->enter_first_sync_barrier(_worker_id);
4410 // When we exit this sync barrier we know that all tasks have
4411 // stopped doing marking work. So, it's now safe to
4412 // re-initialise our data structures. At the end of this method,
4413 // task 0 will clear the global data structures.
4414 }
4416 statsOnly( ++_aborted_overflow );
4418 // We clear the local state of this task...
4419 clear_region_fields();
4421 if (!is_serial) {
4422 // ...and enter the second barrier.
4423 _cm->enter_second_sync_barrier(_worker_id);
4424 }
4425     // At this point, if we're in the concurrent phase of
4426 // marking, everything has been re-initialized and we're
4427 // ready to restart.
4428 }
4430 if (_cm->verbose_low()) {
4431 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4432 "elapsed = %1.2lfms <<<<<<<<<<",
4433 _worker_id, _time_target_ms, elapsed_time_ms);
4434 if (_cm->has_aborted()) {
4435 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4436 _worker_id);
4437 }
4438 }
4439 } else {
4440 if (_cm->verbose_low()) {
4441 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4442 "elapsed = %1.2lfms <<<<<<<<<<",
4443 _worker_id, _time_target_ms, elapsed_time_ms);
4444 }
4445 }
4447 _claimed = false;
4448 }
4450 CMTask::CMTask(uint worker_id,
4451 ConcurrentMark* cm,
4452 size_t* marked_bytes,
4453 BitMap* card_bm,
4454 CMTaskQueue* task_queue,
4455 CMTaskQueueSet* task_queues)
4456 : _g1h(G1CollectedHeap::heap()),
4457 _worker_id(worker_id), _cm(cm),
4458 _claimed(false),
4459 _nextMarkBitMap(NULL), _hash_seed(17),
4460 _task_queue(task_queue),
4461 _task_queues(task_queues),
4462 _cm_oop_closure(NULL),
4463 _marked_bytes_array(marked_bytes),
4464 _card_bm(card_bm) {
4465 guarantee(task_queue != NULL, "invariant");
4466 guarantee(task_queues != NULL, "invariant");
4468 statsOnly( _clock_due_to_scanning = 0;
4469 _clock_due_to_marking = 0 );
4471 _marking_step_diffs_ms.add(0.5);
4472 }
4474 // These are formatting macros that are used below to ensure
4475 // consistent formatting. The *_H_* versions are used to format the
4476 // header for a particular value and they should be kept consistent
4477 // with the corresponding macro. Also note that most of the macros add
4478 // the necessary white space (as a prefix) which makes them a bit
4479 // easier to compose.
4481 // All the output lines are prefixed with this string to be able to
4482 // identify them easily in a large log file.
4483 #define G1PPRL_LINE_PREFIX "###"
4485 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4486 #ifdef _LP64
4487 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4488 #else // _LP64
4489 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4490 #endif // _LP64
4492 // For per-region info
4493 #define G1PPRL_TYPE_FORMAT " %-4s"
4494 #define G1PPRL_TYPE_H_FORMAT " %4s"
4495 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4496 #define G1PPRL_BYTE_H_FORMAT " %9s"
4497 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4498 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4500 // For summary info
4501 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4502 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4503 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4504 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
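// ---------------------------------------------------------------------------
// Editor's note: the macros above compose via C/C++ adjacent string-literal
// concatenation -- e.g. " "tag": %1.2f MB" pastes the tag into one format
// string at preprocessing time. A minimal standalone illustration (spaces
// added around the macro argument for modern compilers):
#include <cstdio>

#define TOY_SUM_MB_FORMAT(tag) " " tag ": %1.2f MB"

static void toy_format_demo() {
  // Expands to printf("###" " " "capacity" ": %1.2f MB" "\n", 12.5);
  printf("###" TOY_SUM_MB_FORMAT("capacity") "\n", 12.5);
}
// ---------------------------------------------------------------------------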
4506 G1PrintRegionLivenessInfoClosure::
4507 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4508 : _out(out),
4509 _total_used_bytes(0), _total_capacity_bytes(0),
4510 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4511 _hum_used_bytes(0), _hum_capacity_bytes(0),
4512 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4513 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4514 MemRegion g1_committed = g1h->g1_committed();
4515 MemRegion g1_reserved = g1h->g1_reserved();
4516 double now = os::elapsedTime();
4518 // Print the header of the output.
4519 _out->cr();
4520 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4521 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4522 G1PPRL_SUM_ADDR_FORMAT("committed")
4523 G1PPRL_SUM_ADDR_FORMAT("reserved")
4524 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4525 g1_committed.start(), g1_committed.end(),
4526 g1_reserved.start(), g1_reserved.end(),
4527 HeapRegion::GrainBytes);
4528 _out->print_cr(G1PPRL_LINE_PREFIX);
4529 _out->print_cr(G1PPRL_LINE_PREFIX
4530 G1PPRL_TYPE_H_FORMAT
4531 G1PPRL_ADDR_BASE_H_FORMAT
4532 G1PPRL_BYTE_H_FORMAT
4533 G1PPRL_BYTE_H_FORMAT
4534 G1PPRL_BYTE_H_FORMAT
4535 G1PPRL_DOUBLE_H_FORMAT,
4536 "type", "address-range",
4537 "used", "prev-live", "next-live", "gc-eff");
4538 _out->print_cr(G1PPRL_LINE_PREFIX
4539 G1PPRL_TYPE_H_FORMAT
4540 G1PPRL_ADDR_BASE_H_FORMAT
4541 G1PPRL_BYTE_H_FORMAT
4542 G1PPRL_BYTE_H_FORMAT
4543 G1PPRL_BYTE_H_FORMAT
4544 G1PPRL_DOUBLE_H_FORMAT,
4545 "", "",
4546 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4547 }
4549 // It takes as a parameter a reference to one of the _hum_* fields,
4550 // deduces the corresponding value for a region in a humongous region
4551 // series (either the region size, or what's left if the _hum_* field
4552 // is < the region size), and updates the _hum_* field accordingly.
4553 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4554 size_t bytes = 0;
4555 // The > 0 check is to deal with the prev and next live bytes which
4556 // could be 0.
4557 if (*hum_bytes > 0) {
4558 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4559 *hum_bytes -= bytes;
4560 }
4561 return bytes;
4562 }
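// ---------------------------------------------------------------------------
// Editor's sketch: how the helper above doles the humongous totals out one
// region at a time. With an (illustrative) 1 MB region and a 2.5 MB series,
// successive calls yield 1 MB, 1 MB, 0.5 MB, then 0.
#include <algorithm>
#include <cassert>
#include <cstddef>

static size_t toy_take_hum_bytes(size_t* hum_bytes, size_t grain_bytes) {
  size_t bytes = 0;
  if (*hum_bytes > 0) {
    bytes = std::min(grain_bytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

static void toy_hum_demo() {
  const size_t MB = 1024 * 1024;
  size_t remaining = 2 * MB + MB / 2;                    // 2.5 MB series
  assert(toy_take_hum_bytes(&remaining, MB) == MB);      // "starts humongous"
  assert(toy_take_hum_bytes(&remaining, MB) == MB);      // first "continues"
  assert(toy_take_hum_bytes(&remaining, MB) == MB / 2);  // tail of the object
  assert(toy_take_hum_bytes(&remaining, MB) == 0);       // nothing left
}
// ---------------------------------------------------------------------------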
4564 // It deduces the values for a region in a humongous region series
4565 // from the _hum_* fields and updates those accordingly. It assumes
4566 // that that _hum_* fields have already been set up from the "starts
4567 // humongous" region and we visit the regions in address order.
4568 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4569 size_t* capacity_bytes,
4570 size_t* prev_live_bytes,
4571 size_t* next_live_bytes) {
4572 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4573 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4574 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4575 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4576 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4577 }
4579 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4580 const char* type = "";
4581 HeapWord* bottom = r->bottom();
4582 HeapWord* end = r->end();
4583 size_t capacity_bytes = r->capacity();
4584 size_t used_bytes = r->used();
4585 size_t prev_live_bytes = r->live_bytes();
4586 size_t next_live_bytes = r->next_live_bytes();
4587 double gc_eff = r->gc_efficiency();
4588 if (r->used() == 0) {
4589 type = "FREE";
4590 } else if (r->is_survivor()) {
4591 type = "SURV";
4592 } else if (r->is_young()) {
4593 type = "EDEN";
4594 } else if (r->startsHumongous()) {
4595 type = "HUMS";
4597 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4598 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4599 "they should have been zeroed after the last time we used them");
4600 // Set up the _hum_* fields.
4601 _hum_capacity_bytes = capacity_bytes;
4602 _hum_used_bytes = used_bytes;
4603 _hum_prev_live_bytes = prev_live_bytes;
4604 _hum_next_live_bytes = next_live_bytes;
4605 get_hum_bytes(&used_bytes, &capacity_bytes,
4606 &prev_live_bytes, &next_live_bytes);
4607 end = bottom + HeapRegion::GrainWords;
4608 } else if (r->continuesHumongous()) {
4609 type = "HUMC";
4610 get_hum_bytes(&used_bytes, &capacity_bytes,
4611 &prev_live_bytes, &next_live_bytes);
4612 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4613 } else {
4614 type = "OLD";
4615 }
4617 _total_used_bytes += used_bytes;
4618 _total_capacity_bytes += capacity_bytes;
4619 _total_prev_live_bytes += prev_live_bytes;
4620 _total_next_live_bytes += next_live_bytes;
4622 // Print a line for this particular region.
4623 _out->print_cr(G1PPRL_LINE_PREFIX
4624 G1PPRL_TYPE_FORMAT
4625 G1PPRL_ADDR_BASE_FORMAT
4626 G1PPRL_BYTE_FORMAT
4627 G1PPRL_BYTE_FORMAT
4628 G1PPRL_BYTE_FORMAT
4629 G1PPRL_DOUBLE_FORMAT,
4630 type, bottom, end,
4631 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4633 return false;
4634 }
4636 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4637 // Print the footer of the output.
4638 _out->print_cr(G1PPRL_LINE_PREFIX);
4639 _out->print_cr(G1PPRL_LINE_PREFIX
4640 " SUMMARY"
4641 G1PPRL_SUM_MB_FORMAT("capacity")
4642 G1PPRL_SUM_MB_PERC_FORMAT("used")
4643 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4644 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4645 bytes_to_mb(_total_capacity_bytes),
4646 bytes_to_mb(_total_used_bytes),
4647 perc(_total_used_bytes, _total_capacity_bytes),
4648 bytes_to_mb(_total_prev_live_bytes),
4649 perc(_total_prev_live_bytes, _total_capacity_bytes),
4650 bytes_to_mb(_total_next_live_bytes),
4651 perc(_total_next_live_bytes, _total_capacity_bytes));
4652 _out->cr();
4653 }