Mon, 07 Jul 2014 10:12:40 +0200
8049421: G1 Class Unloading after completing a concurrent mark cycle
Reviewed-by: tschatzl, ehelin, brutisso, coleenp, roland, iveresov
Contributed-by: stefan.karlsson@oracle.com, mikael.gerdin@oracle.com
/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper
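// Note: each bit in the map covers (1 << _shifter) heap words; the shifter
// is set from log2(MinObjAlignment) (see the ConcurrentMark constructor
// below), so one bit corresponds to the smallest possible object-start
// granularity.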

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize == heap_rs.size() >> LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize = heap_rs.size() / HeapWordSize; // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((BitMap::bm_word_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity * 2, (jint) MarkStackSizeMax);
  // Do not give up the existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with the old stack
    _virtual_space.release();
    // Reinitialize virtual space for the new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue;
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT"K to " SIZE_FORMAT"K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}
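
// Multi-threaded push: a slot is reserved by advancing _index with a CAS
// and is written only by the winning thread, so concurrent pushers never
// store to the same slot. A lost race simply retries; a full stack is
// reported via the _overflow flag rather than by blocking.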
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + 1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
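
// Bulk push taken under ParGCRareEvent_lock rather than with a CAS loop;
// as the lock's name suggests, this path is expected to be rare, so the
// simpler locking protocol suffices here.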
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
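
// Drains the stack by repeatedly popping an entry and applying cl to the
// popped object's fields; if yield_after is true the drain may stop early
// (returning false) when a yield request is observed.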
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}
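
// Hand out the next unclaimed root region, or NULL if there is none (or if
// scanning should be aborted). Claiming is serialized on
// RootRegionScan_lock so that each survivor region is given to exactly one
// worker.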
HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER
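
// Heuristic: scale the number of marking threads to roughly a quarter of
// the parallel GC threads (rounded), never going below one.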
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(log2_intptr(MinObjAlignment)),
  _markBitMap2(log2_intptr(MinObjAlignment)),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions;
  // these happen at the end of evacuation pauses, when the tasks are
  // inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty(); // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start = _nextMarkBitMap->startWord();
  HeapWord* end = _nextMarkBitMap->endWord();
  HeapWord* cur = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur, next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();

  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_roots()
    // and the decisions on that MT processing is made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in the heap, obj_end
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.

    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};
1646 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1647 protected:
1648 G1CollectedHeap* _g1h;
1649 ConcurrentMark* _cm;
1650 BitMap* _actual_region_bm;
1651 BitMap* _actual_card_bm;
1653 uint _n_workers;
1655 BitMap* _expected_region_bm;
1656 BitMap* _expected_card_bm;
1658 int _failures;
1659 bool _verbose;
1661 public:
1662 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1663 BitMap* region_bm, BitMap* card_bm,
1664 BitMap* expected_region_bm, BitMap* expected_card_bm)
1665 : AbstractGangTask("G1 verify final counting"),
1666 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1667 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1668 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1669 _failures(0), _verbose(false),
1670 _n_workers(0) {
1671 assert(VerifyDuringGC, "don't call this otherwise");
1673 // Use the value already set as the number of active threads
1674 // in the call to run_task().
1675 if (G1CollectedHeap::use_parallel_gc_threads()) {
1676 assert( _g1h->workers()->active_workers() > 0,
1677 "Should have been previously set");
1678 _n_workers = _g1h->workers()->active_workers();
1679 } else {
1680 _n_workers = 1;
1681 }
1683 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1684 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1686 _verbose = _cm->verbose_medium();
1687 }
1689 void work(uint worker_id) {
1690 assert(worker_id < _n_workers, "invariant");
1692 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1693 _actual_region_bm, _actual_card_bm,
1694 _expected_region_bm,
1695 _expected_card_bm,
1696 _verbose);
1698 if (G1CollectedHeap::use_parallel_gc_threads()) {
1699 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1700 worker_id,
1701 _n_workers,
1702 HeapRegion::VerifyCountClaimValue);
1703 } else {
1704 _g1h->heap_region_iterate(&verify_cl);
1705 }
1707 Atomic::add(verify_cl.failures(), &_failures);
1708 }
1710 int failures() const { return _failures; }
1711 };
1713 // Closure that finalizes the liveness counting data.
1714 // Used during the cleanup pause.
1715 // Sets the bits corresponding to the interval [NTAMS, top]
1716 // (which contains the implicitly live objects) in the
1717 // card liveness bitmap. Also sets the bit for each region,
1718 // containing live data, in the region liveness bitmap.
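//
// For orientation, the address-to-card-bit mapping relied on below is, in
// essence, the following (an illustrative sketch using a hypothetical
// free-standing helper; the real lookup is
// ConcurrentMark::card_bitmap_index_for):
//
//   // heap_base: bottom of the reserved heap (assumed card aligned)
//   static BitMap::idx_t card_index_for(HeapWord* addr, HeapWord* heap_base) {
//     // With 512-byte cards and 8-byte heap words this shifts the word
//     // offset right by CardTableModRefBS::card_shift - LogHeapWordSize = 6.
//     return (BitMap::idx_t) (pointer_delta(addr, heap_base) >>
//                             (CardTableModRefBS::card_shift - LogHeapWordSize));
//   }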
1720 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1721 public:
1722 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1723 BitMap* region_bm,
1724 BitMap* card_bm) :
1725 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1727 bool doHeapRegion(HeapRegion* hr) {
1729 if (hr->continuesHumongous()) {
1730 // We will ignore these here and process them when their
1731 // associated "starts humongous" region is processed (see
1732 // set_bit_for_heap_region()). Note that we cannot rely on their
1733 // associated "starts humongous" region to have its bit set to
1734 // 1 since, due to the region chunking in the parallel region
1735 // iteration, a "continues humongous" region might be visited
1736 // before its associated "starts humongous" region.
1737 return false;
1738 }
1740 HeapWord* ntams = hr->next_top_at_mark_start();
1741 HeapWord* top = hr->top();
1743 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1745 // Mark the allocated-since-marking portion...
1746 if (ntams < top) {
1747 // This definitely means the region has live objects.
1748 set_bit_for_region(hr);
1750 // Now set the bits in the card bitmap for [ntams, top)
1751 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1752 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1754 // Note: if we're looking at the last region in the heap, top
1755 // could actually be just beyond the end of the heap; end_idx
1756 // will then correspond to a (non-existent) card that is also
1757 // just beyond the heap.
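// Worked example, assuming 512-byte cards: for top == heap_base + 0x1218,
// the card index computed from top is 9 (0x1218 >> 9), yet the bytes at
// [0x1200, 0x1218) live in card 9 itself, so end_idx must be bumped to 10
// for the exclusive range [start_idx, end_idx) to cover them.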
1758 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1759 // end of object is not card aligned - increment to cover
1760 // all the cards spanned by the object
1761 end_idx += 1;
1762 }
1764 assert(end_idx <= _card_bm->size(),
1765 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1766 end_idx, _card_bm->size()));
1767 assert(start_idx < _card_bm->size(),
1768 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1769 start_idx, _card_bm->size()));
1771 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1772 }
1774 // Set the bit for the region if it contains live data
1775 if (hr->next_marked_bytes() > 0) {
1776 set_bit_for_region(hr);
1777 }
1779 return false;
1780 }
1781 };
1783 class G1ParFinalCountTask: public AbstractGangTask {
1784 protected:
1785 G1CollectedHeap* _g1h;
1786 ConcurrentMark* _cm;
1787 BitMap* _actual_region_bm;
1788 BitMap* _actual_card_bm;
1790 uint _n_workers;
1792 public:
1793 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1794 : AbstractGangTask("G1 final counting"),
1795 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1796 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1797 _n_workers(0) {
1798 // Use the value already set as the number of active threads
1799 // in the call to run_task().
1800 if (G1CollectedHeap::use_parallel_gc_threads()) {
1801 assert( _g1h->workers()->active_workers() > 0,
1802 "Should have been previously set");
1803 _n_workers = _g1h->workers()->active_workers();
1804 } else {
1805 _n_workers = 1;
1806 }
1807 }
1809 void work(uint worker_id) {
1810 assert(worker_id < _n_workers, "invariant");
1812 FinalCountDataUpdateClosure final_update_cl(_g1h,
1813 _actual_region_bm,
1814 _actual_card_bm);
1816 if (G1CollectedHeap::use_parallel_gc_threads()) {
1817 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1818 worker_id,
1819 _n_workers,
1820 HeapRegion::FinalCountClaimValue);
1821 } else {
1822 _g1h->heap_region_iterate(&final_update_cl);
1823 }
1824 }
1825 };
1827 class G1ParNoteEndTask;
1829 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1830 G1CollectedHeap* _g1;
1831 size_t _max_live_bytes;
1832 uint _regions_claimed;
1833 size_t _freed_bytes;
1834 FreeRegionList* _local_cleanup_list;
1835 HeapRegionSetCount _old_regions_removed;
1836 HeapRegionSetCount _humongous_regions_removed;
1837 HRRSCleanupTask* _hrrs_cleanup_task;
1838 double _claimed_region_time;
1839 double _max_region_time;
1841 public:
1842 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1843 FreeRegionList* local_cleanup_list,
1844 HRRSCleanupTask* hrrs_cleanup_task) :
1845 _g1(g1),
1846 _max_live_bytes(0), _regions_claimed(0),
1847 _freed_bytes(0),
1848 _claimed_region_time(0.0), _max_region_time(0.0),
1849 _local_cleanup_list(local_cleanup_list),
1850 _old_regions_removed(),
1851 _humongous_regions_removed(),
1852 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1854 size_t freed_bytes() { return _freed_bytes; }
1855 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1856 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1858 bool doHeapRegion(HeapRegion *hr) {
1859 if (hr->continuesHumongous()) {
1860 return false;
1861 }
1862 // We use a claim value of zero here because all regions
1863 // were claimed with value 1 in the FinalCount task.
1864 _g1->reset_gc_time_stamps(hr);
1865 double start = os::elapsedTime();
1866 _regions_claimed++;
1867 hr->note_end_of_marking();
1868 _max_live_bytes += hr->max_live_bytes();
1870 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1871 _freed_bytes += hr->used();
1872 hr->set_containing_set(NULL);
1873 if (hr->isHumongous()) {
1874 assert(hr->startsHumongous(), "we should only see starts humongous");
1875 _humongous_regions_removed.increment(1u, hr->capacity());
1876 _g1->free_humongous_region(hr, _local_cleanup_list, true);
1877 } else {
1878 _old_regions_removed.increment(1u, hr->capacity());
1879 _g1->free_region(hr, _local_cleanup_list, true);
1880 }
1881 } else {
1882 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1883 }
1885 double region_time = (os::elapsedTime() - start);
1886 _claimed_region_time += region_time;
1887 if (region_time > _max_region_time) {
1888 _max_region_time = region_time;
1889 }
1890 return false;
1891 }
1893 size_t max_live_bytes() { return _max_live_bytes; }
1894 uint regions_claimed() { return _regions_claimed; }
1895 double claimed_region_time_sec() { return _claimed_region_time; }
1896 double max_region_time_sec() { return _max_region_time; }
1897 };
1899 class G1ParNoteEndTask: public AbstractGangTask {
1900 friend class G1NoteEndOfConcMarkClosure;
1902 protected:
1903 G1CollectedHeap* _g1h;
1904 size_t _max_live_bytes;
1905 size_t _freed_bytes;
1906 FreeRegionList* _cleanup_list;
1908 public:
1909 G1ParNoteEndTask(G1CollectedHeap* g1h,
1910 FreeRegionList* cleanup_list) :
1911 AbstractGangTask("G1 note end"), _g1h(g1h),
1912 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1914 void work(uint worker_id) {
1915 double start = os::elapsedTime();
1916 FreeRegionList local_cleanup_list("Local Cleanup List");
1917 HRRSCleanupTask hrrs_cleanup_task;
1918 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1919 &hrrs_cleanup_task);
1920 if (G1CollectedHeap::use_parallel_gc_threads()) {
1921 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1922 _g1h->workers()->active_workers(),
1923 HeapRegion::NoteEndClaimValue);
1924 } else {
1925 _g1h->heap_region_iterate(&g1_note_end);
1926 }
1927 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1929 // Now update the lists
1930 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1931 {
1932 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1933 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1934 _max_live_bytes += g1_note_end.max_live_bytes();
1935 _freed_bytes += g1_note_end.freed_bytes();
1937 // If we iterate over the global cleanup list at the end of
1938 // cleanup to do this printing, we cannot guarantee that we only
1939 // generate output for the newly-reclaimed regions (the list
1940 // might not be empty at the beginning of cleanup; we might
1941 // still be working on its previous contents). So we do the
1942 // printing here, before we append the new regions to the global
1943 // cleanup list.
1945 G1HRPrinter* hr_printer = _g1h->hr_printer();
1946 if (hr_printer->is_active()) {
1947 FreeRegionListIterator iter(&local_cleanup_list);
1948 while (iter.more_available()) {
1949 HeapRegion* hr = iter.get_next();
1950 hr_printer->cleanup(hr);
1951 }
1952 }
1954 _cleanup_list->add_ordered(&local_cleanup_list);
1955 assert(local_cleanup_list.is_empty(), "post-condition");
1957 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1958 }
1959 }
1960 size_t max_live_bytes() { return _max_live_bytes; }
1961 size_t freed_bytes() { return _freed_bytes; }
1962 };
1964 class G1ParScrubRemSetTask: public AbstractGangTask {
1965 protected:
1966 G1RemSet* _g1rs;
1967 BitMap* _region_bm;
1968 BitMap* _card_bm;
1969 public:
1970 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1971 BitMap* region_bm, BitMap* card_bm) :
1972 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1973 _region_bm(region_bm), _card_bm(card_bm) { }
1975 void work(uint worker_id) {
1976 if (G1CollectedHeap::use_parallel_gc_threads()) {
1977 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1978 HeapRegion::ScrubRemSetClaimValue);
1979 } else {
1980 _g1rs->scrub(_region_bm, _card_bm);
1981 }
1982 }
1984 };
1986 void ConcurrentMark::cleanup() {
1987 // world is stopped at this checkpoint
1988 assert(SafepointSynchronize::is_at_safepoint(),
1989 "world should be stopped");
1990 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1992 // If a full collection has happened, we shouldn't do this.
1993 if (has_aborted()) {
1994 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1995 return;
1996 }
1998 g1h->verify_region_sets_optional();
2000 if (VerifyDuringGC) {
2001 HandleMark hm; // handle scope
2002 Universe::heap()->prepare_for_verify();
2003 Universe::verify(VerifyOption_G1UsePrevMarking,
2004 " VerifyDuringGC:(before)");
2005 }
2007 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2008 g1p->record_concurrent_mark_cleanup_start();
2010 double start = os::elapsedTime();
2012 HeapRegionRemSet::reset_for_cleanup_tasks();
2014 uint n_workers;
2016 // Do counting once more with the world stopped for good measure.
2017 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2019 if (G1CollectedHeap::use_parallel_gc_threads()) {
2020 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2021 "sanity check");
2023 g1h->set_par_threads();
2024 n_workers = g1h->n_par_threads();
2025 assert(g1h->n_par_threads() == n_workers,
2026 "Should not have been reset");
2027 g1h->workers()->run_task(&g1_par_count_task);
2028 // Done with the parallel phase so reset to 0.
2029 g1h->set_par_threads(0);
2031 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2032 "sanity check");
2033 } else {
2034 n_workers = 1;
2035 g1_par_count_task.work(0);
2036 }
2038 if (VerifyDuringGC) {
2039 // Verify that the counting data accumulated during marking matches
2040 // that calculated by walking the marking bitmap.
2042 // Bitmaps to hold expected values
2043 BitMap expected_region_bm(_region_bm.size(), true);
2044 BitMap expected_card_bm(_card_bm.size(), true);
2046 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2047 &_region_bm,
2048 &_card_bm,
2049 &expected_region_bm,
2050 &expected_card_bm);
2052 if (G1CollectedHeap::use_parallel_gc_threads()) {
2053 g1h->set_par_threads((int)n_workers);
2054 g1h->workers()->run_task(&g1_par_verify_task);
2055 // Done with the parallel phase so reset to 0.
2056 g1h->set_par_threads(0);
2058 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2059 "sanity check");
2060 } else {
2061 g1_par_verify_task.work(0);
2062 }
2064 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2065 }
2067 size_t start_used_bytes = g1h->used();
2068 g1h->set_marking_complete();
2070 double count_end = os::elapsedTime();
2071 double this_final_counting_time = (count_end - start);
2072 _total_counting_time += this_final_counting_time;
2074 if (G1PrintRegionLivenessInfo) {
2075 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2076 _g1h->heap_region_iterate(&cl);
2077 }
2079 // Install newly created mark bitmap as "prev".
2080 swapMarkBitMaps();
2082 g1h->reset_gc_time_stamp();
2084 // Note end of marking in all heap regions.
2085 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2086 if (G1CollectedHeap::use_parallel_gc_threads()) {
2087 g1h->set_par_threads((int)n_workers);
2088 g1h->workers()->run_task(&g1_par_note_end_task);
2089 g1h->set_par_threads(0);
2091 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2092 "sanity check");
2093 } else {
2094 g1_par_note_end_task.work(0);
2095 }
2096 g1h->check_gc_time_stamps();
2098 if (!cleanup_list_is_empty()) {
2099 // The cleanup list is not empty, so we'll have to process it
2100 // concurrently. Notify anyone else that might be wanting free
2101 // regions that there will be more free regions coming soon.
2102 g1h->set_free_regions_coming();
2103 }
2105 // Scrub the rem sets, if enabled, before the cleanup-end call below,
2106 // since it affects the metric by which we sort the heap regions.
2107 if (G1ScrubRemSets) {
2108 double rs_scrub_start = os::elapsedTime();
2109 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2110 if (G1CollectedHeap::use_parallel_gc_threads()) {
2111 g1h->set_par_threads((int)n_workers);
2112 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2113 g1h->set_par_threads(0);
2115 assert(g1h->check_heap_region_claim_values(
2116 HeapRegion::ScrubRemSetClaimValue),
2117 "sanity check");
2118 } else {
2119 g1_par_scrub_rs_task.work(0);
2120 }
2122 double rs_scrub_end = os::elapsedTime();
2123 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2124 _total_rs_scrub_time += this_rs_scrub_time;
2125 }
2127 // this will also free any regions totally full of garbage objects,
2128 // and sort the regions.
2129 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2131 // Statistics.
2132 double end = os::elapsedTime();
2133 _cleanup_times.add((end - start) * 1000.0);
2135 if (G1Log::fine()) {
2136 g1h->print_size_transition(gclog_or_tty,
2137 start_used_bytes,
2138 g1h->used(),
2139 g1h->capacity());
2140 }
2142 // Clean up will have freed any regions completely full of garbage.
2143 // Update the soft reference policy with the new heap occupancy.
2144 Universe::update_heap_info_at_gc();
2146 if (VerifyDuringGC) {
2147 HandleMark hm; // handle scope
2148 Universe::heap()->prepare_for_verify();
2149 Universe::verify(VerifyOption_G1UsePrevMarking,
2150 " VerifyDuringGC:(after)");
2151 }
2153 g1h->verify_region_sets_optional();
2155 // We need to make this be a "collection" so any collection pause that
2156 // races with it goes around and waits for completeCleanup to finish.
2157 g1h->increment_total_collections();
2159 // Clean out dead classes and update Metaspace sizes.
2160 ClassLoaderDataGraph::purge();
2161 MetaspaceGC::compute_new_size();
2163 // We reclaimed old regions so we should calculate the sizes to make
2164 // sure we update the old gen/space data.
2165 g1h->g1mm()->update_sizes();
2167 g1h->trace_heap_after_concurrent_cycle();
2168 }
2170 void ConcurrentMark::completeCleanup() {
2171 if (has_aborted()) return;
2173 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2175 _cleanup_list.verify_optional();
2176 FreeRegionList tmp_free_list("Tmp Free List");
2178 if (G1ConcRegionFreeingVerbose) {
2179 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2180 "cleanup list has %u entries",
2181 _cleanup_list.length());
2182 }
2184 // No one else should be accessing the _cleanup_list at this point,
2185 // so it's not necessary to take any locks
2186 while (!_cleanup_list.is_empty()) {
2187 HeapRegion* hr = _cleanup_list.remove_head();
2188 assert(hr != NULL, "Got NULL from a non-empty list");
2189 hr->par_clear();
2190 tmp_free_list.add_ordered(hr);
2192 // Instead of adding one region at a time to the secondary_free_list,
2193 // we accumulate them in the local list and move them a few at a
2194 // time. This also cuts down on the number of notify_all() calls
2195 // we do during this process. We'll also append the local list when
2196 // _cleanup_list is empty (which means we just removed the last
2197 // region from the _cleanup_list).
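// Worked example: with an append length of 5, a 12-entry cleanup list
// triggers appends to the secondary_free_list after the 5th, 10th and
// 12th freed region; the last append comes from the now-empty
// _cleanup_list rather than from the modulus test.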
2198 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2199 _cleanup_list.is_empty()) {
2200 if (G1ConcRegionFreeingVerbose) {
2201 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2202 "appending %u entries to the secondary_free_list, "
2203 "cleanup list still has %u entries",
2204 tmp_free_list.length(),
2205 _cleanup_list.length());
2206 }
2208 {
2209 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2210 g1h->secondary_free_list_add(&tmp_free_list);
2211 SecondaryFreeList_lock->notify_all();
2212 }
2214 if (G1StressConcRegionFreeing) {
2215 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2216 os::sleep(Thread::current(), (jlong) 1, false);
2217 }
2218 }
2219 }
2220 }
2221 assert(tmp_free_list.is_empty(), "post-condition");
2222 }
2224 // Supporting Object and Oop closures for reference discovery
2225 // and processing during marking.
2227 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2228 HeapWord* addr = (HeapWord*)obj;
2229 return addr != NULL &&
2230 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2231 }
2233 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2234 // Uses the CMTask associated with a worker thread (for serial reference
2235 // processing the CMTask for worker 0 is used) to preserve (mark) and
2236 // trace referent objects.
2237 //
2238 // Using the CMTask and embedded local queues avoids having the worker
2239 // threads operating on the global mark stack. This reduces the risk
2240 // of overflowing the stack - which we would rather avoid at this late
2241 // stage. Also using the tasks' local queues removes the potential
2242 // of the workers interfering with each other that could occur if
2243 // operating on the global stack.
2245 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2246 ConcurrentMark* _cm;
2247 CMTask* _task;
2248 int _ref_counter_limit;
2249 int _ref_counter;
2250 bool _is_serial;
2251 public:
2252 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2253 _cm(cm), _task(task), _is_serial(is_serial),
2254 _ref_counter_limit(G1RefProcDrainInterval) {
2255 assert(_ref_counter_limit > 0, "sanity");
2256 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2257 _ref_counter = _ref_counter_limit;
2258 }
2260 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2261 virtual void do_oop( oop* p) { do_oop_work(p); }
2263 template <class T> void do_oop_work(T* p) {
2264 if (!_cm->has_overflown()) {
2265 oop obj = oopDesc::load_decode_heap_oop(p);
2266 if (_cm->verbose_high()) {
2267 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2268 "*"PTR_FORMAT" = "PTR_FORMAT,
2269 _task->worker_id(), p2i(p), p2i((void*) obj));
2270 }
2272 _task->deal_with_reference(obj);
2273 _ref_counter--;
2275 if (_ref_counter == 0) {
2276 // We have dealt with _ref_counter_limit references, pushing them
2277 // and objects reachable from them on to the local stack (and
2278 // possibly the global stack). Call CMTask::do_marking_step() to
2279 // process these entries.
2280 //
2281 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2282 // there's nothing more to do (i.e. we're done with the entries that
2283 // were pushed as a result of the CMTask::deal_with_reference() calls
2284 // above) or we overflow.
2285 //
2286 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2287 // flag while there may still be some work to do. (See the comment at
2288 // the beginning of CMTask::do_marking_step() for those conditions -
2289 // one of which is reaching the specified time target.) It is only
2290 // when CMTask::do_marking_step() returns without setting the
2291 // has_aborted() flag that the marking step has completed.
2292 do {
2293 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2294 _task->do_marking_step(mark_step_duration_ms,
2295 false /* do_termination */,
2296 _is_serial);
2297 } while (_task->has_aborted() && !_cm->has_overflown());
2298 _ref_counter = _ref_counter_limit;
2299 }
2300 } else {
2301 if (_cm->verbose_high()) {
2302 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2303 }
2304 }
2305 }
2306 };
2308 // 'Drain' oop closure used by both serial and parallel reference processing.
2309 // Uses the CMTask associated with a given worker thread (for serial
2310 // reference processing the CMTask for worker 0 is used). Calls the
2311 // do_marking_step routine, with an unbelievably large timeout value,
2312 // to drain the marking data structures of the remaining entries
2313 // added by the 'keep alive' oop closure above.
2315 class G1CMDrainMarkingStackClosure: public VoidClosure {
2316 ConcurrentMark* _cm;
2317 CMTask* _task;
2318 bool _is_serial;
2319 public:
2320 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2321 _cm(cm), _task(task), _is_serial(is_serial) {
2322 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2323 }
2325 void do_void() {
2326 do {
2327 if (_cm->verbose_high()) {
2328 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2329 _task->worker_id(), BOOL_TO_STR(_is_serial));
2330 }
2332 // We call CMTask::do_marking_step() to completely drain the local
2333 // and global marking stacks of entries pushed by the 'keep alive'
2334 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2335 //
2336 // CMTask::do_marking_step() is called in a loop, which we'll exit
2337 // if there's nothing more to do (i.e. we've completely drained the
2338 // entries that were pushed as a result of applying the 'keep alive'
2339 // closure to the entries on the discovered ref lists) or we overflow
2340 // the global marking stack.
2341 //
2342 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2343 // flag while there may still be some work to do. (See the comment at
2344 // the beginning of CMTask::do_marking_step() for those conditions -
2345 // one of which is reaching the specified time target.) It is only
2346 // when CMTask::do_marking_step() returns without setting the
2347 // has_aborted() flag that the marking step has completed.
2349 _task->do_marking_step(1000000000.0 /* something very large */,
2350 true /* do_termination */,
2351 _is_serial);
2352 } while (_task->has_aborted() && !_cm->has_overflown());
2353 }
2354 };
2356 // Implementation of AbstractRefProcTaskExecutor for parallel
2357 // reference processing at the end of G1 concurrent marking
2359 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2360 private:
2361 G1CollectedHeap* _g1h;
2362 ConcurrentMark* _cm;
2363 WorkGang* _workers;
2364 int _active_workers;
2366 public:
2367 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2368 ConcurrentMark* cm,
2369 WorkGang* workers,
2370 int n_workers) :
2371 _g1h(g1h), _cm(cm),
2372 _workers(workers), _active_workers(n_workers) { }
2374 // Executes the given task using concurrent marking worker threads.
2375 virtual void execute(ProcessTask& task);
2376 virtual void execute(EnqueueTask& task);
2377 };
2379 class G1CMRefProcTaskProxy: public AbstractGangTask {
2380 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2381 ProcessTask& _proc_task;
2382 G1CollectedHeap* _g1h;
2383 ConcurrentMark* _cm;
2385 public:
2386 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2387 G1CollectedHeap* g1h,
2388 ConcurrentMark* cm) :
2389 AbstractGangTask("Process reference objects in parallel"),
2390 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2391 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2392 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2393 }
2395 virtual void work(uint worker_id) {
2396 CMTask* task = _cm->task(worker_id);
2397 G1CMIsAliveClosure g1_is_alive(_g1h);
2398 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2399 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2401 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2402 }
2403 };
2405 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2406 assert(_workers != NULL, "Need parallel worker threads.");
2407 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2409 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2411 // We need to reset the concurrency level before each
2412 // proxy task execution, so that the termination protocol
2413 // and overflow handling in CMTask::do_marking_step() know
2414 // how many workers to wait for.
2415 _cm->set_concurrency(_active_workers);
2416 _g1h->set_par_threads(_active_workers);
2417 _workers->run_task(&proc_task_proxy);
2418 _g1h->set_par_threads(0);
2419 }
2421 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2422 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2423 EnqueueTask& _enq_task;
2425 public:
2426 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2427 AbstractGangTask("Enqueue reference objects in parallel"),
2428 _enq_task(enq_task) { }
2430 virtual void work(uint worker_id) {
2431 _enq_task.work(worker_id);
2432 }
2433 };
2435 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2436 assert(_workers != NULL, "Need parallel worker threads.");
2437 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2439 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2441 // Not strictly necessary but...
2442 //
2443 // We need to reset the concurrency level before each
2444 // proxy task execution, so that the termination protocol
2445 // and overflow handling in CMTask::do_marking_step() know
2446 // how many workers to wait for.
2447 _cm->set_concurrency(_active_workers);
2448 _g1h->set_par_threads(_active_workers);
2449 _workers->run_task(&enq_task_proxy);
2450 _g1h->set_par_threads(0);
2451 }
2453 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2454 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2455 }
2457 // Helper class to get rid of some boilerplate code.
2458 class G1RemarkGCTraceTime : public GCTraceTime {
2459 static bool doit_and_prepend(bool doit) {
2460 if (doit) {
2461 gclog_or_tty->put(' ');
2462 }
2463 return doit;
2464 }
2466 public:
2467 G1RemarkGCTraceTime(const char* title, bool doit)
2468 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2469 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2470 }
2471 };
2473 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2474 if (has_overflown()) {
2475 // Skip processing the discovered references if we have
2476 // overflown the global marking stack. Reference objects
2477 // only get discovered once so it is OK to not
2478 // de-populate the discovered reference lists. We could have,
2479 // but the only benefit would be that, when marking restarts,
2480 // less reference objects are discovered.
2481 return;
2482 }
2484 ResourceMark rm;
2485 HandleMark hm;
2487 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2489 // Is alive closure.
2490 G1CMIsAliveClosure g1_is_alive(g1h);
2492 // Inner scope to exclude the cleaning of the string and symbol
2493 // tables from the displayed time.
2494 {
2495 if (G1Log::finer()) {
2496 gclog_or_tty->put(' ');
2497 }
2498 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2500 ReferenceProcessor* rp = g1h->ref_processor_cm();
2502 // See the comment in G1CollectedHeap::ref_processing_init()
2503 // about how reference processing currently works in G1.
2505 // Set the soft reference policy
2506 rp->setup_policy(clear_all_soft_refs);
2507 assert(_markStack.isEmpty(), "mark stack should be empty");
2509 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2510 // in serial reference processing. Note these closures are also
2511 // used for serially processing (by the current thread) the
2512 // JNI references during parallel reference processing.
2513 //
2514 // These closures do not need to synchronize with the worker
2515 // threads involved in parallel reference processing as these
2516 // instances are executed serially by the current thread (e.g.
2517 // reference processing is not multi-threaded and is thus
2518 // performed by the current thread instead of a gang worker).
2519 //
2520 // The gang tasks involved in parallel reference processing create
2521 // their own instances of these closures, which do their own
2522 // synchronization among themselves.
2523 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2524 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2526 // We need at least one active thread. If reference processing
2527 // is not multi-threaded we use the current (VMThread) thread,
2528 // otherwise we use the work gang from the G1CollectedHeap and
2529 // we utilize all the worker threads we can.
2530 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2531 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2532 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2534 // Parallel processing task executor.
2535 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2536 g1h->workers(), active_workers);
2537 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2539 // Set the concurrency level. The phase was already set prior to
2540 // executing the remark task.
2541 set_concurrency(active_workers);
2543 // Set the degree of MT processing here. If the discovery was done MT,
2544 // the number of threads involved during discovery could differ from
2545 // the number of active workers. This is OK as long as the discovered
2546 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2547 rp->set_active_mt_degree(active_workers);
2549 // Process the weak references.
2550 const ReferenceProcessorStats& stats =
2551 rp->process_discovered_references(&g1_is_alive,
2552 &g1_keep_alive,
2553 &g1_drain_mark_stack,
2554 executor,
2555 g1h->gc_timer_cm(),
2556 concurrent_gc_id());
2557 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2559 // The do_oop work routines of the keep_alive and drain_marking_stack
2560 // oop closures will set the has_overflown flag if we overflow the
2561 // global marking stack.
2563 assert(_markStack.overflow() || _markStack.isEmpty(),
2564 "mark stack should be empty (unless it overflowed)");
2566 if (_markStack.overflow()) {
2567 // This should have been done already when we tried to push an
2568 // entry on to the global mark stack. But let's do it again.
2569 set_has_overflown();
2570 }
2572 assert(rp->num_q() == active_workers, "why not");
2574 rp->enqueue_discovered_references(executor);
2576 rp->verify_no_references_recorded();
2577 assert(!rp->discovery_enabled(), "Post condition");
2578 }
2580 if (has_overflown()) {
2581 // We cannot trust g1_is_alive if the marking stack overflowed
2582 return;
2583 }
2585 assert(_markStack.isEmpty(), "Marking should have completed");
2587 // Unload Klasses, String, Symbols, Code Cache, etc.
2589 G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
2591 bool purged_classes;
2593 {
2594 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
2595 purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
2596 }
2598 {
2599 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
2600 weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2601 }
2603 if (G1StringDedup::is_enabled()) {
2604 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
2605 G1StringDedup::unlink(&g1_is_alive);
2606 }
2607 }
2609 void ConcurrentMark::swapMarkBitMaps() {
2610 CMBitMapRO* temp = _prevMarkBitMap;
2611 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2612 _nextMarkBitMap = (CMBitMap*) temp;
2613 }
2615 class CMObjectClosure;
2617 // Closure for iterating over objects, currently only used for
2618 // processing SATB buffers.
2619 class CMObjectClosure : public ObjectClosure {
2620 private:
2621 CMTask* _task;
2623 public:
2624 void do_object(oop obj) {
2625 _task->deal_with_reference(obj);
2626 }
2628 CMObjectClosure(CMTask* task) : _task(task) { }
2629 };
2631 class G1RemarkThreadsClosure : public ThreadClosure {
2632 CMObjectClosure _cm_obj;
2633 G1CMOopClosure _cm_cl;
2634 MarkingCodeBlobClosure _code_cl;
2635 int _thread_parity;
2636 bool _is_par;
2638 public:
2639 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2640 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2641 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2643 void do_thread(Thread* thread) {
2644 if (thread->is_Java_thread()) {
2645 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2646 JavaThread* jt = (JavaThread*)thread;
2648 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2649 // however, oops reachable from nmethods have very complex lifecycles:
2650 // * Alive if on the stack of an executing method
2651 // * Weakly reachable otherwise
2652 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2653 // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2654 jt->nmethods_do(&_code_cl);
2656 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2657 }
2658 } else if (thread->is_VM_thread()) {
2659 if (thread->claim_oops_do(_is_par, _thread_parity)) {
2660 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2661 }
2662 }
2663 }
2664 };
2666 class CMRemarkTask: public AbstractGangTask {
2667 private:
2668 ConcurrentMark* _cm;
2669 bool _is_serial;
2670 public:
2671 void work(uint worker_id) {
2672 // Since all available tasks are actually started, we should
2673 // only proceed if we're supposed to be active.
2674 if (worker_id < _cm->active_tasks()) {
2675 CMTask* task = _cm->task(worker_id);
2676 task->record_start_time();
2677 {
2678 ResourceMark rm;
2679 HandleMark hm;
2681 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2682 Threads::threads_do(&threads_f);
2683 }
2685 do {
2686 task->do_marking_step(1000000000.0 /* something very large */,
2687 true /* do_termination */,
2688 _is_serial);
2689 } while (task->has_aborted() && !_cm->has_overflown());
2690 // If we overflow, then we do not want to restart. We instead
2691 // want to abort remark and do concurrent marking again.
2692 task->record_end_time();
2693 }
2694 }
2696 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2697 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2698 _cm->terminator()->reset_for_reuse(active_workers);
2699 }
2700 };
2702 void ConcurrentMark::checkpointRootsFinalWork() {
2703 ResourceMark rm;
2704 HandleMark hm;
2705 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2707 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2709 g1h->ensure_parsability(false);
2711 if (G1CollectedHeap::use_parallel_gc_threads()) {
2712 G1CollectedHeap::StrongRootsScope srs(g1h);
2713 // this is remark, so we'll use up all active threads
2714 uint active_workers = g1h->workers()->active_workers();
2715 if (active_workers == 0) {
2716 assert(active_workers > 0, "Should have been set earlier");
2717 active_workers = (uint) ParallelGCThreads;
2718 g1h->workers()->set_active_workers(active_workers);
2719 }
2720 set_concurrency_and_phase(active_workers, false /* concurrent */);
2721 // Leave _parallel_marking_threads at its
2722 // value originally calculated in the ConcurrentMark
2723 // constructor and pass values of the active workers
2724 // through the gang in the task.
2726 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2727 // We will start all available threads, even if we decide that the
2728 // active_workers will be fewer. The extra ones will just bail out
2729 // immediately.
2730 g1h->set_par_threads(active_workers);
2731 g1h->workers()->run_task(&remarkTask);
2732 g1h->set_par_threads(0);
2733 } else {
2734 G1CollectedHeap::StrongRootsScope srs(g1h);
2735 uint active_workers = 1;
2736 set_concurrency_and_phase(active_workers, false /* concurrent */);
2738 // Note - if there's no work gang then the VMThread will be
2739 // the thread to execute the remark - serially. We have
2740 // to pass true for the is_serial parameter so that
2741 // CMTask::do_marking_step() doesn't enter the sync
2742 // barriers in the event of an overflow. Doing so will
2743 // cause an assert that the current thread is not a
2744 // concurrent GC thread.
2745 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2746 remarkTask.work(0);
2747 }
2748 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2749 guarantee(has_overflown() ||
2750 satb_mq_set.completed_buffers_num() == 0,
2751 err_msg("Invariant: has_overflown = %s, num buffers = %d",
2752 BOOL_TO_STR(has_overflown()),
2753 satb_mq_set.completed_buffers_num()));
2755 print_stats();
2756 }
2758 #ifndef PRODUCT
2760 class PrintReachableOopClosure: public OopClosure {
2761 private:
2762 G1CollectedHeap* _g1h;
2763 outputStream* _out;
2764 VerifyOption _vo;
2765 bool _all;
2767 public:
2768 PrintReachableOopClosure(outputStream* out,
2769 VerifyOption vo,
2770 bool all) :
2771 _g1h(G1CollectedHeap::heap()),
2772 _out(out), _vo(vo), _all(all) { }
2774 void do_oop(narrowOop* p) { do_oop_work(p); }
2775 void do_oop( oop* p) { do_oop_work(p); }
2777 template <class T> void do_oop_work(T* p) {
2778 oop obj = oopDesc::load_decode_heap_oop(p);
2779 const char* str = NULL;
2780 const char* str2 = "";
2782 if (obj == NULL) {
2783 str = "";
2784 } else if (!_g1h->is_in_g1_reserved(obj)) {
2785 str = " O";
2786 } else {
2787 HeapRegion* hr = _g1h->heap_region_containing(obj);
2788 guarantee(hr != NULL, "invariant");
2789 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2790 bool marked = _g1h->is_marked(obj, _vo);
2792 if (over_tams) {
2793 str = " >";
2794 if (marked) {
2795 str2 = " AND MARKED";
2796 }
2797 } else if (marked) {
2798 str = " M";
2799 } else {
2800 str = " NOT";
2801 }
2802 }
2804 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2805 p2i(p), p2i((void*) obj), str, str2);
2806 }
2807 };
2809 class PrintReachableObjectClosure : public ObjectClosure {
2810 private:
2811 G1CollectedHeap* _g1h;
2812 outputStream* _out;
2813 VerifyOption _vo;
2814 bool _all;
2815 HeapRegion* _hr;
2817 public:
2818 PrintReachableObjectClosure(outputStream* out,
2819 VerifyOption vo,
2820 bool all,
2821 HeapRegion* hr) :
2822 _g1h(G1CollectedHeap::heap()),
2823 _out(out), _vo(vo), _all(all), _hr(hr) { }
2825 void do_object(oop o) {
2826 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2827 bool marked = _g1h->is_marked(o, _vo);
2828 bool print_it = _all || over_tams || marked;
2830 if (print_it) {
2831 _out->print_cr(" "PTR_FORMAT"%s",
2832 p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2833 PrintReachableOopClosure oopCl(_out, _vo, _all);
2834 o->oop_iterate_no_header(&oopCl);
2835 }
2836 }
2837 };
2839 class PrintReachableRegionClosure : public HeapRegionClosure {
2840 private:
2841 G1CollectedHeap* _g1h;
2842 outputStream* _out;
2843 VerifyOption _vo;
2844 bool _all;
2846 public:
2847 bool doHeapRegion(HeapRegion* hr) {
2848 HeapWord* b = hr->bottom();
2849 HeapWord* e = hr->end();
2850 HeapWord* t = hr->top();
2851 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2852 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2853 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2854 _out->cr();
2856 HeapWord* from = b;
2857 HeapWord* to = t;
2859 if (to > from) {
2860 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2861 _out->cr();
2862 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2863 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2864 _out->cr();
2865 }
2867 return false;
2868 }
2870 PrintReachableRegionClosure(outputStream* out,
2871 VerifyOption vo,
2872 bool all) :
2873 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2874 };
2876 void ConcurrentMark::print_reachable(const char* str,
2877 VerifyOption vo,
2878 bool all) {
2879 gclog_or_tty->cr();
2880 gclog_or_tty->print_cr("== Doing heap dump... ");
2882 if (G1PrintReachableBaseFile == NULL) {
2883 gclog_or_tty->print_cr(" #### error: no base file defined");
2884 return;
2885 }
2887 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2888 (JVM_MAXPATHLEN - 1)) {
2889 gclog_or_tty->print_cr(" #### error: file name too long");
2890 return;
2891 }
2893 char file_name[JVM_MAXPATHLEN];
2894 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2895 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2897 fileStream fout(file_name);
2898 if (!fout.is_open()) {
2899 gclog_or_tty->print_cr(" #### error: could not open file");
2900 return;
2901 }
2903 outputStream* out = &fout;
2904 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2905 out->cr();
2907 out->print_cr("--- ITERATING OVER REGIONS");
2908 out->cr();
2909 PrintReachableRegionClosure rcl(out, vo, all);
2910 _g1h->heap_region_iterate(&rcl);
2911 out->cr();
2913 gclog_or_tty->print_cr(" done");
2914 gclog_or_tty->flush();
2915 }
2917 #endif // PRODUCT
2919 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2920 // Note we are overriding the read-only view of the prev map here, via
2921 // the cast.
2922 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2923 }
2925 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2926 _nextMarkBitMap->clearRange(mr);
2927 }
2929 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2930 clearRangePrevBitmap(mr);
2931 clearRangeNextBitmap(mr);
2932 }
2934 HeapRegion*
2935 ConcurrentMark::claim_region(uint worker_id) {
2936 // "checkpoint" the finger
2937 HeapWord* finger = _finger;
2939 // _heap_end will not change underneath our feet; it only changes at
2940 // yield points.
2941 while (finger < _heap_end) {
2942 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2944 // Note on how this code handles humongous regions. In the
2945 // normal case the finger will reach the start of a "starts
2946 // humongous" (SH) region. Its end will either be the end of the
2947 // last "continues humongous" (CH) region in the sequence, or the
2948 // standard end of the SH region (if the SH is the only region in
2949 // the sequence). That way claim_region() will skip over the CH
2950 // regions. However, there is a subtle race between a CM thread
2951 // executing this method and a mutator thread doing a humongous
2952 // object allocation. The two are not mutually exclusive as the CM
2953 // thread does not need to hold the Heap_lock when it gets
2954 // here. So there is a chance that claim_region() will come across
2955 // a free region that's in the process of becoming a SH or a CH
2956 // region. In the former case, it will either
2957 // a) Miss the update to the region's end, in which case it will
2958 // visit every subsequent CH region, will find their bitmaps
2959 // empty, and do nothing, or
2960 // b) Will observe the update of the region's end (in which case
2961 // it will skip the subsequent CH regions).
2962 // If it comes across a region that suddenly becomes CH, the
2963 // scenario will be similar to b). So, the race between
2964 // claim_region() and a humongous object allocation might force us
2965 // to do a bit of unnecessary work (due to some unnecessary bitmap
2966 // iterations) but it should not introduce any correctness issues.
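//
// Compressed to its essentials, the lock-free claim protocol below is
// (illustrative pseudo-code, mirroring the statements that follow):
//
//   HeapWord* finger = _finger;                                  // snapshot
//   HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
//   if (res == finger) {
//     // we own [bottom, end); return the region if non-empty
//   } else {
//     finger = _finger;                 // lost the race, retry from new finger
//   }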
2967 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2968 HeapWord* bottom = curr_region->bottom();
2969 HeapWord* end = curr_region->end();
2970 HeapWord* limit = curr_region->next_top_at_mark_start();
2972 if (verbose_low()) {
2973 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2974 "["PTR_FORMAT", "PTR_FORMAT"), "
2975 "limit = "PTR_FORMAT,
2976 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2977 }
2979 // Is the gap between reading the finger and doing the CAS too long?
2980 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2981 if (res == finger) {
2982 // we succeeded
2984 // notice that _finger == end cannot be guaranteed here since
2985 // someone else might have moved the finger even further.
2986 assert(_finger >= end, "the finger should have moved forward");
2988 if (verbose_low()) {
2989 gclog_or_tty->print_cr("[%u] we were successful with region = "
2990 PTR_FORMAT, worker_id, p2i(curr_region));
2991 }
2993 if (limit > bottom) {
2994 if (verbose_low()) {
2995 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2996 "returning it ", worker_id, p2i(curr_region));
2997 }
2998 return curr_region;
2999 } else {
3000 assert(limit == bottom,
3001 "the region limit should be at bottom");
3002 if (verbose_low()) {
3003 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
3004 "returning NULL", worker_id, p2i(curr_region));
3005 }
3006 // we return NULL and the caller should try calling
3007 // claim_region() again.
3008 return NULL;
3009 }
3010 } else {
3011 assert(_finger > finger, "the finger should have moved forward");
3012 if (verbose_low()) {
3013 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
3014 "global finger = "PTR_FORMAT", "
3015 "our finger = "PTR_FORMAT,
3016 worker_id, p2i(_finger), p2i(finger));
3017 }
3019 // read it again
3020 finger = _finger;
3021 }
3022 }
3024 return NULL;
3025 }
3027 #ifndef PRODUCT
3028 enum VerifyNoCSetOopsPhase {
3029 VerifyNoCSetOopsStack,
3030 VerifyNoCSetOopsQueues,
3031 VerifyNoCSetOopsSATBCompleted,
3032 VerifyNoCSetOopsSATBThread
3033 };
3035 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
3036 private:
3037 G1CollectedHeap* _g1h;
3038 VerifyNoCSetOopsPhase _phase;
3039 int _info;
3041 const char* phase_str() {
3042 switch (_phase) {
3043 case VerifyNoCSetOopsStack: return "Stack";
3044 case VerifyNoCSetOopsQueues: return "Queue";
3045 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
3046 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
3047 default: ShouldNotReachHere();
3048 }
3049 return NULL;
3050 }
3052 void do_object_work(oop obj) {
3053 guarantee(!_g1h->obj_in_cs(obj),
3054 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
3055 p2i((void*) obj), phase_str(), _info));
3056 }
3058 public:
3059 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
3061 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
3062 _phase = phase;
3063 _info = info;
3064 }
3066 virtual void do_oop(oop* p) {
3067 oop obj = oopDesc::load_decode_heap_oop(p);
3068 do_object_work(obj);
3069 }
3071 virtual void do_oop(narrowOop* p) {
3072 // We should not come across narrow oops while scanning marking
3073 // stacks and SATB buffers.
3074 ShouldNotReachHere();
3075 }
3077 virtual void do_object(oop obj) {
3078 do_object_work(obj);
3079 }
3080 };
3082 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3083 bool verify_enqueued_buffers,
3084 bool verify_thread_buffers,
3085 bool verify_fingers) {
3086 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3087 if (!G1CollectedHeap::heap()->mark_in_progress()) {
3088 return;
3089 }
3091 VerifyNoCSetOopsClosure cl;
3093 if (verify_stacks) {
3094 // Verify entries on the global mark stack
3095 cl.set_phase(VerifyNoCSetOopsStack);
3096 _markStack.oops_do(&cl);
3098 // Verify entries on the task queues
3099 for (uint i = 0; i < _max_worker_id; i += 1) {
3100 cl.set_phase(VerifyNoCSetOopsQueues, i);
3101 CMTaskQueue* queue = _task_queues->queue(i);
3102 queue->oops_do(&cl);
3103 }
3104 }
3106 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3108 // Verify entries on the enqueued SATB buffers
3109 if (verify_enqueued_buffers) {
3110 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3111 satb_qs.iterate_completed_buffers_read_only(&cl);
3112 }
3114 // Verify entries on the per-thread SATB buffers
3115 if (verify_thread_buffers) {
3116 cl.set_phase(VerifyNoCSetOopsSATBThread);
3117 satb_qs.iterate_thread_buffers_read_only(&cl);
3118 }
3120 if (verify_fingers) {
3121 // Verify the global finger
3122 HeapWord* global_finger = finger();
3123 if (global_finger != NULL && global_finger < _heap_end) {
3124 // The global finger always points to a heap region boundary. We
3125 // use heap_region_containing_raw() to get the containing region
3126 // given that the global finger could be pointing to a free region
3127 // which subsequently becomes a continues humongous region. If that
3128 // happens, heap_region_containing() will return the bottom of the
3129 // corresponding starts humongous region and the check below will
3130 // not hold any more.
3131 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3132 guarantee(global_finger == global_hr->bottom(),
3133 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3134 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3135 }
3137 // Verify the task fingers
3138 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3139 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3140 CMTask* task = _tasks[i];
3141 HeapWord* task_finger = task->finger();
3142 if (task_finger != NULL && task_finger < _heap_end) {
3143 // See above note on the global finger verification.
3144 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3145 guarantee(task_finger == task_hr->bottom() ||
3146 !task_hr->in_collection_set(),
3147 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3148 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3149 }
3150 }
3151 }
3152 }
3153 #endif // PRODUCT
3155 // Aggregate the counting data that was constructed concurrently
3156 // with marking.
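// In outline, for every region R that is not "continues humongous" this
// aggregation computes (notation illustrative):
//
//   R.marked_bytes = sum over workers w of marked_bytes_array[w][R]
//   _cm_card_bm   |= union over workers w of task_card_bm[w],
//                    restricted to the cards of [R.bottom, R.ntams)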
3157 class AggregateCountDataHRClosure: public HeapRegionClosure {
3158 G1CollectedHeap* _g1h;
3159 ConcurrentMark* _cm;
3160 CardTableModRefBS* _ct_bs;
3161 BitMap* _cm_card_bm;
3162 uint _max_worker_id;
3164 public:
3165 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3166 BitMap* cm_card_bm,
3167 uint max_worker_id) :
3168 _g1h(g1h), _cm(g1h->concurrent_mark()),
3169 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3170 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3172 bool doHeapRegion(HeapRegion* hr) {
3173 if (hr->continuesHumongous()) {
3174 // We will ignore these here and process them when their
3175 // associated "starts humongous" region is processed.
3176 // Note that we cannot rely on their associated
3177 // "starts humongous" region to have their bit set to 1
3178 // since, due to the region chunking in the parallel region
3179 // iteration, a "continues humongous" region might be visited
3180 // before its associated "starts humongous".
3181 return false;
3182 }
3184 HeapWord* start = hr->bottom();
3185 HeapWord* limit = hr->next_top_at_mark_start();
3186 HeapWord* end = hr->end();
3188 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3189 err_msg("Preconditions not met - "
3190 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3191 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3192 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3194 assert(hr->next_marked_bytes() == 0, "Precondition");
3196 if (start == limit) {
3197 // NTAMS of this region has not been set so nothing to do.
3198 return false;
3199 }
3201 // 'start' should be in the heap.
3202 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3203 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3204 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3206 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3207 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3208 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3210 // If ntams is not card aligned then we bump the card bitmap index
3211 // for limit so that we get all the cards spanned by
3212 // the object ending at ntams.
3213 // Note: if this is the last region in the heap then ntams
3214 // could actually be just beyond the end of the heap;
3215 // limit_idx will then correspond to a (non-existent) card
3216 // that is also outside the heap.
3217 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3218 limit_idx += 1;
3219 }
3221 assert(limit_idx <= end_idx, "or else use atomics");
3223 // Aggregate the "stripe" in the count data associated with hr.
3224 uint hrs_index = hr->hrs_index();
3225 size_t marked_bytes = 0;
3227 for (uint i = 0; i < _max_worker_id; i += 1) {
3228 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3229 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3231 // Fetch the marked_bytes in this region for task i and
3232 // add it to the running total for this region.
3233 marked_bytes += marked_bytes_array[hrs_index];
3235 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3236 // into the global card bitmap.
3237 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3239 while (scan_idx < limit_idx) {
3240 assert(task_card_bm->at(scan_idx) == true, "should be");
3241 _cm_card_bm->set_bit(scan_idx);
3242 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3244 // BitMap::get_next_one_offset() can handle the case when
3245 // its left_offset parameter is greater than its right_offset
3246 // parameter. It does, however, have an early exit if
3247 // left_offset == right_offset. So let's limit the value
3248 // passed in for left offset here.
3249 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3250 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3251 }
3252 }
3254 // Update the marked bytes for this region.
3255 hr->add_to_marked_bytes(marked_bytes);
3257 // Next heap region
3258 return false;
3259 }
3260 };
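// A minimal, self-contained sketch of the aggregation pattern used by
// the closure above: each worker owns a private card bitmap, and the
// stripe [start_idx, limit_idx) for one region is OR-ed into a global
// bitmap. The word-based layout and names here are assumptions for
// illustration, not the HotSpot BitMap API:
//
//   #include <cstdint>
//   #include <cstddef>
//
//   static void union_stripe(const uint64_t* task_bm,   // one worker's bitmap
//                            uint64_t* global_bm,       // shared result
//                            size_t start_idx,          // first card index
//                            size_t limit_idx) {        // one past the last
//     for (size_t i = start_idx; i < limit_idx; i++) {
//       if (task_bm[i >> 6] & (UINT64_C(1) << (i & 63))) {
//         global_bm[i >> 6] |= UINT64_C(1) << (i & 63);
//       }
//     }
//   }
//
// The real loop instead jumps straight to the next set bit with
// get_next_one_offset(), which is cheaper when the stripes are sparse.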
3262 class G1AggregateCountDataTask: public AbstractGangTask {
3263 protected:
3264 G1CollectedHeap* _g1h;
3265 ConcurrentMark* _cm;
3266 BitMap* _cm_card_bm;
3267 uint _max_worker_id;
3268 int _active_workers;
3270 public:
3271 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3272 ConcurrentMark* cm,
3273 BitMap* cm_card_bm,
3274 uint max_worker_id,
3275 int n_workers) :
3276 AbstractGangTask("Count Aggregation"),
3277 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3278 _max_worker_id(max_worker_id),
3279 _active_workers(n_workers) { }
3281 void work(uint worker_id) {
3282 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3284 if (G1CollectedHeap::use_parallel_gc_threads()) {
3285 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3286 _active_workers,
3287 HeapRegion::AggregateCountClaimValue);
3288 } else {
3289 _g1h->heap_region_iterate(&cl);
3290 }
3291 }
3292 };
3295 void ConcurrentMark::aggregate_count_data() {
3296 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3297 _g1h->workers()->active_workers() :
3298 1);
3300 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3301 _max_worker_id, n_workers);
3303 if (G1CollectedHeap::use_parallel_gc_threads()) {
3304 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3305 "sanity check");
3306 _g1h->set_par_threads(n_workers);
3307 _g1h->workers()->run_task(&g1_par_agg_task);
3308 _g1h->set_par_threads(0);
3310 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3311 "sanity check");
3312 _g1h->reset_heap_region_claim_values();
3313 } else {
3314 g1_par_agg_task.work(0);
3315 }
3316 }
3318 // Clear the per-worker arrays used to store the per-region counting data
3319 void ConcurrentMark::clear_all_count_data() {
3320 // Clear the global card bitmap - it will be filled during
3321 // liveness count aggregation (during remark) and the
3322 // final counting task.
3323 _card_bm.clear();
3325 // Clear the global region bitmap - it will be filled as part
3326 // of the final counting task.
3327 _region_bm.clear();
3329 uint max_regions = _g1h->max_regions();
3330 assert(_max_worker_id > 0, "uninitialized");
3332 for (uint i = 0; i < _max_worker_id; i += 1) {
3333 BitMap* task_card_bm = count_card_bitmap_for(i);
3334 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3336 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3337 assert(marked_bytes_array != NULL, "uninitialized");
3339 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3340 task_card_bm->clear();
3341 }
3342 }
3344 void ConcurrentMark::print_stats() {
3345 if (verbose_stats()) {
3346 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3347 for (size_t i = 0; i < _active_tasks; ++i) {
3348 _tasks[i]->print_stats();
3349 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3350 }
3351 }
3352 }
3354 // abandon current marking iteration due to a Full GC
3355 void ConcurrentMark::abort() {
3356 // Clear all marks to force marking thread to do nothing
3357 _nextMarkBitMap->clearAll();
3358 // Clear the liveness counting data
3359 clear_all_count_data();
3360 // Empty mark stack
3361 reset_marking_state();
3362 for (uint i = 0; i < _max_worker_id; ++i) {
3363 _tasks[i]->clear_region_fields();
3364 }
3365 _first_overflow_barrier_sync.abort();
3366 _second_overflow_barrier_sync.abort();
3367 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3368 if (!gc_id.is_undefined()) {
3369 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3370 // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3371 _aborted_gc_id = gc_id;
3372 }
3373 _has_aborted = true;
3375 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3376 satb_mq_set.abandon_partial_marking();
3377 // This can be called either during or outside marking; we'll read
3378 // the expected_active value from the SATB queue set.
3379 satb_mq_set.set_active_all_threads(
3380 false, /* new active value */
3381 satb_mq_set.is_active() /* expected_active */);
3383 _g1h->trace_heap_after_concurrent_cycle();
3384 _g1h->register_concurrent_cycle_end();
3385 }
3387 const GCId& ConcurrentMark::concurrent_gc_id() {
3388 if (has_aborted()) {
3389 return _aborted_gc_id;
3390 }
3391 return _g1h->gc_tracer_cm()->gc_id();
3392 }
3394 static void print_ms_time_info(const char* prefix, const char* name,
3395 NumberSeq& ns) {
3396 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3397 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3398 if (ns.num() > 0) {
3399 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3400 prefix, ns.sd(), ns.maximum());
3401 }
3402 }
3404 void ConcurrentMark::print_summary_info() {
3405 gclog_or_tty->print_cr(" Concurrent marking:");
3406 print_ms_time_info(" ", "init marks", _init_times);
3407 print_ms_time_info(" ", "remarks", _remark_times);
3408 {
3409 print_ms_time_info(" ", "final marks", _remark_mark_times);
3410 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3412 }
3413 print_ms_time_info(" ", "cleanups", _cleanup_times);
3414 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3415 _total_counting_time,
3416 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3417 (double)_cleanup_times.num()
3418 : 0.0));
3419 if (G1ScrubRemSets) {
3420 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3421 _total_rs_scrub_time,
3422 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3423 (double)_cleanup_times.num()
3424 : 0.0));
3425 }
3426 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3427 (_init_times.sum() + _remark_times.sum() +
3428 _cleanup_times.sum())/1000.0);
3429 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3430 "(%8.2f s marking).",
3431 cmThread()->vtime_accum(),
3432 cmThread()->vtime_mark_accum());
3433 }
3435 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3436 if (use_parallel_marking_threads()) {
3437 _parallel_workers->print_worker_threads_on(st);
3438 }
3439 }
3441 void ConcurrentMark::print_on_error(outputStream* st) const {
3442 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3443 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3444 _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3445 _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3446 }
3448 // We take a break if someone is trying to stop the world.
3449 bool ConcurrentMark::do_yield_check(uint worker_id) {
3450 if (SuspendibleThreadSet::should_yield()) {
3451 if (worker_id == 0) {
3452 _g1h->g1_policy()->record_concurrent_pause();
3453 }
3454 SuspendibleThreadSet::yield();
3455 return true;
3456 } else {
3457 return false;
3458 }
3459 }
3461 bool ConcurrentMark::containing_card_is_marked(void* p) {
3462 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3463 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3464 }
3466 bool ConcurrentMark::containing_cards_are_marked(void* start,
3467 void* last) {
3468 return containing_card_is_marked(start) &&
3469 containing_card_is_marked(last);
3470 }
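// A hedged model of the card lookup above; the 512-byte card size is
// the customary value, and the flat bool-per-card array is an
// illustrative stand-in for the _card_bm BitMap:
//
//   #include <cstddef>
//   #include <cstdint>
//
//   static bool card_is_marked(const uint8_t* heap_start,
//                              const void* p,
//                              const bool* card_bm) {
//     const unsigned card_shift = 9;                 // 512-byte cards (assumed)
//     size_t byte_offset = (const uint8_t*)p - heap_start;
//     return card_bm[byte_offset >> card_shift];     // one flag per card
//   }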
3472 #ifndef PRODUCT
3473 // for debugging purposes
3474 void ConcurrentMark::print_finger() {
3475 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3476 p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3477 for (uint i = 0; i < _max_worker_id; ++i) {
3478 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3479 }
3480 gclog_or_tty->cr();
3481 }
3482 #endif
3484 void CMTask::scan_object(oop obj) {
3485 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3487 if (_cm->verbose_high()) {
3488 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3489 _worker_id, p2i((void*) obj));
3490 }
3492 size_t obj_size = obj->size();
3493 _words_scanned += obj_size;
3495 obj->oop_iterate(_cm_oop_closure);
3496 statsOnly( ++_objs_scanned );
3497 check_limits();
3498 }
3500 // Closure for iteration over bitmaps
3501 class CMBitMapClosure : public BitMapClosure {
3502 private:
3503 // the bitmap that is being iterated over
3504 CMBitMap* _nextMarkBitMap;
3505 ConcurrentMark* _cm;
3506 CMTask* _task;
3508 public:
3509 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3510 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3512 bool do_bit(size_t offset) {
3513 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3514 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3515 assert( addr < _cm->finger(), "invariant");
3517 statsOnly( _task->increase_objs_found_on_bitmap() );
3518 assert(addr >= _task->finger(), "invariant");
3520 // We move that task's local finger along.
3521 _task->move_finger_to(addr);
3523 _task->scan_object(oop(addr));
3524 // we only partially drain the local queue and global stack
3525 _task->drain_local_queue(true);
3526 _task->drain_global_stack(true);
3528 // if the has_aborted flag has been raised, we need to bail out of
3529 // the iteration
3530 return !_task->has_aborted();
3531 }
3532 };
3534 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3535 ConcurrentMark* cm,
3536 CMTask* task)
3537 : _g1h(g1h), _cm(cm), _task(task) {
3538 assert(_ref_processor == NULL, "should be initialized to NULL");
3540 if (G1UseConcMarkReferenceProcessing) {
3541 _ref_processor = g1h->ref_processor_cm();
3542 assert(_ref_processor != NULL, "should not be NULL");
3543 }
3544 }
3546 void CMTask::setup_for_region(HeapRegion* hr) {
3547 // Separated the asserts so that we know which one fires.
3548 assert(hr != NULL,
3549 "claim_region() should have filtered out continues humongous regions");
3550 assert(!hr->continuesHumongous(),
3551 "claim_region() should have filtered out continues humongous regions");
3553 if (_cm->verbose_low()) {
3554 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3555 _worker_id, p2i(hr));
3556 }
3558 _curr_region = hr;
3559 _finger = hr->bottom();
3560 update_region_limit();
3561 }
3563 void CMTask::update_region_limit() {
3564 HeapRegion* hr = _curr_region;
3565 HeapWord* bottom = hr->bottom();
3566 HeapWord* limit = hr->next_top_at_mark_start();
3568 if (limit == bottom) {
3569 if (_cm->verbose_low()) {
3570 gclog_or_tty->print_cr("[%u] found an empty region "
3571 "["PTR_FORMAT", "PTR_FORMAT")",
3572 _worker_id, p2i(bottom), p2i(limit));
3573 }
3574 // The region was collected underneath our feet.
3575 // We set the finger to bottom to ensure that the bitmap
3576 // iteration that will follow this will not do anything.
3577 // (this is not a condition that holds when we set the region up,
3578 // as the region is not supposed to be empty in the first place)
3579 _finger = bottom;
3580 } else if (limit >= _region_limit) {
3581 assert(limit >= _finger, "peace of mind");
3582 } else {
3583 assert(limit < _region_limit, "only way to get here");
3584 // This can happen under some pretty unusual circumstances. An
3585 // evacuation pause empties the region underneath our feet (NTAMS
3586 // at bottom). We then do some allocation in the region (NTAMS
3587 // stays at bottom), followed by the region being used as a GC
3588 // alloc region (NTAMS will move to top() and the objects
3589 // originally below it will be grayed). All objects now marked in
3590 // the region are explicitly grayed, if below the global finger,
3591 // and in fact we do not need to scan anything else. So, we simply
3592 // set _finger to be limit to ensure that the bitmap iteration
3593 // doesn't do anything.
3594 _finger = limit;
3595 }
3597 _region_limit = limit;
3598 }
3600 void CMTask::giveup_current_region() {
3601 assert(_curr_region != NULL, "invariant");
3602 if (_cm->verbose_low()) {
3603 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3604 _worker_id, p2i(_curr_region));
3605 }
3606 clear_region_fields();
3607 }
3609 void CMTask::clear_region_fields() {
3610 // Values for these three fields that indicate that we're not
3611 // holding on to a region.
3612 _curr_region = NULL;
3613 _finger = NULL;
3614 _region_limit = NULL;
3615 }
3617 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3618 if (cm_oop_closure == NULL) {
3619 assert(_cm_oop_closure != NULL, "invariant");
3620 } else {
3621 assert(_cm_oop_closure == NULL, "invariant");
3622 }
3623 _cm_oop_closure = cm_oop_closure;
3624 }
3626 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3627 guarantee(nextMarkBitMap != NULL, "invariant");
3629 if (_cm->verbose_low()) {
3630 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3631 }
3633 _nextMarkBitMap = nextMarkBitMap;
3634 clear_region_fields();
3636 _calls = 0;
3637 _elapsed_time_ms = 0.0;
3638 _termination_time_ms = 0.0;
3639 _termination_start_time_ms = 0.0;
3641 #if _MARKING_STATS_
3642 _local_pushes = 0;
3643 _local_pops = 0;
3644 _local_max_size = 0;
3645 _objs_scanned = 0;
3646 _global_pushes = 0;
3647 _global_pops = 0;
3648 _global_max_size = 0;
3649 _global_transfers_to = 0;
3650 _global_transfers_from = 0;
3651 _regions_claimed = 0;
3652 _objs_found_on_bitmap = 0;
3653 _satb_buffers_processed = 0;
3654 _steal_attempts = 0;
3655 _steals = 0;
3656 _aborted = 0;
3657 _aborted_overflow = 0;
3658 _aborted_cm_aborted = 0;
3659 _aborted_yield = 0;
3660 _aborted_timed_out = 0;
3661 _aborted_satb = 0;
3662 _aborted_termination = 0;
3663 #endif // _MARKING_STATS_
3664 }
3666 bool CMTask::should_exit_termination() {
3667 regular_clock_call();
3668 // This is called when we are in the termination protocol. We should
3669 // quit if, for some reason, this task wants to abort or the global
3670 // stack is not empty (this means that we can get work from it).
3671 return !_cm->mark_stack_empty() || has_aborted();
3672 }
3674 void CMTask::reached_limit() {
3675 assert(_words_scanned >= _words_scanned_limit ||
3676 _refs_reached >= _refs_reached_limit ,
3677 "shouldn't have been called otherwise");
3678 regular_clock_call();
3679 }
3681 void CMTask::regular_clock_call() {
3682 if (has_aborted()) return;
3684 // First, we need to recalculate the words scanned and refs reached
3685 // limits for the next clock call.
3686 recalculate_limits();
3688 // During the regular clock call we do the following:
3690 // (1) If an overflow has been flagged, then we abort.
3691 if (_cm->has_overflown()) {
3692 set_has_aborted();
3693 return;
3694 }
3696 // If we are not concurrent (i.e. we're doing remark) we don't need
3697 // to check anything else. The other steps are only needed during
3698 // the concurrent marking phase.
3699 if (!concurrent()) return;
3701 // (2) If marking has been aborted for Full GC, then we also abort.
3702 if (_cm->has_aborted()) {
3703 set_has_aborted();
3704 statsOnly( ++_aborted_cm_aborted );
3705 return;
3706 }
3708 double curr_time_ms = os::elapsedVTime() * 1000.0;
3710 // (3) If marking stats are enabled, then we update the clock statistics.
3711 #if _MARKING_STATS_
3712 if (_words_scanned >= _words_scanned_limit) {
3713 ++_clock_due_to_scanning;
3714 }
3715 if (_refs_reached >= _refs_reached_limit) {
3716 ++_clock_due_to_marking;
3717 }
3719 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3720 _interval_start_time_ms = curr_time_ms;
3721 _all_clock_intervals_ms.add(last_interval_ms);
3723 if (_cm->verbose_medium()) {
3724 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3725 "scanned = %d%s, refs reached = %d%s",
3726 _worker_id, last_interval_ms,
3727 _words_scanned,
3728 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3729 _refs_reached,
3730 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3731 }
3732 #endif // _MARKING_STATS_
3734 // (4) We check whether we should yield. If we have to, then we abort.
3735 if (SuspendibleThreadSet::should_yield()) {
3736 // We should yield. To do this we abort the task. The caller is
3737 // responsible for yielding.
3738 set_has_aborted();
3739 statsOnly( ++_aborted_yield );
3740 return;
3741 }
3743 // (5) We check whether we've reached our time quota. If we have,
3744 // then we abort.
3745 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3746 if (elapsed_time_ms > _time_target_ms) {
3747 set_has_aborted();
3748 _has_timed_out = true;
3749 statsOnly( ++_aborted_timed_out );
3750 return;
3751 }
3753 // (6) Finally, we check whether there are enough completed SATB
3754 // buffers available for processing. If there are, we abort.
3755 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3756 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3757 if (_cm->verbose_low()) {
3758 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3759 _worker_id);
3760 }
3761 // we do need to process SATB buffers, we'll abort and restart
3762 // the marking task to do so
3763 set_has_aborted();
3764 statsOnly( ++_aborted_satb );
3765 return;
3766 }
3767 }
3769 void CMTask::recalculate_limits() {
3770 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3771 _words_scanned_limit = _real_words_scanned_limit;
3773 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3774 _refs_reached_limit = _real_refs_reached_limit;
3775 }
3777 void CMTask::decrease_limits() {
3778 // This is called when we believe that we're going to do an infrequent
3779 // operation which will increase the per byte scanned cost (i.e. move
3780 // entries to/from the global stack). It basically tries to decrease the
3781 // scanning limit so that the clock is called earlier.
3783 if (_cm->verbose_medium()) {
3784 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3785 }
3787 _words_scanned_limit = _real_words_scanned_limit -
3788 3 * words_scanned_period / 4;
3789 _refs_reached_limit = _real_refs_reached_limit -
3790 3 * refs_reached_period / 4;
3791 }
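// For illustration only (the period constants are defined in the CMTask
// header; the value below is assumed): with a words_scanned_period of
// 12288 words, the effective limit becomes
// _real_words_scanned_limit - 9216, so the next regular_clock_call()
// fires after roughly a quarter of the usual scanning work.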
3793 void CMTask::move_entries_to_global_stack() {
3794 // local array where we'll store the entries that will be popped
3795 // from the local queue
3796 oop buffer[global_stack_transfer_size];
3798 int n = 0;
3799 oop obj;
3800 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3801 buffer[n] = obj;
3802 ++n;
3803 }
3805 if (n > 0) {
3806 // we popped at least one entry from the local queue
3808 statsOnly( ++_global_transfers_to; _local_pops += n );
3810 if (!_cm->mark_stack_push(buffer, n)) {
3811 if (_cm->verbose_low()) {
3812 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3813 _worker_id);
3814 }
3815 set_has_aborted();
3816 } else {
3817 // the transfer was successful
3819 if (_cm->verbose_medium()) {
3820 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3821 _worker_id, n);
3822 }
3823 statsOnly( int tmp_size = _cm->mark_stack_size();
3824 if (tmp_size > _global_max_size) {
3825 _global_max_size = tmp_size;
3826 }
3827 _global_pushes += n );
3828 }
3829 }
3831 // this operation was quite expensive, so decrease the limits
3832 decrease_limits();
3833 }
3835 void CMTask::get_entries_from_global_stack() {
3836 // local array where we'll store the entries that will be popped
3837 // from the global stack.
3838 oop buffer[global_stack_transfer_size];
3839 int n;
3840 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3841 assert(n <= global_stack_transfer_size,
3842 "we should not pop more than the given limit");
3843 if (n > 0) {
3844 // yes, we did actually pop at least one entry
3846 statsOnly( ++_global_transfers_from; _global_pops += n );
3847 if (_cm->verbose_medium()) {
3848 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3849 _worker_id, n);
3850 }
3851 for (int i = 0; i < n; ++i) {
3852 bool success = _task_queue->push(buffer[i]);
3853 // We only call this when the local queue is empty or under a
3854 // given target limit. So, we do not expect this push to fail.
3855 assert(success, "invariant");
3856 }
3858 statsOnly( int tmp_size = _task_queue->size();
3859 if (tmp_size > _local_max_size) {
3860 _local_max_size = tmp_size;
3861 }
3862 _local_pushes += n );
3863 }
3865 // this operation was quite expensive, so decrease the limits
3866 decrease_limits();
3867 }
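// A minimal sketch of the chunked transfer protocol implemented by the
// two methods above, using standard containers and a plain mutex as
// stand-ins for the HotSpot task queue and mark stack (all names here
// are assumed):
//
//   #include <deque>
//   #include <mutex>
//   #include <vector>
//
//   struct Obj;                                    // stand-in for oop
//   static std::vector<Obj*> global_stack;         // shared, lock-protected
//   static std::mutex global_stack_lock;
//
//   static void move_to_global(std::deque<Obj*>& local_queue,
//                              size_t transfer_size) {
//     std::vector<Obj*> buffer;                    // local, lock-free staging
//     while (buffer.size() < transfer_size && !local_queue.empty()) {
//       buffer.push_back(local_queue.front());     // pop_local()
//       local_queue.pop_front();
//     }
//     if (!buffer.empty()) {
//       std::lock_guard<std::mutex> g(global_stack_lock);
//       global_stack.insert(global_stack.end(), buffer.begin(), buffer.end());
//     }
//   }
//
// Batching the pops amortizes the synchronization cost of the shared
// stack, which is also why both methods call decrease_limits() after a
// transfer.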
3869 void CMTask::drain_local_queue(bool partially) {
3870 if (has_aborted()) return;
3872 // Decide what the target size is, depending on whether we're going to
3873 // drain it partially (so that other tasks can steal if they run out
3874 // of things to do) or totally (at the very end).
3875 size_t target_size;
3876 if (partially) {
3877 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3878 } else {
3879 target_size = 0;
3880 }
3882 if (_task_queue->size() > target_size) {
3883 if (_cm->verbose_high()) {
3884 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3885 _worker_id, target_size);
3886 }
3888 oop obj;
3889 bool ret = _task_queue->pop_local(obj);
3890 while (ret) {
3891 statsOnly( ++_local_pops );
3893 if (_cm->verbose_high()) {
3894 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3895 p2i((void*) obj));
3896 }
3898 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3899 assert(!_g1h->is_on_master_free_list(
3900 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3902 scan_object(obj);
3904 if (_task_queue->size() <= target_size || has_aborted()) {
3905 ret = false;
3906 } else {
3907 ret = _task_queue->pop_local(obj);
3908 }
3909 }
3911 if (_cm->verbose_high()) {
3912 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3913 _worker_id, _task_queue->size());
3914 }
3915 }
3916 }
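// Worked example of the partial-drain target above, with assumed
// values: for a task queue with max_elems() == 16384 and a
// GCDrainStackTargetSize of 64 entries, target_size == MIN2(5461, 64)
// == 64, so a partial drain stops once 64 entries remain and leaves
// the rest available for stealing.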
3918 void CMTask::drain_global_stack(bool partially) {
3919 if (has_aborted()) return;
3921 // We have a policy to drain the local queue before we attempt to
3922 // drain the global stack.
3923 assert(partially || _task_queue->size() == 0, "invariant");
3925 // Decide what the target size is, depending on whether we're going to
3926 // drain it partially (so that other tasks can steal if they run out
3927 // of things to do) or totally (at the very end). Notice that,
3928 // because we move entries from the global stack in chunks or
3929 // because another task might be doing the same, we might in fact
3930 // drop below the target. But, this is not a problem.
3931 size_t target_size;
3932 if (partially) {
3933 target_size = _cm->partial_mark_stack_size_target();
3934 } else {
3935 target_size = 0;
3936 }
3938 if (_cm->mark_stack_size() > target_size) {
3939 if (_cm->verbose_low()) {
3940 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3941 _worker_id, target_size);
3942 }
3944 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3945 get_entries_from_global_stack();
3946 drain_local_queue(partially);
3947 }
3949 if (_cm->verbose_low()) {
3950 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3951 _worker_id, _cm->mark_stack_size());
3952 }
3953 }
3954 }
3956 // The SATB queue has several assumptions on whether to call the par or
3957 // non-par versions of its methods. This is why some of the code is
3958 // replicated. We should really get rid of the single-threaded version
3959 // of the code to simplify things.
3960 void CMTask::drain_satb_buffers() {
3961 if (has_aborted()) return;
3963 // We set this so that the regular clock knows that we're in the
3964 // middle of draining buffers and doesn't set the abort flag when it
3965 // notices that SATB buffers are available for draining. It'd be
3966 // very counterproductive if it did that. :-)
3967 _draining_satb_buffers = true;
3969 CMObjectClosure oc(this);
3970 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3971 if (G1CollectedHeap::use_parallel_gc_threads()) {
3972 satb_mq_set.set_par_closure(_worker_id, &oc);
3973 } else {
3974 satb_mq_set.set_closure(&oc);
3975 }
3977 // This keeps claiming and applying the closure to completed buffers
3978 // until we run out of buffers or we need to abort.
3979 if (G1CollectedHeap::use_parallel_gc_threads()) {
3980 while (!has_aborted() &&
3981 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3982 if (_cm->verbose_medium()) {
3983 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3984 }
3985 statsOnly( ++_satb_buffers_processed );
3986 regular_clock_call();
3987 }
3988 } else {
3989 while (!has_aborted() &&
3990 satb_mq_set.apply_closure_to_completed_buffer()) {
3991 if (_cm->verbose_medium()) {
3992 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3993 }
3994 statsOnly( ++_satb_buffers_processed );
3995 regular_clock_call();
3996 }
3997 }
3999 _draining_satb_buffers = false;
4001 assert(has_aborted() ||
4002 concurrent() ||
4003 satb_mq_set.completed_buffers_num() == 0, "invariant");
4005 if (G1CollectedHeap::use_parallel_gc_threads()) {
4006 satb_mq_set.set_par_closure(_worker_id, NULL);
4007 } else {
4008 satb_mq_set.set_closure(NULL);
4009 }
4011 // again, this was a potentially expensive operation, decrease the
4012 // limits to get the regular clock call early
4013 decrease_limits();
4014 }
4016 void CMTask::print_stats() {
4017 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
4018 _worker_id, _calls);
4019 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4020 _elapsed_time_ms, _termination_time_ms);
4021 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4022 _step_times_ms.num(), _step_times_ms.avg(),
4023 _step_times_ms.sd());
4024 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4025 _step_times_ms.maximum(), _step_times_ms.sum());
4027 #if _MARKING_STATS_
4028 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4029 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4030 _all_clock_intervals_ms.sd());
4031 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
4032 _all_clock_intervals_ms.maximum(),
4033 _all_clock_intervals_ms.sum());
4034 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
4035 _clock_due_to_scanning, _clock_due_to_marking);
4036 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
4037 _objs_scanned, _objs_found_on_bitmap);
4038 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
4039 _local_pushes, _local_pops, _local_max_size);
4040 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
4041 _global_pushes, _global_pops, _global_max_size);
4042 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
4043 _global_transfers_to,_global_transfers_from);
4044 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
4045 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
4046 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
4047 _steal_attempts, _steals);
4048 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
4049 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
4050 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4051 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
4052 _aborted_timed_out, _aborted_satb, _aborted_termination);
4053 #endif // _MARKING_STATS_
4054 }
4056 /*****************************************************************************
4058 The do_marking_step(time_target_ms, ...) method is the building
4059 block of the parallel marking framework. It can be called in parallel
4060 with other invocations of do_marking_step() on different tasks
4061 (but only one per task, obviously) and concurrently with the
4062 mutator threads, or during remark, hence it eliminates the need
4063 for two versions of the code. When called during remark, it will
4064 pick up from where the task left off during the concurrent marking
4065 phase. Interestingly, tasks are also claimable during evacuation
4066 pauses, since do_marking_step() ensures that it aborts before
4067 it needs to yield.
4069 The data structures that it uses to do marking work are the
4070 following:
4072 (1) Marking Bitmap. If there are gray objects that appear only
4073 on the bitmap (this happens either when dealing with an overflow
4074 or when the initial marking phase has simply marked the roots
4075 and didn't push them on the stack), then tasks claim heap
4076 regions whose bitmap they then scan to find gray objects. A
4077 global finger indicates where the end of the last claimed region
4078 is. A local finger indicates how far into the region a task has
4079 scanned. The two fingers are used to determine how to gray an
4080 object (i.e. whether simply marking it is OK, as it will be
4081 visited by a task in the future, or whether it needs to be also
4082 pushed on a stack).
4084 (2) Local Queue. The local queue of the task which is accessed
4085 reasonably efficiently by the task. Other tasks can steal from
4086 it when they run out of work. Throughout the marking phase, a
4087 task attempts to keep its local queue short but not totally
4088 empty, so that entries are available for stealing by other
4089 tasks. Only when there is no more work does a task totally
4090 drain its local queue.
4092 (3) Global Mark Stack. This handles local queue overflow. During
4093 marking only sets of entries are moved between it and the local
4094 queues, as access to it requires a mutex and more fine-grained
4095 interaction with it, which might cause contention. If it
4096 overflows, then the marking phase should restart and iterate
4097 over the bitmap to identify gray objects. Throughout the marking
4098 phase, tasks attempt to keep the global mark stack at a small
4099 length but not totally empty, so that entries are available for
4100 popping by other tasks. Only when there is no more work do tasks
4101 totally drain the global mark stack.
4103 (4) SATB Buffer Queue. This is where completed SATB buffers are
4104 made available. Buffers are regularly removed from this queue
4105 and scanned for roots, so that the queue doesn't get too
4106 long. During remark, all completed buffers are processed, as
4107 well as the filled-in parts of any uncompleted buffers.
4109 The do_marking_step() method tries to abort when the time target
4110 has been reached. There are a few other cases when the
4111 do_marking_step() method also aborts:
4113 (1) When the marking phase has been aborted (after a Full GC).
4115 (2) When a global overflow (on the global stack) has been
4116 triggered. Before the task aborts, it will actually sync up with
4117 the other tasks to ensure that all the marking data structures
4118 (local queues, stacks, fingers etc.) are re-initialized so that
4119 when do_marking_step() completes, the marking phase can
4120 immediately restart.
4122 (3) When enough completed SATB buffers are available. The
4123 do_marking_step() method only tries to drain SATB buffers right
4124 at the beginning. So, if enough buffers are available, the
4125 marking step aborts and the SATB buffers are processed at
4126 the beginning of the next invocation.
4128 (4) To yield. When we have to yield, we abort and yield
4129 right at the end of do_marking_step(). This saves us a lot
4130 of hassle as, by yielding, we might allow a Full GC. If this
4131 happens then objects will be compacted underneath our feet, the
4132 heap might shrink, etc. We save checking for all this by just
4133 aborting and doing the yield right at the end.
4135 From the above it follows that the do_marking_step() method should
4136 be called in a loop (or, otherwise, regularly) until it completes.
4138 If a marking step completes without its has_aborted() flag being
4139 true, it means it has completed the current marking phase (and
4140 also all other marking tasks have done so and have all synced up).
4142 A method called regular_clock_call() is invoked "regularly" (in
4143 sub ms intervals) throughout marking. It is this clock method that
4144 checks all the abort conditions which were mentioned above and
4145 decides when the task should abort. A work-based scheme is used to
4146 trigger this clock method: when the number of object words the
4147 marking phase has scanned or the number of references the marking
4148 phase has visited reaches a given limit. Additional invocations of
4149 the clock method have been planted in a few other strategic places
4150 too. The initial reason for the clock method was to avoid calling
4151 vtime too regularly, as it is quite expensive. So, once it was in
4152 place, it was natural to piggy-back all the other conditions on it
4153 too and not constantly check them throughout the code.
4155 If do_termination is true then do_marking_step will enter its
4156 termination protocol.
4158 The value of is_serial must be true when do_marking_step is being
4159 called serially (i.e. by the VMThread) and do_marking_step should
4160 skip any synchronization in the termination and overflow code.
4161 Examples include the serial remark code and the serial reference
4162 processing closures.
4164 The value of is_serial must be false when do_marking_step is
4165 being called by any of the worker threads in a work gang.
4166 Examples include the concurrent marking code (CMMarkingTask),
4167 the MT remark code, and the MT reference processing closures.
4169 *****************************************************************************/
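// A minimal sketch of the calling convention described above: callers
// invoke do_marking_step() repeatedly until a step completes without
// aborting. The concrete loops in the concurrent, remark and reference
// processing callers differ in how they yield and re-enter; this
// outline is an assumption, not a copy of any of them:
//
//   static void drive_marking(CMTask* task, double target_ms) {
//     do {
//       task->do_marking_step(target_ms,
//                             true  /* do_termination */,
//                             false /* is_serial */);
//       // On abort the caller may first need to yield to a safepoint
//       // (e.g. via SuspendibleThreadSet); the real callers also stop
//       // retrying if marking as a whole has been aborted.
//     } while (task->has_aborted());
//   }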
4171 void CMTask::do_marking_step(double time_target_ms,
4172 bool do_termination,
4173 bool is_serial) {
4174 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4175 assert(concurrent() == _cm->concurrent(), "they should be the same");
4177 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4178 assert(_task_queues != NULL, "invariant");
4179 assert(_task_queue != NULL, "invariant");
4180 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4182 assert(!_claimed,
4183 "only one thread should claim this task at any one time");
4185 // OK, this doesn't safeguard against all possible scenarios, as it is
4186 // possible for two threads to set the _claimed flag at the same
4187 // time. But it is only for debugging purposes anyway and it will
4188 // catch most problems.
4189 _claimed = true;
4191 _start_time_ms = os::elapsedVTime() * 1000.0;
4192 statsOnly( _interval_start_time_ms = _start_time_ms );
4194 // If do_stealing is true then do_marking_step will attempt to
4195 // steal work from the other CMTasks. It only makes sense to
4196 // enable stealing when the termination protocol is enabled
4197 // and do_marking_step() is not being called serially.
4198 bool do_stealing = do_termination && !is_serial;
4200 double diff_prediction_ms =
4201 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4202 _time_target_ms = time_target_ms - diff_prediction_ms;
4204 // set up the variables that are used in the work-based scheme to
4205 // call the regular clock method
4206 _words_scanned = 0;
4207 _refs_reached = 0;
4208 recalculate_limits();
4210 // clear all flags
4211 clear_has_aborted();
4212 _has_timed_out = false;
4213 _draining_satb_buffers = false;
4215 ++_calls;
4217 if (_cm->verbose_low()) {
4218 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4219 "target = %1.2lfms >>>>>>>>>>",
4220 _worker_id, _calls, _time_target_ms);
4221 }
4223 // Set up the bitmap and oop closures. Anything that uses them is
4224 // eventually called from this method, so it is OK to allocate these
4225 // statically.
4226 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4227 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4228 set_cm_oop_closure(&cm_oop_closure);
4230 if (_cm->has_overflown()) {
4231 // This can happen if the mark stack overflows during a GC pause
4232 // and this task, after a yield point, restarts. We have to abort
4233 // as we need to get into the overflow protocol which happens
4234 // right at the end of this task.
4235 set_has_aborted();
4236 }
4238 // First drain any available SATB buffers. After this, we will not
4239 // look at SATB buffers before the next invocation of this method.
4240 // If enough completed SATB buffers are queued up, the regular clock
4241 // will abort this task so that it restarts.
4242 drain_satb_buffers();
4243 // ...then partially drain the local queue and the global stack
4244 drain_local_queue(true);
4245 drain_global_stack(true);
4247 do {
4248 if (!has_aborted() && _curr_region != NULL) {
4249 // This means that we're already holding on to a region.
4250 assert(_finger != NULL, "if region is not NULL, then the finger "
4251 "should not be NULL either");
4253 // We might have restarted this task after an evacuation pause
4254 // which might have evacuated the region we're holding on to
4255 // underneath our feet. Let's read its limit again to make sure
4256 // that we do not iterate over a region of the heap that
4257 // contains garbage (update_region_limit() will also move
4258 // _finger to the start of the region if it is found empty).
4259 update_region_limit();
4260 // We will start from _finger not from the start of the region,
4261 // as we might be restarting this task after aborting half-way
4262 // through scanning this region. In this case, _finger points to
4263 // the address where we last found a marked object. If this is a
4264 // fresh region, _finger points to start().
4265 MemRegion mr = MemRegion(_finger, _region_limit);
4267 if (_cm->verbose_low()) {
4268 gclog_or_tty->print_cr("[%u] we're scanning part "
4269 "["PTR_FORMAT", "PTR_FORMAT") "
4270 "of region "HR_FORMAT,
4271 _worker_id, p2i(_finger), p2i(_region_limit),
4272 HR_FORMAT_PARAMS(_curr_region));
4273 }
4275 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4276 "humongous regions should go around loop once only");
4278 // Some special cases:
4279 // If the memory region is empty, we can just give up the region.
4280 // If the current region is humongous then we only need to check
4281 // the bitmap for the bit associated with the start of the object,
4282 // scan the object if it's live, and give up the region.
4283 // Otherwise, let's iterate over the bitmap of the part of the region
4284 // that is left.
4285 // If the iteration is successful, give up the region.
4286 if (mr.is_empty()) {
4287 giveup_current_region();
4288 regular_clock_call();
4289 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4290 if (_nextMarkBitMap->isMarked(mr.start())) {
4291 // The object is marked - apply the closure
4292 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4293 bitmap_closure.do_bit(offset);
4294 }
4295 // Even if this task aborted while scanning the humongous object
4296 // we can (and should) give up the current region.
4297 giveup_current_region();
4298 regular_clock_call();
4299 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4300 giveup_current_region();
4301 regular_clock_call();
4302 } else {
4303 assert(has_aborted(), "currently the only way to do so");
4304 // The only way to abort the bitmap iteration is to return
4305 // false from the do_bit() method. However, inside the
4306 // do_bit() method we move the _finger to point to the
4307 // object currently being looked at. So, if we bail out, we
4308 // have definitely set _finger to something non-null.
4309 assert(_finger != NULL, "invariant");
4311 // Region iteration was actually aborted. So now _finger
4312 // points to the address of the object we last scanned. If we
4313 // leave it there, when we restart this task, we will rescan
4314 // the object. It is easy to avoid this. We move the finger by
4315 // enough to point to the next possible object header (the
4316 // bitmap knows by how much we need to move it as it knows its
4317 // granularity).
4318 assert(_finger < _region_limit, "invariant");
4319 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4320 // Check if bitmap iteration was aborted while scanning the last object
4321 if (new_finger >= _region_limit) {
4322 giveup_current_region();
4323 } else {
4324 move_finger_to(new_finger);
4325 }
4326 }
4327 }
4328 // At this point we have either completed iterating over the
4329 // region we were holding on to, or we have aborted.
4331 // We then partially drain the local queue and the global stack.
4332 // (Do we really need this?)
4333 drain_local_queue(true);
4334 drain_global_stack(true);
4336 // Read the note on the claim_region() method on why it might
4337 // return NULL with potentially more regions available for
4338 // claiming and why we have to check out_of_regions() to determine
4339 // whether we're done or not.
4340 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4341 // We are going to try to claim a new region. We should have
4342 // given up on the previous one.
4343 // Separated the asserts so that we know which one fires.
4344 assert(_curr_region == NULL, "invariant");
4345 assert(_finger == NULL, "invariant");
4346 assert(_region_limit == NULL, "invariant");
4347 if (_cm->verbose_low()) {
4348 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4349 }
4350 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4351 if (claimed_region != NULL) {
4352 // Yes, we managed to claim one
4353 statsOnly( ++_regions_claimed );
4355 if (_cm->verbose_low()) {
4356 gclog_or_tty->print_cr("[%u] we successfully claimed "
4357 "region "PTR_FORMAT,
4358 _worker_id, p2i(claimed_region));
4359 }
4361 setup_for_region(claimed_region);
4362 assert(_curr_region == claimed_region, "invariant");
4363 }
4364 // It is important to call the regular clock here. It might take
4365 // a while to claim a region if, for example, we hit a large
4366 // block of empty regions. So we need to call the regular clock
4367 // method once round the loop to make sure it's called
4368 // frequently enough.
4369 regular_clock_call();
4370 }
4372 if (!has_aborted() && _curr_region == NULL) {
4373 assert(_cm->out_of_regions(),
4374 "at this point we should be out of regions");
4375 }
4376 } while ( _curr_region != NULL && !has_aborted());
4378 if (!has_aborted()) {
4379 // We cannot check whether the global stack is empty, since other
4380 // tasks might be pushing objects to it concurrently.
4381 assert(_cm->out_of_regions(),
4382 "at this point we should be out of regions");
4384 if (_cm->verbose_low()) {
4385 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4386 }
4388 // Try to reduce the number of available SATB buffers so that
4389 // remark has less work to do.
4390 drain_satb_buffers();
4391 }
4393 // Since we've done everything else, we can now totally drain the
4394 // local queue and global stack.
4395 drain_local_queue(false);
4396 drain_global_stack(false);
4398 // Attempt at work stealing from other task's queues.
4399 if (do_stealing && !has_aborted()) {
4400 // We have not aborted. This means that we have finished all that
4401 // we could. Let's try to do some stealing...
4403 // We cannot check whether the global stack is empty, since other
4404 // tasks might be pushing objects to it concurrently.
4405 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4406 "only way to reach here");
4408 if (_cm->verbose_low()) {
4409 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4410 }
4412 while (!has_aborted()) {
4413 oop obj;
4414 statsOnly( ++_steal_attempts );
4416 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4417 if (_cm->verbose_medium()) {
4418 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4419 _worker_id, p2i((void*) obj));
4420 }
4422 statsOnly( ++_steals );
4424 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4425 "any stolen object should be marked");
4426 scan_object(obj);
4428 // And since we're towards the end, let's totally drain the
4429 // local queue and global stack.
4430 drain_local_queue(false);
4431 drain_global_stack(false);
4432 } else {
4433 break;
4434 }
4435 }
4436 }
4438 // If we are about to wrap up and go into termination, check if we
4439 // should raise the overflow flag.
4440 if (do_termination && !has_aborted()) {
4441 if (_cm->force_overflow()->should_force()) {
4442 _cm->set_has_overflown();
4443 regular_clock_call();
4444 }
4445 }
4447 // We still haven't aborted. Now, let's try to get into the
4448 // termination protocol.
4449 if (do_termination && !has_aborted()) {
4450 // We cannot check whether the global stack is empty, since other
4451 // tasks might be concurrently pushing objects on it.
4452 // Separated the asserts so that we know which one fires.
4453 assert(_cm->out_of_regions(), "only way to reach here");
4454 assert(_task_queue->size() == 0, "only way to reach here");
4456 if (_cm->verbose_low()) {
4457 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4458 }
4460 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4462 // The CMTask class also extends the TerminatorTerminator class,
4463 // hence its should_exit_termination() method will also decide
4464 // whether to exit the termination protocol or not.
4465 bool finished = (is_serial ||
4466 _cm->terminator()->offer_termination(this));
4467 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4468 _termination_time_ms +=
4469 termination_end_time_ms - _termination_start_time_ms;
4471 if (finished) {
4472 // We're all done.
4474 if (_worker_id == 0) {
4475 // let's allow task 0 to do this
4476 if (concurrent()) {
4477 assert(_cm->concurrent_marking_in_progress(), "invariant");
4478 // we need to set this to false before the next
4479 // safepoint. This way we ensure that the marking phase
4480 // doesn't observe any more heap expansions.
4481 _cm->clear_concurrent_marking_in_progress();
4482 }
4483 }
4485 // We can now guarantee that the global stack is empty, since
4486 // all other tasks have finished. We separated the guarantees so
4487 // that, if a condition is false, we can immediately find out
4488 // which one.
4489 guarantee(_cm->out_of_regions(), "only way to reach here");
4490 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4491 guarantee(_task_queue->size() == 0, "only way to reach here");
4492 guarantee(!_cm->has_overflown(), "only way to reach here");
4493 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4495 if (_cm->verbose_low()) {
4496 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4497 }
4498 } else {
4499 // Apparently there's more work to do. Let's abort this task. The
4500 // caller will restart it and we can hopefully find more things to do.
4502 if (_cm->verbose_low()) {
4503 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4504 _worker_id);
4505 }
4507 set_has_aborted();
4508 statsOnly( ++_aborted_termination );
4509 }
4510 }
4512 // Mainly for debugging purposes to make sure that a pointer to the
4513 // closure which was statically allocated in this frame doesn't
4514 // escape it by accident.
4515 set_cm_oop_closure(NULL);
4516 double end_time_ms = os::elapsedVTime() * 1000.0;
4517 double elapsed_time_ms = end_time_ms - _start_time_ms;
4518 // Update the step history.
4519 _step_times_ms.add(elapsed_time_ms);
4521 if (has_aborted()) {
4522 // The task was aborted for some reason.
4524 statsOnly( ++_aborted );
4526 if (_has_timed_out) {
4527 double diff_ms = elapsed_time_ms - _time_target_ms;
4528 // Keep statistics of how well we did with respect to hitting
4529 // our target only if we actually timed out (if we aborted for
4530 // other reasons, then the results might get skewed).
4531 _marking_step_diffs_ms.add(diff_ms);
4532 }
4534 if (_cm->has_overflown()) {
4535 // This is the interesting one. We aborted because a global
4536 // overflow was raised. This means we have to restart the
4537 // marking phase and start iterating over regions. However, in
4538 // order to do this we have to make sure that all tasks stop
4539 // what they are doing and re-initialise in a safe manner. We
4540 // will achieve this with the use of two barrier sync points.
4542 if (_cm->verbose_low()) {
4543 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4544 }
4546 if (!is_serial) {
4547 // We only need to enter the sync barrier if being called
4548 // from a parallel context
4549 _cm->enter_first_sync_barrier(_worker_id);
4551 // When we exit this sync barrier we know that all tasks have
4552 // stopped doing marking work. So, it's now safe to
4553 // re-initialise our data structures. At the end of this method,
4554 // task 0 will clear the global data structures.
4555 }
4557 statsOnly( ++_aborted_overflow );
4559 // We clear the local state of this task...
4560 clear_region_fields();
4562 if (!is_serial) {
4563 // ...and enter the second barrier.
4564 _cm->enter_second_sync_barrier(_worker_id);
4565 }
4566 // At this point, if we're in the concurrent phase of
4567 // marking, everything has been re-initialized and we're
4568 // ready to restart.
4569 }
4571 if (_cm->verbose_low()) {
4572 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4573 "elapsed = %1.2lfms <<<<<<<<<<",
4574 _worker_id, _time_target_ms, elapsed_time_ms);
4575 if (_cm->has_aborted()) {
4576 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4577 _worker_id);
4578 }
4579 }
4580 } else {
4581 if (_cm->verbose_low()) {
4582 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4583 "elapsed = %1.2lfms <<<<<<<<<<",
4584 _worker_id, _time_target_ms, elapsed_time_ms);
4585 }
4586 }
4588 _claimed = false;
4589 }
4591 CMTask::CMTask(uint worker_id,
4592 ConcurrentMark* cm,
4593 size_t* marked_bytes,
4594 BitMap* card_bm,
4595 CMTaskQueue* task_queue,
4596 CMTaskQueueSet* task_queues)
4597 : _g1h(G1CollectedHeap::heap()),
4598 _worker_id(worker_id), _cm(cm),
4599 _claimed(false),
4600 _nextMarkBitMap(NULL), _hash_seed(17),
4601 _task_queue(task_queue),
4602 _task_queues(task_queues),
4603 _cm_oop_closure(NULL),
4604 _marked_bytes_array(marked_bytes),
4605 _card_bm(card_bm) {
4606 guarantee(task_queue != NULL, "invariant");
4607 guarantee(task_queues != NULL, "invariant");
4609 statsOnly( _clock_due_to_scanning = 0;
4610 _clock_due_to_marking = 0 );
4612 _marking_step_diffs_ms.add(0.5);
4613 }
4615 // These are formatting macros that are used below to ensure
4616 // consistent formatting. The *_H_* versions are used to format the
4617 // header for a particular value and they should be kept consistent
4618 // with the corresponding macro. Also note that most of the macros add
4619 // the necessary white space (as a prefix) which makes them a bit
4620 // easier to compose.
4622 // All the output lines are prefixed with this string to be able to
4623 // identify them easily in a large log file.
4624 #define G1PPRL_LINE_PREFIX "###"
4626 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4627 #ifdef _LP64
4628 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4629 #else // _LP64
4630 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4631 #endif // _LP64
4633 // For per-region info
4634 #define G1PPRL_TYPE_FORMAT " %-4s"
4635 #define G1PPRL_TYPE_H_FORMAT " %4s"
4636 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4637 #define G1PPRL_BYTE_H_FORMAT " %9s"
4638 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4639 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4641 // For summary info
4642 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4643 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4644 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4645 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4647 G1PrintRegionLivenessInfoClosure::
4648 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4649 : _out(out),
4650 _total_used_bytes(0), _total_capacity_bytes(0),
4651 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4652 _hum_used_bytes(0), _hum_capacity_bytes(0),
4653 _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4654 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4655 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4656 MemRegion g1_committed = g1h->g1_committed();
4657 MemRegion g1_reserved = g1h->g1_reserved();
4658 double now = os::elapsedTime();
4660 // Print the header of the output.
4661 _out->cr();
4662 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4663 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4664 G1PPRL_SUM_ADDR_FORMAT("committed")
4665 G1PPRL_SUM_ADDR_FORMAT("reserved")
4666 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4667 p2i(g1_committed.start()), p2i(g1_committed.end()),
4668 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4669 HeapRegion::GrainBytes);
4670 _out->print_cr(G1PPRL_LINE_PREFIX);
4671 _out->print_cr(G1PPRL_LINE_PREFIX
4672 G1PPRL_TYPE_H_FORMAT
4673 G1PPRL_ADDR_BASE_H_FORMAT
4674 G1PPRL_BYTE_H_FORMAT
4675 G1PPRL_BYTE_H_FORMAT
4676 G1PPRL_BYTE_H_FORMAT
4677 G1PPRL_DOUBLE_H_FORMAT
4678 G1PPRL_BYTE_H_FORMAT
4679 G1PPRL_BYTE_H_FORMAT,
4680 "type", "address-range",
4681 "used", "prev-live", "next-live", "gc-eff",
4682 "remset", "code-roots");
4683 _out->print_cr(G1PPRL_LINE_PREFIX
4684 G1PPRL_TYPE_H_FORMAT
4685 G1PPRL_ADDR_BASE_H_FORMAT
4686 G1PPRL_BYTE_H_FORMAT
4687 G1PPRL_BYTE_H_FORMAT
4688 G1PPRL_BYTE_H_FORMAT
4689 G1PPRL_DOUBLE_H_FORMAT
4690 G1PPRL_BYTE_H_FORMAT
4691 G1PPRL_BYTE_H_FORMAT,
4692 "", "",
4693 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4694 "(bytes)", "(bytes)");
4695 }
4697 // It takes as a parameter a reference to one of the _hum_* fields,
4698 // deduces the corresponding value for a region in a humongous region
4699 // series (either the region size, or what's left if the _hum_* field
4700 // is < the region size), and updates the _hum_* field accordingly.
4701 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4702 size_t bytes = 0;
4703 // The > 0 check is to deal with the prev and next live bytes which
4704 // could be 0.
4705 if (*hum_bytes > 0) {
4706 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4707 *hum_bytes -= bytes;
4708 }
4709 return bytes;
4710 }
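// Worked example with an assumed 1 MB region size: for a humongous
// series with _hum_used_bytes == 2.5 MB, three consecutive calls
// return 1 MB, 1 MB and 0.5 MB, leaving the field at zero; the
// "starts humongous" row and the two "continues humongous" rows each
// report their own slice of the series.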
4712 // It deduces the values for a region in a humongous region series
4713 // from the _hum_* fields and updates those accordingly. It assumes
4714 // that the _hum_* fields have already been set up from the "starts
4715 // humongous" region and that we visit the regions in address order.
4716 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4717 size_t* capacity_bytes,
4718 size_t* prev_live_bytes,
4719 size_t* next_live_bytes) {
4720 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4721 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4722 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4723 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4724 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4725 }
4727 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4728 const char* type = "";
4729 HeapWord* bottom = r->bottom();
4730 HeapWord* end = r->end();
4731 size_t capacity_bytes = r->capacity();
4732 size_t used_bytes = r->used();
4733 size_t prev_live_bytes = r->live_bytes();
4734 size_t next_live_bytes = r->next_live_bytes();
4735 double gc_eff = r->gc_efficiency();
4736 size_t remset_bytes = r->rem_set()->mem_size();
4737 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4739 if (r->used() == 0) {
4740 type = "FREE";
4741 } else if (r->is_survivor()) {
4742 type = "SURV";
4743 } else if (r->is_young()) {
4744 type = "EDEN";
4745 } else if (r->startsHumongous()) {
4746 type = "HUMS";
4748 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4749 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4750 "they should have been zeroed after the last time we used them");
4751 // Set up the _hum_* fields.
4752 _hum_capacity_bytes = capacity_bytes;
4753 _hum_used_bytes = used_bytes;
4754 _hum_prev_live_bytes = prev_live_bytes;
4755 _hum_next_live_bytes = next_live_bytes;
4756 get_hum_bytes(&used_bytes, &capacity_bytes,
4757 &prev_live_bytes, &next_live_bytes);
4758 end = bottom + HeapRegion::GrainWords;
4759 } else if (r->continuesHumongous()) {
4760 type = "HUMC";
4761 get_hum_bytes(&used_bytes, &capacity_bytes,
4762 &prev_live_bytes, &next_live_bytes);
4763 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4764 } else {
4765 type = "OLD";
4766 }
4768 _total_used_bytes += used_bytes;
4769 _total_capacity_bytes += capacity_bytes;
4770 _total_prev_live_bytes += prev_live_bytes;
4771 _total_next_live_bytes += next_live_bytes;
4772 _total_remset_bytes += remset_bytes;
4773 _total_strong_code_roots_bytes += strong_code_roots_bytes;
4775 // Print a line for this particular region.
4776 _out->print_cr(G1PPRL_LINE_PREFIX
4777 G1PPRL_TYPE_FORMAT
4778 G1PPRL_ADDR_BASE_FORMAT
4779 G1PPRL_BYTE_FORMAT
4780 G1PPRL_BYTE_FORMAT
4781 G1PPRL_BYTE_FORMAT
4782 G1PPRL_DOUBLE_FORMAT
4783 G1PPRL_BYTE_FORMAT
4784 G1PPRL_BYTE_FORMAT,
4785 type, p2i(bottom), p2i(end),
4786 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4787 remset_bytes, strong_code_roots_bytes);
4789 return false;
4790 }
4792 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4793 // add static memory usages to remembered set sizes
4794 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4795 // Print the footer of the output.
4796 _out->print_cr(G1PPRL_LINE_PREFIX);
4797 _out->print_cr(G1PPRL_LINE_PREFIX
4798 " SUMMARY"
4799 G1PPRL_SUM_MB_FORMAT("capacity")
4800 G1PPRL_SUM_MB_PERC_FORMAT("used")
4801 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4802 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4803 G1PPRL_SUM_MB_FORMAT("remset")
4804 G1PPRL_SUM_MB_FORMAT("code-roots"),
4805 bytes_to_mb(_total_capacity_bytes),
4806 bytes_to_mb(_total_used_bytes),
4807 perc(_total_used_bytes, _total_capacity_bytes),
4808 bytes_to_mb(_total_prev_live_bytes),
4809 perc(_total_prev_live_bytes, _total_capacity_bytes),
4810 bytes_to_mb(_total_next_live_bytes),
4811 perc(_total_next_live_bytes, _total_capacity_bytes),
4812 bytes_to_mb(_total_remset_bytes),
4813 bytes_to_mb(_total_strong_code_roots_bytes));
4814 _out->cr();
4815 }