Sun, 21 Apr 2013 20:41:04 -0700
8012907: anti-delta fix for 8010992
Summary: anti-delta fix for 8010992 until 8012902 can be fixed
Reviewed-by: acorn, minqi, rdurbin
/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

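// Converts a heap-word difference into a difference in bit map offsets;
// the argument must be aligned to the mapping granularity implied by
// _shifter.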
int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
         _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

bool CMBitMap::allocate(ReservedSpace heap_rs) {
  _bmStartWord = (HeapWord*)(heap_rs.base());
  _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  if (!brs.is_reserved()) {
    warning("ConcurrentMark marking bit map allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  if (!_virtual_space.initialize(brs, brs.size())) {
    warning("ConcurrentMark marking bit map backing store failure");
    return false;
  }
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
  return true;
}

void CMBitMap::clearAll() {
  _bm.clear();
}

void CMBitMap::markRange(MemRegion mr) {
  // Clip the range to the area covered by the bit map.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  // Clip the range to the area covered by the bit map.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

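// Returns the first maximal run of marked words within [addr, end_addr)
// and clears its bits, so repeated calls enumerate and consume the
// marked sub-regions in address order.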
MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called during remark if we have overflowed the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity, continue
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}

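// Lock-free push: claim a slot by CAS-ing _index forward, retrying on
// contention; if the stack is full, only the overflow flag is set.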
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

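// Lock-free bulk push of n entries, claimed with a single CAS on _index;
// sets the overflow flag if the entries would not all fit.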
void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

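// Bulk push of n entries, serialized against par_pop_arr() via
// ParGCRareEvent_lock instead of CAS.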
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

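// Pops up to max entries into ptr_arr under the same lock; returns false
// (with *n set to 0) when the stack is empty.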
bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

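// Drains the stack, applying cl to each popped (grey) object; if
// yield_after is set, offers to yield after each object and returns
// false if the drain was interrupted by a yield.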
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

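// Applies f to every oop currently on the stack; only expected to be
// called while the stack is stable, which the saved-index check asserts.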
void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false),  _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock, so there is nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

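// Roughly one marking thread for every four parallel GC threads, with a
// minimum of one.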
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
  _g1h(g1h),
  _markBitMap1(MinObjAlignment - 1),
  _markBitMap2(MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  if (!_markBitMap1.allocate(heap_rs)) {
    warning("Failed to allocate first CM bit map");
    return;
  }
  if (!_markBitMap2.allocate(heap_rs)) {
    warning("Failed to allocate second CM bit map");
    return;
  }

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINT32_FORMAT ") "
            "than ParallelGCThreads (" UINT32_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads     = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor                 = 0.0;
    _marking_task_overhead        = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor          = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
          MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) heap_rs.base();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur, next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
    "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(overflow)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UsePrevMarking);
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

1491 // Heap region closure used for verifying the counting data
1492 // that was accumulated concurrently and aggregated during
1493 // the remark pause. This closure is applied to the heap
1494 // regions during the STW cleanup pause.
1496 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1497 G1CollectedHeap* _g1h;
1498 ConcurrentMark* _cm;
1499 CalcLiveObjectsClosure _calc_cl;
1500 BitMap* _region_bm; // Region BM to be verified
1501 BitMap* _card_bm; // Card BM to be verified
1502 bool _verbose; // verbose output?
1504 BitMap* _exp_region_bm; // Expected Region BM values
1505 BitMap* _exp_card_bm; // Expected card BM values
1507 int _failures;
1509 public:
1510 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1511 BitMap* region_bm,
1512 BitMap* card_bm,
1513 BitMap* exp_region_bm,
1514 BitMap* exp_card_bm,
1515 bool verbose) :
1516 _g1h(g1h), _cm(g1h->concurrent_mark()),
1517 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1518 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1519 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1520 _failures(0) { }
1522 int failures() const { return _failures; }
1524 bool doHeapRegion(HeapRegion* hr) {
1525 if (hr->continuesHumongous()) {
1526 // We will ignore these here and process them when their
1527 // associated "starts humongous" region is processed (see
1528 // set_bit_for_heap_region()). Note that we cannot rely on their
1529 // associated "starts humongous" region to have their bit set to
1530 // 1 since, due to the region chunking in the parallel region
1531 // iteration, a "continues humongous" region might be visited
1532 // before its associated "starts humongous".
1533 return false;
1534 }
1536 int failures = 0;
1538 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1539 // this region and set the corresponding bits in the expected region
1540 // and card bitmaps.
1541 bool res = _calc_cl.doHeapRegion(hr);
1542 assert(res == false, "should be continuing");
1544 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1545 Mutex::_no_safepoint_check_flag);
1547 // Verify the marked bytes for this region.
1548 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1549 size_t act_marked_bytes = hr->next_marked_bytes();
1551 // We're not OK if expected marked bytes > actual marked bytes. It means
1552 // we have missed accounting some objects during the actual marking.
1553 if (exp_marked_bytes > act_marked_bytes) {
1554 if (_verbose) {
1555 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1556 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1557 hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1558 }
1559 failures += 1;
1560 }
1562 // Verify the bit, for this region, in the actual and expected
1563 // (which was just calculated) region bit maps.
1564 // We're not OK if the bit in the calculated expected region
1565 // bitmap is set and the bit in the actual region bitmap is not.
1566 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1568 bool expected = _exp_region_bm->at(index);
1569 bool actual = _region_bm->at(index);
1570 if (expected && !actual) {
1571 if (_verbose) {
1572 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1573 "expected: %s, actual: %s",
1574 hr->hrs_index(),
1575 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1576 }
1577 failures += 1;
1578 }
1580 // Verify that the card bit maps for the cards spanned by the current
1581 // region match. We have an error if we have a set bit in the expected
1582 // bit map and the corresponding bit in the actual bitmap is not set.
1584 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1585 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1587 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1588 expected = _exp_card_bm->at(i);
1589 actual = _card_bm->at(i);
1591 if (expected && !actual) {
1592 if (_verbose) {
1593 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1594 "expected: %s, actual: %s",
1595 hr->hrs_index(), i,
1596 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1597 }
1598 failures += 1;
1599 }
1600 }
1602 if (failures > 0 && _verbose) {
1603 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1604 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1605 HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1606 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1607 }
1609 _failures += failures;
1611 // We could stop iteration over the heap when we
1612 // find the first violating region by returning true.
1613 return false;
1614 }
1615 };
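// A minimal standalone sketch of the verification pattern above, using
// standard C++ in place of HotSpot's BitMap; all names here are
// illustrative. The "expected" bitmap is rebuilt from scratch, and a
// failure is flagged only when a bit is set in "expected" but clear in
// "actual", since the actual data is allowed to be conservatively
// over-inclusive.

#include <cstdio>
#include <vector>

static int count_bitmap_mismatches(const std::vector<bool>& expected,
                                   const std::vector<bool>& actual) {
  int failures = 0;
  for (size_t i = 0; i < expected.size() && i < actual.size(); ++i) {
    if (expected[i] && !actual[i]) {
      std::printf("bitmap mismatch at %zu: expected set, actual clear\n", i);
      failures += 1;
    }
  }
  return failures;
}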
1618 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1619 protected:
1620 G1CollectedHeap* _g1h;
1621 ConcurrentMark* _cm;
1622 BitMap* _actual_region_bm;
1623 BitMap* _actual_card_bm;
1625 uint _n_workers;
1627 BitMap* _expected_region_bm;
1628 BitMap* _expected_card_bm;
1630 int _failures;
1631 bool _verbose;
1633 public:
1634 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1635 BitMap* region_bm, BitMap* card_bm,
1636 BitMap* expected_region_bm, BitMap* expected_card_bm)
1637 : AbstractGangTask("G1 verify final counting"),
1638 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1639 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1640 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1641 _failures(0), _verbose(false),
1642 _n_workers(0) {
1643 assert(VerifyDuringGC, "don't call this otherwise");
1645 // Use the value already set as the number of active threads
1646 // in the call to run_task().
1647 if (G1CollectedHeap::use_parallel_gc_threads()) {
1648 assert( _g1h->workers()->active_workers() > 0,
1649 "Should have been previously set");
1650 _n_workers = _g1h->workers()->active_workers();
1651 } else {
1652 _n_workers = 1;
1653 }
1655 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1656 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1658 _verbose = _cm->verbose_medium();
1659 }
1661 void work(uint worker_id) {
1662 assert(worker_id < _n_workers, "invariant");
1664 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1665 _actual_region_bm, _actual_card_bm,
1666 _expected_region_bm,
1667 _expected_card_bm,
1668 _verbose);
1670 if (G1CollectedHeap::use_parallel_gc_threads()) {
1671 _g1h->heap_region_par_iterate_chunked(&verify_cl,
1672 worker_id,
1673 _n_workers,
1674 HeapRegion::VerifyCountClaimValue);
1675 } else {
1676 _g1h->heap_region_iterate(&verify_cl);
1677 }
1679 Atomic::add(verify_cl.failures(), &_failures);
1680 }
1682 int failures() const { return _failures; }
1683 };
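// A minimal sketch of how the per-worker failure counts above can be folded
// into a shared total without a lock, mirroring the Atomic::add() call in
// work(); std::atomic stands in for HotSpot's Atomic class and the names
// are illustrative.

#include <atomic>

struct VerifyTotals {
  std::atomic<int> failures{0};

  // Each worker calls this once with its locally accumulated count.
  void add_worker_failures(int worker_failures) {
    failures.fetch_add(worker_failures, std::memory_order_relaxed);
  }
};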
1685 // Closure that finalizes the liveness counting data.
1686 // Used during the cleanup pause.
1687 // Sets the bits corresponding to the interval [NTAMS, top]
1688 // (which contains the implicitly live objects) in the
1689 // card liveness bitmap. Also sets the bit for each region,
1690 // containing live data, in the region liveness bitmap.
1692 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1693 public:
1694 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1695 BitMap* region_bm,
1696 BitMap* card_bm) :
1697 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1699 bool doHeapRegion(HeapRegion* hr) {
1701 if (hr->continuesHumongous()) {
1702 // We will ignore these here and process them when their
1703 // associated "starts humongous" region is processed (see
1704 // set_bit_for_heap_region()). Note that we cannot rely on their
1705 // associated "starts humongous" region to have its bit set to
1706 // 1 since, due to the region chunking in the parallel region
1707 // iteration, a "continues humongous" region might be visited
1708 // before its associated "starts humongous".
1709 return false;
1710 }
1712 HeapWord* ntams = hr->next_top_at_mark_start();
1713 HeapWord* top = hr->top();
1715 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1717 // Mark the allocated-since-marking portion...
1718 if (ntams < top) {
1719 // This definitely means the region has live objects.
1720 set_bit_for_region(hr);
1722 // Now set the bits in the card bitmap for [ntams, top)
1723 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1724 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1726 // Note: if we're looking at the last region in the heap, top
1727 // could actually be just beyond the end of the heap; end_idx
1728 // will then correspond to a (non-existent) card that is also
1729 // just beyond the heap.
1730 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1731 // end of object is not card aligned - increment to cover
1732 // all the cards spanned by the object
1733 end_idx += 1;
1734 }
1736 assert(end_idx <= _card_bm->size(),
1737 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1738 end_idx, _card_bm->size()));
1739 assert(start_idx < _card_bm->size(),
1740 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1741 start_idx, _card_bm->size()));
1743 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1744 }
1746 // Set the bit for the region if it contains live data
1747 if (hr->next_marked_bytes() > 0) {
1748 set_bit_for_region(hr);
1749 }
1751 return false;
1752 }
1753 };
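// A minimal sketch of the card-index arithmetic used in doHeapRegion()
// above, assuming 512-byte cards (the usual HotSpot card size); names and
// constants are illustrative. end_idx is exclusive, so when "to" is not
// card aligned it is bumped by one to cover the partial last card - the
// same adjustment made above for a non-card-aligned "top".

#include <cassert>
#include <cstddef>
#include <cstdint>

static const uintptr_t kCardShift = 9;  // assumed: 2^9 = 512-byte cards

static inline size_t card_index_for(uintptr_t heap_base, uintptr_t addr) {
  return (addr - heap_base) >> kCardShift;
}

static void card_range_for(uintptr_t heap_base, uintptr_t from, uintptr_t to,
                           size_t* start_idx, size_t* end_idx) {
  assert(from <= to && "range must be well-formed");
  *start_idx = card_index_for(heap_base, from);
  *end_idx   = card_index_for(heap_base, to);
  if ((to & ((uintptr_t(1) << kCardShift) - 1)) != 0) {
    *end_idx += 1;  // "to" lands mid-card: include that card as well
  }
}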
1755 class G1ParFinalCountTask: public AbstractGangTask {
1756 protected:
1757 G1CollectedHeap* _g1h;
1758 ConcurrentMark* _cm;
1759 BitMap* _actual_region_bm;
1760 BitMap* _actual_card_bm;
1762 uint _n_workers;
1764 public:
1765 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1766 : AbstractGangTask("G1 final counting"),
1767 _g1h(g1h), _cm(_g1h->concurrent_mark()),
1768 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1769 _n_workers(0) {
1770 // Use the value already set as the number of active threads
1771 // in the call to run_task().
1772 if (G1CollectedHeap::use_parallel_gc_threads()) {
1773 assert( _g1h->workers()->active_workers() > 0,
1774 "Should have been previously set");
1775 _n_workers = _g1h->workers()->active_workers();
1776 } else {
1777 _n_workers = 1;
1778 }
1779 }
1781 void work(uint worker_id) {
1782 assert(worker_id < _n_workers, "invariant");
1784 FinalCountDataUpdateClosure final_update_cl(_g1h,
1785 _actual_region_bm,
1786 _actual_card_bm);
1788 if (G1CollectedHeap::use_parallel_gc_threads()) {
1789 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1790 worker_id,
1791 _n_workers,
1792 HeapRegion::FinalCountClaimValue);
1793 } else {
1794 _g1h->heap_region_iterate(&final_update_cl);
1795 }
1796 }
1797 };
1799 class G1ParNoteEndTask;
1801 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1802 G1CollectedHeap* _g1;
1803 int _worker_num;
1804 size_t _max_live_bytes;
1805 uint _regions_claimed;
1806 size_t _freed_bytes;
1807 FreeRegionList* _local_cleanup_list;
1808 OldRegionSet* _old_proxy_set;
1809 HumongousRegionSet* _humongous_proxy_set;
1810 HRRSCleanupTask* _hrrs_cleanup_task;
1811 double _claimed_region_time;
1812 double _max_region_time;
1814 public:
1815 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1816 int worker_num,
1817 FreeRegionList* local_cleanup_list,
1818 OldRegionSet* old_proxy_set,
1819 HumongousRegionSet* humongous_proxy_set,
1820 HRRSCleanupTask* hrrs_cleanup_task) :
1821 _g1(g1), _worker_num(worker_num),
1822 _max_live_bytes(0), _regions_claimed(0),
1823 _freed_bytes(0),
1824 _claimed_region_time(0.0), _max_region_time(0.0),
1825 _local_cleanup_list(local_cleanup_list),
1826 _old_proxy_set(old_proxy_set),
1827 _humongous_proxy_set(humongous_proxy_set),
1828 _hrrs_cleanup_task(hrrs_cleanup_task) { }
1830 size_t freed_bytes() { return _freed_bytes; }
1832 bool doHeapRegion(HeapRegion *hr) {
1833 if (hr->continuesHumongous()) {
1834 return false;
1835 }
1836 // We use a claim value of zero here because all regions
1837 // were claimed with value 1 in the FinalCount task.
1838 _g1->reset_gc_time_stamps(hr);
1839 double start = os::elapsedTime();
1840 _regions_claimed++;
1841 hr->note_end_of_marking();
1842 _max_live_bytes += hr->max_live_bytes();
1843 _g1->free_region_if_empty(hr,
1844 &_freed_bytes,
1845 _local_cleanup_list,
1846 _old_proxy_set,
1847 _humongous_proxy_set,
1848 _hrrs_cleanup_task,
1849 true /* par */);
1850 double region_time = (os::elapsedTime() - start);
1851 _claimed_region_time += region_time;
1852 if (region_time > _max_region_time) {
1853 _max_region_time = region_time;
1854 }
1855 return false;
1856 }
1858 size_t max_live_bytes() { return _max_live_bytes; }
1859 uint regions_claimed() { return _regions_claimed; }
1860 double claimed_region_time_sec() { return _claimed_region_time; }
1861 double max_region_time_sec() { return _max_region_time; }
1862 };
1864 class G1ParNoteEndTask: public AbstractGangTask {
1865 friend class G1NoteEndOfConcMarkClosure;
1867 protected:
1868 G1CollectedHeap* _g1h;
1869 size_t _max_live_bytes;
1870 size_t _freed_bytes;
1871 FreeRegionList* _cleanup_list;
1873 public:
1874 G1ParNoteEndTask(G1CollectedHeap* g1h,
1875 FreeRegionList* cleanup_list) :
1876 AbstractGangTask("G1 note end"), _g1h(g1h),
1877 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1879 void work(uint worker_id) {
1880 double start = os::elapsedTime();
1881 FreeRegionList local_cleanup_list("Local Cleanup List");
1882 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1883 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1884 HRRSCleanupTask hrrs_cleanup_task;
1885 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1886 &old_proxy_set,
1887 &humongous_proxy_set,
1888 &hrrs_cleanup_task);
1889 if (G1CollectedHeap::use_parallel_gc_threads()) {
1890 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1891 _g1h->workers()->active_workers(),
1892 HeapRegion::NoteEndClaimValue);
1893 } else {
1894 _g1h->heap_region_iterate(&g1_note_end);
1895 }
1896 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1898 // Now update the lists
1899 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1900 NULL /* free_list */,
1901 &old_proxy_set,
1902 &humongous_proxy_set,
1903 true /* par */);
1904 {
1905 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1906 _max_live_bytes += g1_note_end.max_live_bytes();
1907 _freed_bytes += g1_note_end.freed_bytes();
1909 // If we iterate over the global cleanup list at the end of
1910 // cleanup to do this printing we will not guarantee to only
1911 // generate output for the newly-reclaimed regions (the list
1912 // might not be empty at the beginning of cleanup; we might
1913 // still be working on its previous contents). So we do the
1914 // printing here, before we append the new regions to the global
1915 // cleanup list.
1917 G1HRPrinter* hr_printer = _g1h->hr_printer();
1918 if (hr_printer->is_active()) {
1919 HeapRegionLinkedListIterator iter(&local_cleanup_list);
1920 while (iter.more_available()) {
1921 HeapRegion* hr = iter.get_next();
1922 hr_printer->cleanup(hr);
1923 }
1924 }
1926 _cleanup_list->add_as_tail(&local_cleanup_list);
1927 assert(local_cleanup_list.is_empty(), "post-condition");
1929 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1930 }
1931 }
1932 size_t max_live_bytes() { return _max_live_bytes; }
1933 size_t freed_bytes() { return _freed_bytes; }
1934 };
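// A minimal sketch of the "build locally, merge under a lock" pattern that
// G1ParNoteEndTask::work() uses for its cleanup lists; standard C++
// containers stand in for the HotSpot region sets and all names are
// illustrative. Each worker touches the shared list exactly once, keeping
// contention on the lock low.

#include <mutex>
#include <vector>

struct RegionInfo { size_t freed_bytes; };

static std::mutex g_cleanup_mutex;
static std::vector<RegionInfo> g_cleanup_list;   // shared, lock-protected

static void note_end_worker(const std::vector<RegionInfo>& claimed_regions) {
  std::vector<RegionInfo> local;                 // per-worker, no locking
  for (const RegionInfo& r : claimed_regions) {
    local.push_back(r);                          // stand-in for per-region work
  }
  std::lock_guard<std::mutex> guard(g_cleanup_mutex);
  g_cleanup_list.insert(g_cleanup_list.end(), local.begin(), local.end());
}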
1936 class G1ParScrubRemSetTask: public AbstractGangTask {
1937 protected:
1938 G1RemSet* _g1rs;
1939 BitMap* _region_bm;
1940 BitMap* _card_bm;
1941 public:
1942 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1943 BitMap* region_bm, BitMap* card_bm) :
1944 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1945 _region_bm(region_bm), _card_bm(card_bm) { }
1947 void work(uint worker_id) {
1948 if (G1CollectedHeap::use_parallel_gc_threads()) {
1949 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1950 HeapRegion::ScrubRemSetClaimValue);
1951 } else {
1952 _g1rs->scrub(_region_bm, _card_bm);
1953 }
1954 }
1956 };
1958 void ConcurrentMark::cleanup() {
1959 // world is stopped at this checkpoint
1960 assert(SafepointSynchronize::is_at_safepoint(),
1961 "world should be stopped");
1962 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1964 // If a full collection has happened, we shouldn't do this.
1965 if (has_aborted()) {
1966 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1967 return;
1968 }
1970 HRSPhaseSetter x(HRSPhaseCleanup);
1971 g1h->verify_region_sets_optional();
1973 if (VerifyDuringGC) {
1974 HandleMark hm; // handle scope
1975 gclog_or_tty->print(" VerifyDuringGC:(before)");
1976 Universe::heap()->prepare_for_verify();
1977 Universe::verify(/* silent */ false,
1978 /* option */ VerifyOption_G1UsePrevMarking);
1979 }
1981 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1982 g1p->record_concurrent_mark_cleanup_start();
1984 double start = os::elapsedTime();
1986 HeapRegionRemSet::reset_for_cleanup_tasks();
1988 uint n_workers;
1990 // Do counting once more with the world stopped for good measure.
1991 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1993 if (G1CollectedHeap::use_parallel_gc_threads()) {
1994 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1995 "sanity check");
1997 g1h->set_par_threads();
1998 n_workers = g1h->n_par_threads();
1999 assert(g1h->n_par_threads() == n_workers,
2000 "Should not have been reset");
2001 g1h->workers()->run_task(&g1_par_count_task);
2002 // Done with the parallel phase so reset to 0.
2003 g1h->set_par_threads(0);
2005 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2006 "sanity check");
2007 } else {
2008 n_workers = 1;
2009 g1_par_count_task.work(0);
2010 }
2012 if (VerifyDuringGC) {
2013 // Verify that the counting data accumulated during marking matches
2014 // that calculated by walking the marking bitmap.
2016 // Bitmaps to hold expected values
2017 BitMap expected_region_bm(_region_bm.size(), false);
2018 BitMap expected_card_bm(_card_bm.size(), false);
2020 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2021 &_region_bm,
2022 &_card_bm,
2023 &expected_region_bm,
2024 &expected_card_bm);
2026 if (G1CollectedHeap::use_parallel_gc_threads()) {
2027 g1h->set_par_threads((int)n_workers);
2028 g1h->workers()->run_task(&g1_par_verify_task);
2029 // Done with the parallel phase so reset to 0.
2030 g1h->set_par_threads(0);
2032 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2033 "sanity check");
2034 } else {
2035 g1_par_verify_task.work(0);
2036 }
2038 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2039 }
2041 size_t start_used_bytes = g1h->used();
2042 g1h->set_marking_complete();
2044 double count_end = os::elapsedTime();
2045 double this_final_counting_time = (count_end - start);
2046 _total_counting_time += this_final_counting_time;
2048 if (G1PrintRegionLivenessInfo) {
2049 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2050 _g1h->heap_region_iterate(&cl);
2051 }
2053 // Install newly created mark bitMap as "prev".
2054 swapMarkBitMaps();
2056 g1h->reset_gc_time_stamp();
2058 // Note end of marking in all heap regions.
2059 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2060 if (G1CollectedHeap::use_parallel_gc_threads()) {
2061 g1h->set_par_threads((int)n_workers);
2062 g1h->workers()->run_task(&g1_par_note_end_task);
2063 g1h->set_par_threads(0);
2065 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2066 "sanity check");
2067 } else {
2068 g1_par_note_end_task.work(0);
2069 }
2070 g1h->check_gc_time_stamps();
2072 if (!cleanup_list_is_empty()) {
2073 // The cleanup list is not empty, so we'll have to process it
2074 // concurrently. Notify anyone else that might be wanting free
2075 // regions that there will be more free regions coming soon.
2076 g1h->set_free_regions_coming();
2077 }
2079 // Do the rem set scrubbing before the cleanup-end call below, since it
2080 // affects the metric by which we sort the heap regions.
2081 if (G1ScrubRemSets) {
2082 double rs_scrub_start = os::elapsedTime();
2083 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2084 if (G1CollectedHeap::use_parallel_gc_threads()) {
2085 g1h->set_par_threads((int)n_workers);
2086 g1h->workers()->run_task(&g1_par_scrub_rs_task);
2087 g1h->set_par_threads(0);
2089 assert(g1h->check_heap_region_claim_values(
2090 HeapRegion::ScrubRemSetClaimValue),
2091 "sanity check");
2092 } else {
2093 g1_par_scrub_rs_task.work(0);
2094 }
2096 double rs_scrub_end = os::elapsedTime();
2097 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2098 _total_rs_scrub_time += this_rs_scrub_time;
2099 }
2101 // this will also free any regions totally full of garbage objects,
2102 // and sort the regions.
2103 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2105 // Statistics.
2106 double end = os::elapsedTime();
2107 _cleanup_times.add((end - start) * 1000.0);
2109 if (G1Log::fine()) {
2110 g1h->print_size_transition(gclog_or_tty,
2111 start_used_bytes,
2112 g1h->used(),
2113 g1h->capacity());
2114 }
2116 // Clean up will have freed any regions completely full of garbage.
2117 // Update the soft reference policy with the new heap occupancy.
2118 Universe::update_heap_info_at_gc();
2120 // We need to make this be a "collection" so any collection pause that
2121 // races with it goes around and waits for completeCleanup to finish.
2122 g1h->increment_total_collections();
2124 // We reclaimed old regions so we should calculate the sizes to make
2125 // sure we update the old gen/space data.
2126 g1h->g1mm()->update_sizes();
2128 if (VerifyDuringGC) {
2129 HandleMark hm; // handle scope
2130 gclog_or_tty->print(" VerifyDuringGC:(after)");
2131 Universe::heap()->prepare_for_verify();
2132 Universe::verify(/* silent */ false,
2133 /* option */ VerifyOption_G1UsePrevMarking);
2134 }
2136 g1h->verify_region_sets_optional();
2137 }
2139 void ConcurrentMark::completeCleanup() {
2140 if (has_aborted()) return;
2142 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2144 _cleanup_list.verify_optional();
2145 FreeRegionList tmp_free_list("Tmp Free List");
2147 if (G1ConcRegionFreeingVerbose) {
2148 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2149 "cleanup list has %u entries",
2150 _cleanup_list.length());
2151 }
2153 // No one else should be accessing the _cleanup_list at this point,
2154 // so it's not necessary to take any locks
2155 while (!_cleanup_list.is_empty()) {
2156 HeapRegion* hr = _cleanup_list.remove_head();
2157 assert(hr != NULL, "the list was not empty");
2158 hr->par_clear();
2159 tmp_free_list.add_as_tail(hr);
2161 // Instead of adding one region at a time to the secondary_free_list,
2162 // we accumulate them in the local list and move them a few at a
2163 // time. This also cuts down on the number of notify_all() calls
2164 // we do during this process. We'll also append the local list when
2165 // _cleanup_list is empty (which means we just removed the last
2166 // region from the _cleanup_list).
2167 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2168 _cleanup_list.is_empty()) {
2169 if (G1ConcRegionFreeingVerbose) {
2170 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2171 "appending %u entries to the secondary_free_list, "
2172 "cleanup list still has %u entries",
2173 tmp_free_list.length(),
2174 _cleanup_list.length());
2175 }
2177 {
2178 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2179 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2180 SecondaryFreeList_lock->notify_all();
2181 }
2183 if (G1StressConcRegionFreeing) {
2184 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2185 os::sleep(Thread::current(), (jlong) 1, false);
2186 }
2187 }
2188 }
2189 }
2190 assert(tmp_free_list.is_empty(), "post-condition");
2191 }
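// A minimal sketch of the batching idea in completeCleanup() above: append
// to the shared free list every "batch" regions (and once more at the very
// end) so that waiters are woken per batch rather than per region. The
// batch size plays the role of G1SecondaryFreeListAppendLength; types and
// names are illustrative standard C++.

#include <condition_variable>
#include <deque>
#include <mutex>

static std::mutex g_free_mutex;
static std::condition_variable g_free_cv;
static std::deque<int> g_free_list;  // region ids, for illustration

static void drain_in_batches(std::deque<int>& cleanup_list, size_t batch) {
  std::deque<int> local;
  while (!cleanup_list.empty()) {
    local.push_back(cleanup_list.front());
    cleanup_list.pop_front();
    if (local.size() % batch == 0 || cleanup_list.empty()) {
      std::lock_guard<std::mutex> guard(g_free_mutex);
      g_free_list.insert(g_free_list.end(), local.begin(), local.end());
      local.clear();
      g_free_cv.notify_all();  // one wakeup per batch, not per region
    }
  }
}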
2193 // Supporting Object and Oop closures for reference discovery
2194 // and processing during marking.
2196 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2197 HeapWord* addr = (HeapWord*)obj;
2198 return addr != NULL &&
2199 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2200 }
2202 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2203 // Uses the CMTask associated with a worker thread (for serial reference
2204 // processing the CMTask for worker 0 is used) to preserve (mark) and
2205 // trace referent objects.
2206 //
2207 // Using the CMTask and embedded local queues avoids having the worker
2208 // threads operating on the global mark stack. This reduces the risk
2209 // of overflowing the stack - which we would rather avoid at this late
2210 // stage. Also, using the tasks' local queues removes the potential
2211 // for the workers to interfere with each other, which could occur if
2212 // they operated on the global stack.
2214 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2215 ConcurrentMark* _cm;
2216 CMTask* _task;
2217 int _ref_counter_limit;
2218 int _ref_counter;
2219 bool _is_serial;
2220 public:
2221 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2222 _cm(cm), _task(task), _is_serial(is_serial),
2223 _ref_counter_limit(G1RefProcDrainInterval) {
2224 assert(_ref_counter_limit > 0, "sanity");
2225 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2226 _ref_counter = _ref_counter_limit;
2227 }
2229 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2230 virtual void do_oop( oop* p) { do_oop_work(p); }
2232 template <class T> void do_oop_work(T* p) {
2233 if (!_cm->has_overflown()) {
2234 oop obj = oopDesc::load_decode_heap_oop(p);
2235 if (_cm->verbose_high()) {
2236 gclog_or_tty->print_cr("\t[%u] we're looking at location "
2237 "*"PTR_FORMAT" = "PTR_FORMAT,
2238 _task->worker_id(), p, (void*) obj);
2239 }
2241 _task->deal_with_reference(obj);
2242 _ref_counter--;
2244 if (_ref_counter == 0) {
2245 // We have dealt with _ref_counter_limit references, pushing them
2246 // and objects reachable from them on to the local stack (and
2247 // possibly the global stack). Call CMTask::do_marking_step() to
2248 // process these entries.
2249 //
2250 // We call CMTask::do_marking_step() in a loop, which we'll exit if
2251 // there's nothing more to do (i.e. we're done with the entries that
2252 // were pushed as a result of the CMTask::deal_with_reference() calls
2253 // above) or we overflow.
2254 //
2255 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2256 // flag while there may still be some work to do. (See the comment at
2257 // the beginning of CMTask::do_marking_step() for those conditions -
2258 // one of which is reaching the specified time target.) It is only
2259 // when CMTask::do_marking_step() returns without setting the
2260 // has_aborted() flag that the marking step has completed.
2261 do {
2262 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2263 _task->do_marking_step(mark_step_duration_ms,
2264 false /* do_termination */,
2265 _is_serial);
2266 } while (_task->has_aborted() && !_cm->has_overflown());
2267 _ref_counter = _ref_counter_limit;
2268 }
2269 } else {
2270 if (_cm->verbose_high()) {
2271 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2272 }
2273 }
2274 }
2275 };
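// A minimal sketch of the counter-driven drain in the closure above,
// reduced to its control flow; kDrainInterval stands in for
// G1RefProcDrainInterval and the callables are placeholders. After every
// kDrainInterval references we stop pushing and drain, which bounds the
// size of the local queue; any remainder is handled by a final drain
// (the role played by G1CMDrainMarkingStackClosure below).

#include <cstddef>

static const int kDrainInterval = 1000;  // assumed value, for illustration

template <typename PushFn, typename DrainFn>
void keep_alive_loop(size_t num_refs, PushFn push, DrainFn drain) {
  int budget = kDrainInterval;
  for (size_t i = 0; i < num_refs; ++i) {
    push(i);               // mark/trace one referent
    if (--budget == 0) {
      drain();             // process everything pushed so far
      budget = kDrainInterval;
    }
  }
}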
2277 // 'Drain' oop closure used by both serial and parallel reference processing.
2278 // Uses the CMTask associated with a given worker thread (for serial
2279 // reference processing the CMTask for worker 0 is used). Calls the
2280 // do_marking_step routine, with an unbelievably large timeout value,
2281 // to drain the marking data structures of the remaining entries
2282 // added by the 'keep alive' oop closure above.
2284 class G1CMDrainMarkingStackClosure: public VoidClosure {
2285 ConcurrentMark* _cm;
2286 CMTask* _task;
2287 bool _is_serial;
2288 public:
2289 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2290 _cm(cm), _task(task), _is_serial(is_serial) {
2291 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2292 }
2294 void do_void() {
2295 do {
2296 if (_cm->verbose_high()) {
2297 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2298 _task->worker_id(), BOOL_TO_STR(_is_serial));
2299 }
2301 // We call CMTask::do_marking_step() to completely drain the local
2302 // and global marking stacks of entries pushed by the 'keep alive'
2303 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2304 //
2305 // CMTask::do_marking_step() is called in a loop, which we'll exit
2306 // if there's nothing more to do (i.e. we've completely drained the
2307 // entries that were pushed as a result of applying the 'keep alive'
2308 // closure to the entries on the discovered ref lists) or we overflow
2309 // the global marking stack.
2310 //
2311 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2312 // flag while there may still be some work to do. (See the comment at
2313 // the beginning of CMTask::do_marking_step() for those conditions -
2314 // one of which is reaching the specified time target.) It is only
2315 // when CMTask::do_marking_step() returns without setting the
2316 // has_aborted() flag that the marking step has completed.
2318 _task->do_marking_step(1000000000.0 /* something very large */,
2319 true /* do_termination */,
2320 _is_serial);
2321 } while (_task->has_aborted() && !_cm->has_overflown());
2322 }
2323 };
2325 // Implementation of AbstractRefProcTaskExecutor for parallel
2326 // reference processing at the end of G1 concurrent marking
2328 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2329 private:
2330 G1CollectedHeap* _g1h;
2331 ConcurrentMark* _cm;
2332 WorkGang* _workers;
2333 int _active_workers;
2335 public:
2336 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2337 ConcurrentMark* cm,
2338 WorkGang* workers,
2339 int n_workers) :
2340 _g1h(g1h), _cm(cm),
2341 _workers(workers), _active_workers(n_workers) { }
2343 // Executes the given task using concurrent marking worker threads.
2344 virtual void execute(ProcessTask& task);
2345 virtual void execute(EnqueueTask& task);
2346 };
2348 class G1CMRefProcTaskProxy: public AbstractGangTask {
2349 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2350 ProcessTask& _proc_task;
2351 G1CollectedHeap* _g1h;
2352 ConcurrentMark* _cm;
2354 public:
2355 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2356 G1CollectedHeap* g1h,
2357 ConcurrentMark* cm) :
2358 AbstractGangTask("Process reference objects in parallel"),
2359 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2360 ReferenceProcessor* rp = _g1h->ref_processor_cm();
2361 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2362 }
2364 virtual void work(uint worker_id) {
2365 CMTask* task = _cm->task(worker_id);
2366 G1CMIsAliveClosure g1_is_alive(_g1h);
2367 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2368 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2370 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2371 }
2372 };
2374 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2375 assert(_workers != NULL, "Need parallel worker threads.");
2376 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2378 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2380 // We need to reset the concurrency level before each
2381 // proxy task execution, so that the termination protocol
2382 // and overflow handling in CMTask::do_marking_step() know
2383 // how many workers to wait for.
2384 _cm->set_concurrency(_active_workers);
2385 _g1h->set_par_threads(_active_workers);
2386 _workers->run_task(&proc_task_proxy);
2387 _g1h->set_par_threads(0);
2388 }
2390 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2391 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2392 EnqueueTask& _enq_task;
2394 public:
2395 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2396 AbstractGangTask("Enqueue reference objects in parallel"),
2397 _enq_task(enq_task) { }
2399 virtual void work(uint worker_id) {
2400 _enq_task.work(worker_id);
2401 }
2402 };
2404 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2405 assert(_workers != NULL, "Need parallel worker threads.");
2406 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2408 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2410 // Not strictly necessary but...
2411 //
2412 // We need to reset the concurrency level before each
2413 // proxy task execution, so that the termination protocol
2414 // and overflow handling in CMTask::do_marking_step() know
2415 // how many workers to wait for.
2416 _cm->set_concurrency(_active_workers);
2417 _g1h->set_par_threads(_active_workers);
2418 _workers->run_task(&enq_task_proxy);
2419 _g1h->set_par_threads(0);
2420 }
2422 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2423 if (has_overflown()) {
2424 // Skip processing the discovered references if we have
2425 // overflown the global marking stack. Reference objects
2426 // only get discovered once so it is OK to not
2427 // de-populate the discovered reference lists. We could have,
2428 // but the only benefit would be that, when marking restarts,
2429 // fewer reference objects are discovered.
2430 return;
2431 }
2433 ResourceMark rm;
2434 HandleMark hm;
2436 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2438 // Is alive closure.
2439 G1CMIsAliveClosure g1_is_alive(g1h);
2441 // Inner scope to exclude the cleaning of the string and symbol
2442 // tables from the displayed time.
2443 {
2444 if (G1Log::finer()) {
2445 gclog_or_tty->put(' ');
2446 }
2447 TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2449 ReferenceProcessor* rp = g1h->ref_processor_cm();
2451 // See the comment in G1CollectedHeap::ref_processing_init()
2452 // about how reference processing currently works in G1.
2454 // Set the soft reference policy
2455 rp->setup_policy(clear_all_soft_refs);
2456 assert(_markStack.isEmpty(), "mark stack should be empty");
2458 // Instances of the 'Keep Alive' and 'Complete GC' closures used
2459 // in serial reference processing. Note these closures are also
2460 // used for serially processing (by the current thread) the
2461 // JNI references during parallel reference processing.
2462 //
2463 // These closures do not need to synchronize with the worker
2464 // threads involved in parallel reference processing as these
2465 // instances are executed serially by the current thread (e.g.
2466 // reference processing is not multi-threaded and is thus
2467 // performed by the current thread instead of a gang worker).
2468 //
2469 // The gang tasks involved in parallel reference processing create
2470 // their own instances of these closures, which do their own
2471 // synchronization among themselves.
2472 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2473 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2475 // We need at least one active thread. If reference processing
2476 // is not multi-threaded we use the current (VMThread) thread,
2477 // otherwise we use the work gang from the G1CollectedHeap and
2478 // we utilize all the worker threads we can.
2479 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2480 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2481 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2483 // Parallel processing task executor.
2484 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2485 g1h->workers(), active_workers);
2486 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2488 // Set the concurrency level. The phase was already set prior to
2489 // executing the remark task.
2490 set_concurrency(active_workers);
2492 // Set the degree of MT processing here. If the discovery was done MT,
2493 // the number of threads involved during discovery could differ from
2494 // the number of active workers. This is OK as long as the discovered
2495 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2496 rp->set_active_mt_degree(active_workers);
2498 // Process the weak references.
2499 rp->process_discovered_references(&g1_is_alive,
2500 &g1_keep_alive,
2501 &g1_drain_mark_stack,
2502 executor);
2504 // The do_oop work routines of the keep_alive and drain_marking_stack
2505 // oop closures will set the has_overflown flag if we overflow the
2506 // global marking stack.
2508 assert(_markStack.overflow() || _markStack.isEmpty(),
2509 "mark stack should be empty (unless it overflowed)");
2511 if (_markStack.overflow()) {
2512 // This should have been done already when we tried to push an
2513 // entry on to the global mark stack. But let's do it again.
2514 set_has_overflown();
2515 }
2517 assert(rp->num_q() == active_workers, "why not");
2519 rp->enqueue_discovered_references(executor);
2521 rp->verify_no_references_recorded();
2522 assert(!rp->discovery_enabled(), "Post condition");
2523 }
2525 // Now clean up stale oops in StringTable
2526 StringTable::unlink(&g1_is_alive);
2527 // Clean up unreferenced symbols in symbol table.
2528 SymbolTable::unlink();
2529 }
2531 void ConcurrentMark::swapMarkBitMaps() {
2532 CMBitMapRO* temp = _prevMarkBitMap;
2533 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2534 _nextMarkBitMap = (CMBitMap*) temp;
2535 }
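// A minimal sketch of what swapMarkBitMaps() amounts to: a plain pointer
// swap, so the bitmap just built by marking becomes the stable "prev"
// picture of liveness and the old "prev" storage is recycled as "next" for
// the coming cycle. Types here are illustrative.

#include <utility>

struct MarkBitMap { /* word-granularity bit set over the heap */ };

struct MarkState {
  MarkBitMap* prev;  // read-only view during the next cycle
  MarkBitMap* next;  // will be cleared and rebuilt

  void swap_bitmaps() { std::swap(prev, next); }
};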
2537 class CMRemarkTask: public AbstractGangTask {
2538 private:
2539 ConcurrentMark* _cm;
2540 bool _is_serial;
2541 public:
2542 void work(uint worker_id) {
2543 // Since all available tasks are actually started, we should
2544 // only proceed if we're supposed to be active.
2545 if (worker_id < _cm->active_tasks()) {
2546 CMTask* task = _cm->task(worker_id);
2547 task->record_start_time();
2548 do {
2549 task->do_marking_step(1000000000.0 /* something very large */,
2550 true /* do_termination */,
2551 _is_serial);
2552 } while (task->has_aborted() && !_cm->has_overflown());
2553 // If we overflow, then we do not want to restart. We instead
2554 // want to abort remark and do concurrent marking again.
2555 task->record_end_time();
2556 }
2557 }
2559 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2560 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2561 _cm->terminator()->reset_for_reuse(active_workers);
2562 }
2563 };
2565 void ConcurrentMark::checkpointRootsFinalWork() {
2566 ResourceMark rm;
2567 HandleMark hm;
2568 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2570 g1h->ensure_parsability(false);
2572 if (G1CollectedHeap::use_parallel_gc_threads()) {
2573 G1CollectedHeap::StrongRootsScope srs(g1h);
2574 // this is remark, so we'll use up all active threads
2575 uint active_workers = g1h->workers()->active_workers();
2576 if (active_workers == 0) {
2577 assert(active_workers > 0, "Should have been set earlier");
2578 active_workers = (uint) ParallelGCThreads;
2579 g1h->workers()->set_active_workers(active_workers);
2580 }
2581 set_concurrency_and_phase(active_workers, false /* concurrent */);
2582 // Leave _parallel_marking_threads at its
2583 // value originally calculated in the ConcurrentMark
2584 // constructor and pass values of the active workers
2585 // through the gang in the task.
2587 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2588 // We will start all available threads, even if we decide that the
2589 // active_workers will be fewer. The extra ones will just bail out
2590 // immediately.
2591 g1h->set_par_threads(active_workers);
2592 g1h->workers()->run_task(&remarkTask);
2593 g1h->set_par_threads(0);
2594 } else {
2595 G1CollectedHeap::StrongRootsScope srs(g1h);
2596 uint active_workers = 1;
2597 set_concurrency_and_phase(active_workers, false /* concurrent */);
2599 // Note - if there's no work gang then the VMThread will be
2600 // the thread to execute the remark - serially. We have
2601 // to pass true for the is_serial parameter so that
2602 // CMTask::do_marking_step() doesn't enter the sync
2603 // barriers in the event of an overflow. Doing so will
2604 // cause an assert that the current thread is not a
2605 // concurrent GC thread.
2606 CMRemarkTask remarkTask(this, active_workers, true /* is_serial */);
2607 remarkTask.work(0);
2608 }
2609 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2610 guarantee(has_overflown() ||
2611 satb_mq_set.completed_buffers_num() == 0,
2612 err_msg("Invariant: has_overflown = %s, num buffers = %d",
2613 BOOL_TO_STR(has_overflown()),
2614 satb_mq_set.completed_buffers_num()));
2616 print_stats();
2617 }
2619 #ifndef PRODUCT
2621 class PrintReachableOopClosure: public OopClosure {
2622 private:
2623 G1CollectedHeap* _g1h;
2624 outputStream* _out;
2625 VerifyOption _vo;
2626 bool _all;
2628 public:
2629 PrintReachableOopClosure(outputStream* out,
2630 VerifyOption vo,
2631 bool all) :
2632 _g1h(G1CollectedHeap::heap()),
2633 _out(out), _vo(vo), _all(all) { }
2635 void do_oop(narrowOop* p) { do_oop_work(p); }
2636 void do_oop( oop* p) { do_oop_work(p); }
2638 template <class T> void do_oop_work(T* p) {
2639 oop obj = oopDesc::load_decode_heap_oop(p);
2640 const char* str = NULL;
2641 const char* str2 = "";
2643 if (obj == NULL) {
2644 str = "";
2645 } else if (!_g1h->is_in_g1_reserved(obj)) {
2646 str = " O";
2647 } else {
2648 HeapRegion* hr = _g1h->heap_region_containing(obj);
2649 guarantee(hr != NULL, "invariant");
2650 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2651 bool marked = _g1h->is_marked(obj, _vo);
2653 if (over_tams) {
2654 str = " >";
2655 if (marked) {
2656 str2 = " AND MARKED";
2657 }
2658 } else if (marked) {
2659 str = " M";
2660 } else {
2661 str = " NOT";
2662 }
2663 }
2665 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2666 p, (void*) obj, str, str2);
2667 }
2668 };
2670 class PrintReachableObjectClosure : public ObjectClosure {
2671 private:
2672 G1CollectedHeap* _g1h;
2673 outputStream* _out;
2674 VerifyOption _vo;
2675 bool _all;
2676 HeapRegion* _hr;
2678 public:
2679 PrintReachableObjectClosure(outputStream* out,
2680 VerifyOption vo,
2681 bool all,
2682 HeapRegion* hr) :
2683 _g1h(G1CollectedHeap::heap()),
2684 _out(out), _vo(vo), _all(all), _hr(hr) { }
2686 void do_object(oop o) {
2687 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2688 bool marked = _g1h->is_marked(o, _vo);
2689 bool print_it = _all || over_tams || marked;
2691 if (print_it) {
2692 _out->print_cr(" "PTR_FORMAT"%s",
2693 o, (over_tams) ? " >" : (marked) ? " M" : "");
2694 PrintReachableOopClosure oopCl(_out, _vo, _all);
2695 o->oop_iterate_no_header(&oopCl);
2696 }
2697 }
2698 };
2700 class PrintReachableRegionClosure : public HeapRegionClosure {
2701 private:
2702 G1CollectedHeap* _g1h;
2703 outputStream* _out;
2704 VerifyOption _vo;
2705 bool _all;
2707 public:
2708 bool doHeapRegion(HeapRegion* hr) {
2709 HeapWord* b = hr->bottom();
2710 HeapWord* e = hr->end();
2711 HeapWord* t = hr->top();
2712 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2713 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2714 "TAMS: "PTR_FORMAT, b, e, t, p);
2715 _out->cr();
2717 HeapWord* from = b;
2718 HeapWord* to = t;
2720 if (to > from) {
2721 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2722 _out->cr();
2723 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2724 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2725 _out->cr();
2726 }
2728 return false;
2729 }
2731 PrintReachableRegionClosure(outputStream* out,
2732 VerifyOption vo,
2733 bool all) :
2734 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2735 };
2737 void ConcurrentMark::print_reachable(const char* str,
2738 VerifyOption vo,
2739 bool all) {
2740 gclog_or_tty->cr();
2741 gclog_or_tty->print_cr("== Doing heap dump... ");
2743 if (G1PrintReachableBaseFile == NULL) {
2744 gclog_or_tty->print_cr(" #### error: no base file defined");
2745 return;
2746 }
2748 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2749 (JVM_MAXPATHLEN - 1)) {
2750 gclog_or_tty->print_cr(" #### error: file name too long");
2751 return;
2752 }
2754 char file_name[JVM_MAXPATHLEN];
2755 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2756 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2758 fileStream fout(file_name);
2759 if (!fout.is_open()) {
2760 gclog_or_tty->print_cr(" #### error: could not open file");
2761 return;
2762 }
2764 outputStream* out = &fout;
2765 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2766 out->cr();
2768 out->print_cr("--- ITERATING OVER REGIONS");
2769 out->cr();
2770 PrintReachableRegionClosure rcl(out, vo, all);
2771 _g1h->heap_region_iterate(&rcl);
2772 out->cr();
2774 gclog_or_tty->print_cr(" done");
2775 gclog_or_tty->flush();
2776 }
2778 #endif // PRODUCT
2780 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2781 // Note we are overriding the read-only view of the prev map here, via
2782 // the cast.
2783 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2784 }
2786 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2787 _nextMarkBitMap->clearRange(mr);
2788 }
2790 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2791 clearRangePrevBitmap(mr);
2792 clearRangeNextBitmap(mr);
2793 }
2795 HeapRegion*
2796 ConcurrentMark::claim_region(uint worker_id) {
2797 // "checkpoint" the finger
2798 HeapWord* finger = _finger;
2800 // _heap_end will not change underneath our feet; it only changes at
2801 // yield points.
2802 while (finger < _heap_end) {
2803 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2805 // Note on how this code handles humongous regions. In the
2806 // normal case the finger will reach the start of a "starts
2807 // humongous" (SH) region. Its end will either be the end of the
2808 // last "continues humongous" (CH) region in the sequence, or the
2809 // standard end of the SH region (if the SH is the only region in
2810 // the sequence). That way claim_region() will skip over the CH
2811 // regions. However, there is a subtle race between a CM thread
2812 // executing this method and a mutator thread doing a humongous
2813 // object allocation. The two are not mutually exclusive as the CM
2814 // thread does not need to hold the Heap_lock when it gets
2815 // here. So there is a chance that claim_region() will come across
2816 // a free region that's in the process of becoming a SH or a CH
2817 // region. In the former case, it will either
2818 // a) Miss the update to the region's end, in which case it will
2819 // visit every subsequent CH region, will find their bitmaps
2820 // empty, and do nothing, or
2821 // b) Will observe the update of the region's end (in which case
2822 // it will skip the subsequent CH regions).
2823 // If it comes across a region that suddenly becomes CH, the
2824 // scenario will be similar to b). So, the race between
2825 // claim_region() and a humongous object allocation might force us
2826 // to do a bit of unnecessary work (due to some unnecessary bitmap
2827 // iterations) but it should not introduce any correctness issues.
2828 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2829 HeapWord* bottom = curr_region->bottom();
2830 HeapWord* end = curr_region->end();
2831 HeapWord* limit = curr_region->next_top_at_mark_start();
2833 if (verbose_low()) {
2834 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2835 "["PTR_FORMAT", "PTR_FORMAT"), "
2836 "limit = "PTR_FORMAT,
2837 worker_id, curr_region, bottom, end, limit);
2838 }
2840 // Is the gap between reading the finger and doing the CAS too long?
2841 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2842 if (res == finger) {
2843 // we succeeded
2845 // notice that _finger == end cannot be guaranteed here since
2846 // someone else might have moved the finger even further
2847 assert(_finger >= end, "the finger should have moved forward");
2849 if (verbose_low()) {
2850 gclog_or_tty->print_cr("[%u] we were successful with region = "
2851 PTR_FORMAT, worker_id, curr_region);
2852 }
2854 if (limit > bottom) {
2855 if (verbose_low()) {
2856 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2857 "returning it ", worker_id, curr_region);
2858 }
2859 return curr_region;
2860 } else {
2861 assert(limit == bottom,
2862 "the region limit should be at bottom");
2863 if (verbose_low()) {
2864 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2865 "returning NULL", worker_id, curr_region);
2866 }
2867 // we return NULL and the caller should try calling
2868 // claim_region() again.
2869 return NULL;
2870 }
2871 } else {
2872 assert(_finger > finger, "the finger should have moved forward");
2873 if (verbose_low()) {
2874 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2875 "global finger = "PTR_FORMAT", "
2876 "our finger = "PTR_FORMAT,
2877 worker_id, _finger, finger);
2878 }
2880 // read it again
2881 finger = _finger;
2882 }
2883 }
2885 return NULL;
2886 }
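// A minimal sketch of the claim protocol in claim_region() above, over a
// flat address range with fixed-size regions; std::atomic stands in for
// Atomic::cmpxchg_ptr and the region size is an assumed constant. A worker
// reads the shared finger, computes the end of the region it points into,
// and CASes the finger to that end: success claims [finger, end)
// exclusively, while failure means another worker advanced the finger, so
// we retry from the value the failed CAS reported.

#include <atomic>
#include <cstdint>

static const uintptr_t kRegionBytes = uintptr_t(1) << 20;  // assumed 1 MB

static inline uintptr_t region_end_for(uintptr_t addr) {
  return (addr / kRegionBytes + 1) * kRegionBytes;
}

static std::atomic<uintptr_t> g_finger;

static uintptr_t claim_next_region(uintptr_t heap_end) {
  uintptr_t finger = g_finger.load();
  while (finger < heap_end) {
    uintptr_t end = region_end_for(finger);
    if (g_finger.compare_exchange_strong(finger, end)) {
      return finger;  // claimed [finger, end); caller scans it
    }
    // CAS failed: "finger" now holds the updated value; loop and retry.
  }
  return 0;  // nothing left to claim
}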
2888 #ifndef PRODUCT
2889 enum VerifyNoCSetOopsPhase {
2890 VerifyNoCSetOopsStack,
2891 VerifyNoCSetOopsQueues,
2892 VerifyNoCSetOopsSATBCompleted,
2893 VerifyNoCSetOopsSATBThread
2894 };
2896 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
2897 private:
2898 G1CollectedHeap* _g1h;
2899 VerifyNoCSetOopsPhase _phase;
2900 int _info;
2902 const char* phase_str() {
2903 switch (_phase) {
2904 case VerifyNoCSetOopsStack: return "Stack";
2905 case VerifyNoCSetOopsQueues: return "Queue";
2906 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2907 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
2908 default: ShouldNotReachHere();
2909 }
2910 return NULL;
2911 }
2913 void do_object_work(oop obj) {
2914 guarantee(!_g1h->obj_in_cs(obj),
2915 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2916 (void*) obj, phase_str(), _info));
2917 }
2919 public:
2920 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2922 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2923 _phase = phase;
2924 _info = info;
2925 }
2927 virtual void do_oop(oop* p) {
2928 oop obj = oopDesc::load_decode_heap_oop(p);
2929 do_object_work(obj);
2930 }
2932 virtual void do_oop(narrowOop* p) {
2933 // We should not come across narrow oops while scanning marking
2934 // stacks and SATB buffers.
2935 ShouldNotReachHere();
2936 }
2938 virtual void do_object(oop obj) {
2939 do_object_work(obj);
2940 }
2941 };
2943 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2944 bool verify_enqueued_buffers,
2945 bool verify_thread_buffers,
2946 bool verify_fingers) {
2947 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2948 if (!G1CollectedHeap::heap()->mark_in_progress()) {
2949 return;
2950 }
2952 VerifyNoCSetOopsClosure cl;
2954 if (verify_stacks) {
2955 // Verify entries on the global mark stack
2956 cl.set_phase(VerifyNoCSetOopsStack);
2957 _markStack.oops_do(&cl);
2959 // Verify entries on the task queues
2960 for (uint i = 0; i < _max_worker_id; i += 1) {
2961 cl.set_phase(VerifyNoCSetOopsQueues, i);
2962 CMTaskQueue* queue = _task_queues->queue(i);
2963 queue->oops_do(&cl);
2964 }
2965 }
2967 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2969 // Verify entries on the enqueued SATB buffers
2970 if (verify_enqueued_buffers) {
2971 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2972 satb_qs.iterate_completed_buffers_read_only(&cl);
2973 }
2975 // Verify entries on the per-thread SATB buffers
2976 if (verify_thread_buffers) {
2977 cl.set_phase(VerifyNoCSetOopsSATBThread);
2978 satb_qs.iterate_thread_buffers_read_only(&cl);
2979 }
2981 if (verify_fingers) {
2982 // Verify the global finger
2983 HeapWord* global_finger = finger();
2984 if (global_finger != NULL && global_finger < _heap_end) {
2985 // The global finger always points to a heap region boundary. We
2986 // use heap_region_containing_raw() to get the containing region
2987 // given that the global finger could be pointing to a free region
2988 // which subsequently becomes "continues humongous". If that
2989 // happens, heap_region_containing() will return the bottom of the
2990 // corresponding starts humongous region and the check below will
2991 // not hold any more.
2992 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2993 guarantee(global_finger == global_hr->bottom(),
2994 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2995 global_finger, HR_FORMAT_PARAMS(global_hr)));
2996 }
2998 // Verify the task fingers
2999 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3000 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3001 CMTask* task = _tasks[i];
3002 HeapWord* task_finger = task->finger();
3003 if (task_finger != NULL && task_finger < _heap_end) {
3004 // See above note on the global finger verification.
3005 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3006 guarantee(task_finger == task_hr->bottom() ||
3007 !task_hr->in_collection_set(),
3008 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3009 task_finger, HR_FORMAT_PARAMS(task_hr)));
3010 }
3011 }
3012 }
3013 }
3014 #endif // PRODUCT
3016 // Aggregate the counting data that was constructed concurrently
3017 // with marking.
3018 class AggregateCountDataHRClosure: public HeapRegionClosure {
3019 G1CollectedHeap* _g1h;
3020 ConcurrentMark* _cm;
3021 CardTableModRefBS* _ct_bs;
3022 BitMap* _cm_card_bm;
3023 uint _max_worker_id;
3025 public:
3026 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3027 BitMap* cm_card_bm,
3028 uint max_worker_id) :
3029 _g1h(g1h), _cm(g1h->concurrent_mark()),
3030 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3031 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3033 bool doHeapRegion(HeapRegion* hr) {
3034 if (hr->continuesHumongous()) {
3035 // We will ignore these here and process them when their
3036 // associated "starts humongous" region is processed.
3037 // Note that we cannot rely on their associated
3038 // "starts humongous" region to have its bit set to 1
3039 // since, due to the region chunking in the parallel region
3040 // iteration, a "continues humongous" region might be visited
3041 // before its associated "starts humongous".
3042 return false;
3043 }
3045 HeapWord* start = hr->bottom();
3046 HeapWord* limit = hr->next_top_at_mark_start();
3047 HeapWord* end = hr->end();
3049 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3050 err_msg("Preconditions not met - "
3051 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3052 "top: "PTR_FORMAT", end: "PTR_FORMAT,
3053 start, limit, hr->top(), hr->end()));
3055 assert(hr->next_marked_bytes() == 0, "Precondition");
3057 if (start == limit) {
3058 // NTAMS of this region has not been set so nothing to do.
3059 return false;
3060 }
3062 // 'start' should be in the heap.
3063 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3064 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3065 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3067 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3068 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3069 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3071 // If ntams is not card aligned then we bump the card bitmap index
3072 // for limit so that we get all the cards spanned by
3073 // the object ending at ntams.
3074 // Note: if this is the last region in the heap then ntams
3075 // could actually be just beyond the end of the heap;
3076 // limit_idx will then correspond to a (non-existent) card
3077 // that is also outside the heap.
3078 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3079 limit_idx += 1;
3080 }
3082 assert(limit_idx <= end_idx, "or else use atomics");
3084 // Aggregate the "stripe" in the count data associated with hr.
3085 uint hrs_index = hr->hrs_index();
3086 size_t marked_bytes = 0;
3088 for (uint i = 0; i < _max_worker_id; i += 1) {
3089 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3090 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3092 // Fetch the marked_bytes in this region for task i and
3093 // add it to the running total for this region.
3094 marked_bytes += marked_bytes_array[hrs_index];
3096 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3097 // into the global card bitmap.
3098 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3100 while (scan_idx < limit_idx) {
3101 assert(task_card_bm->at(scan_idx) == true, "should be");
3102 _cm_card_bm->set_bit(scan_idx);
3103 assert(_cm_card_bm->at(scan_idx) == true, "should be");
3105 // BitMap::get_next_one_offset() can handle the case when
3106 // its left_offset parameter is greater than its right_offset
3107 // parameter. It does, however, have an early exit if
3108 // left_offset == right_offset. So let's limit the value
3109 // passed in for left offset here.
3110 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3111 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3112 }
3113 }
3115 // Update the marked bytes for this region.
3116 hr->add_to_marked_bytes(marked_bytes);
3118 // Next heap region
3119 return false;
3120 }
3121 };
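// A minimal sketch of the aggregation step above in standard C++: sum each
// worker's per-region byte count into the global per-region totals, and OR
// each worker's card bitmap into the global card bitmap. Container types
// and names are illustrative.

#include <cstddef>
#include <vector>

static void aggregate_counts(
    const std::vector<std::vector<size_t> >& worker_bytes,
    const std::vector<std::vector<bool> >& worker_cards,
    std::vector<size_t>* region_bytes,
    std::vector<bool>* global_cards) {
  for (size_t w = 0; w < worker_bytes.size(); ++w) {
    for (size_t r = 0; r < region_bytes->size(); ++r) {
      (*region_bytes)[r] += worker_bytes[w][r];  // per-region "stripe" sum
    }
    for (size_t c = 0; c < global_cards->size(); ++c) {
      if (worker_cards[w][c]) (*global_cards)[c] = true;  // bitmap union
    }
  }
}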
3123 class G1AggregateCountDataTask: public AbstractGangTask {
3124 protected:
3125 G1CollectedHeap* _g1h;
3126 ConcurrentMark* _cm;
3127 BitMap* _cm_card_bm;
3128 uint _max_worker_id;
3129 int _active_workers;
3131 public:
3132 G1AggregateCountDataTask(G1CollectedHeap* g1h,
3133 ConcurrentMark* cm,
3134 BitMap* cm_card_bm,
3135 uint max_worker_id,
3136 int n_workers) :
3137 AbstractGangTask("Count Aggregation"),
3138 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3139 _max_worker_id(max_worker_id),
3140 _active_workers(n_workers) { }
3142 void work(uint worker_id) {
3143 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3145 if (G1CollectedHeap::use_parallel_gc_threads()) {
3146 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3147 _active_workers,
3148 HeapRegion::AggregateCountClaimValue);
3149 } else {
3150 _g1h->heap_region_iterate(&cl);
3151 }
3152 }
3153 };
3156 void ConcurrentMark::aggregate_count_data() {
3157 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3158 _g1h->workers()->active_workers() :
3159 1);
3161 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3162 _max_worker_id, n_workers);
3164 if (G1CollectedHeap::use_parallel_gc_threads()) {
3165 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3166 "sanity check");
3167 _g1h->set_par_threads(n_workers);
3168 _g1h->workers()->run_task(&g1_par_agg_task);
3169 _g1h->set_par_threads(0);
3171 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3172 "sanity check");
3173 _g1h->reset_heap_region_claim_values();
3174 } else {
3175 g1_par_agg_task.work(0);
3176 }
3177 }
3179 // Clear the per-worker arrays used to store the per-region counting data
3180 void ConcurrentMark::clear_all_count_data() {
3181 // Clear the global card bitmap - it will be filled during
3182 // liveness count aggregation (during remark) and the
3183 // final counting task.
3184 _card_bm.clear();
3186 // Clear the global region bitmap - it will be filled as part
3187 // of the final counting task.
3188 _region_bm.clear();
3190 uint max_regions = _g1h->max_regions();
3191 assert(_max_worker_id > 0, "uninitialized");
3193 for (uint i = 0; i < _max_worker_id; i += 1) {
3194 BitMap* task_card_bm = count_card_bitmap_for(i);
3195 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3197 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3198 assert(marked_bytes_array != NULL, "uninitialized");
3200 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3201 task_card_bm->clear();
3202 }
3203 }
3205 void ConcurrentMark::print_stats() {
3206 if (verbose_stats()) {
3207 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3208 for (size_t i = 0; i < _active_tasks; ++i) {
3209 _tasks[i]->print_stats();
3210 gclog_or_tty->print_cr("---------------------------------------------------------------------");
3211 }
3212 }
3213 }
3215 // abandon current marking iteration due to a Full GC
3216 void ConcurrentMark::abort() {
3217 // Clear all marks to force marking thread to do nothing
3218 _nextMarkBitMap->clearAll();
3219 // Clear the liveness counting data
3220 clear_all_count_data();
3221 // Empty mark stack
3222 reset_marking_state();
3223 for (uint i = 0; i < _max_worker_id; ++i) {
3224 _tasks[i]->clear_region_fields();
3225 }
3226 _has_aborted = true;
3228 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3229 satb_mq_set.abandon_partial_marking();
3230 // This can be called either during or outside marking, we'll read
3231 // the expected_active value from the SATB queue set.
3232 satb_mq_set.set_active_all_threads(
3233 false, /* new active value */
3234 satb_mq_set.is_active() /* expected_active */);
3235 }
3237 static void print_ms_time_info(const char* prefix, const char* name,
3238 NumberSeq& ns) {
3239 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3240 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3241 if (ns.num() > 0) {
3242 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3243 prefix, ns.sd(), ns.maximum());
3244 }
3245 }
3247 void ConcurrentMark::print_summary_info() {
3248 gclog_or_tty->print_cr(" Concurrent marking:");
3249 print_ms_time_info(" ", "init marks", _init_times);
3250 print_ms_time_info(" ", "remarks", _remark_times);
3251 {
3252 print_ms_time_info(" ", "final marks", _remark_mark_times);
3253 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3255 }
3256 print_ms_time_info(" ", "cleanups", _cleanup_times);
3257 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3258 _total_counting_time,
3259 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3260 (double)_cleanup_times.num()
3261 : 0.0));
3262 if (G1ScrubRemSets) {
3263 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3264 _total_rs_scrub_time,
3265 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3266 (double)_cleanup_times.num()
3267 : 0.0));
3268 }
3269 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3270 (_init_times.sum() + _remark_times.sum() +
3271 _cleanup_times.sum())/1000.0);
3272 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3273 "(%8.2f s marking).",
3274 cmThread()->vtime_accum(),
3275 cmThread()->vtime_mark_accum());
3276 }
3278 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3279 if (use_parallel_marking_threads()) {
3280 _parallel_workers->print_worker_threads_on(st);
3281 }
3282 }
3284 void ConcurrentMark::print_on_error(outputStream* st) const {
3285 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3286 _prevMarkBitMap, _nextMarkBitMap);
3287 _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3288 _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3289 }
3291 // We take a break if someone is trying to stop the world.
3292 bool ConcurrentMark::do_yield_check(uint worker_id) {
3293 if (should_yield()) {
3294 if (worker_id == 0) {
3295 _g1h->g1_policy()->record_concurrent_pause();
3296 }
3297 cmThread()->yield();
3298 return true;
3299 } else {
3300 return false;
3301 }
3302 }
3304 bool ConcurrentMark::should_yield() {
3305 return cmThread()->should_yield();
3306 }
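// Maps a heap pointer to a bit in the global card bitmap. As a worked
// example, with the usual 512-byte cards (CardTableModRefBS::card_shift
// == 9), a pointer 1536 bytes past the start of the reserved region has
// offset 1536 and therefore lands on card 1536 >> 9 == 3.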
3308 bool ConcurrentMark::containing_card_is_marked(void* p) {
3309 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3310 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3311 }
3313 bool ConcurrentMark::containing_cards_are_marked(void* start,
3314 void* last) {
3315 return containing_card_is_marked(start) &&
3316 containing_card_is_marked(last);
3317 }
3319 #ifndef PRODUCT
3320 // for debugging purposes
3321 void ConcurrentMark::print_finger() {
3322 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3323 _heap_start, _heap_end, _finger);
3324 for (uint i = 0; i < _max_worker_id; ++i) {
3325 gclog_or_tty->print(" %u: "PTR_FORMAT, i, _tasks[i]->finger());
3326 }
3327 gclog_or_tty->print_cr("");
3328 }
3329 #endif
3331 void CMTask::scan_object(oop obj) {
3332 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3334 if (_cm->verbose_high()) {
3335 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3336 _worker_id, (void*) obj);
3337 }
3339 size_t obj_size = obj->size();
3340 _words_scanned += obj_size;
3342 obj->oop_iterate(_cm_oop_closure);
3343 statsOnly( ++_objs_scanned );
3344 check_limits();
3345 }
3347 // Closure for iteration over bitmaps
3348 class CMBitMapClosure : public BitMapClosure {
3349 private:
3350 // the bitmap that is being iterated over
3351 CMBitMap* _nextMarkBitMap;
3352 ConcurrentMark* _cm;
3353 CMTask* _task;
3355 public:
3356 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3357 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3359 bool do_bit(size_t offset) {
3360 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3361 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3362 assert( addr < _cm->finger(), "invariant");
3364 statsOnly( _task->increase_objs_found_on_bitmap() );
3365 assert(addr >= _task->finger(), "invariant");
3367 // We move that task's local finger along.
3368 _task->move_finger_to(addr);
3370 _task->scan_object(oop(addr));
3371 // we only partially drain the local queue and global stack
3372 _task->drain_local_queue(true);
3373 _task->drain_global_stack(true);
3375 // if the has_aborted flag has been raised, we need to bail out of
3376 // the iteration
3377 return !_task->has_aborted();
3378 }
3379 };
3381 // Closure for iterating over objects, currently only used for
3382 // processing SATB buffers.
3383 class CMObjectClosure : public ObjectClosure {
3384 private:
3385 CMTask* _task;
3387 public:
3388 void do_object(oop obj) {
3389 _task->deal_with_reference(obj);
3390 }
3392 CMObjectClosure(CMTask* task) : _task(task) { }
3393 };
3395 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3396 ConcurrentMark* cm,
3397 CMTask* task)
3398 : _g1h(g1h), _cm(cm), _task(task) {
3399 assert(_ref_processor == NULL, "should be initialized to NULL");
3401 if (G1UseConcMarkReferenceProcessing) {
3402 _ref_processor = g1h->ref_processor_cm();
3403 assert(_ref_processor != NULL, "should not be NULL");
3404 }
3405 }
3407 void CMTask::setup_for_region(HeapRegion* hr) {
3408 // Separated the asserts so that we know which one fires.
3409 assert(hr != NULL,
3410 "claim_region() should have filtered out continues humongous regions");
3411 assert(!hr->continuesHumongous(),
3412 "claim_region() should have filtered out continues humongous regions");
3414 if (_cm->verbose_low()) {
3415 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3416 _worker_id, hr);
3417 }
3419 _curr_region = hr;
3420 _finger = hr->bottom();
3421 update_region_limit();
3422 }
3424 void CMTask::update_region_limit() {
3425 HeapRegion* hr = _curr_region;
3426 HeapWord* bottom = hr->bottom();
3427 HeapWord* limit = hr->next_top_at_mark_start();
3429 if (limit == bottom) {
3430 if (_cm->verbose_low()) {
3431 gclog_or_tty->print_cr("[%u] found an empty region "
3432 "["PTR_FORMAT", "PTR_FORMAT")",
3433 _worker_id, bottom, limit);
3434 }
3435 // The region was collected underneath our feet.
3436 // We set the finger to bottom to ensure that the bitmap
3437 // iteration that will follow this will not do anything.
3438 // (this is not a condition that holds when we set the region up,
3439 // as the region is not supposed to be empty in the first place)
3440 _finger = bottom;
3441 } else if (limit >= _region_limit) {
3442 assert(limit >= _finger, "peace of mind");
3443 } else {
3444 assert(limit < _region_limit, "only way to get here");
3445 // This can happen under some pretty unusual circumstances. An
3446 // evacuation pause empties the region underneath our feet (NTAMS
3447 // at bottom). We then do some allocation in the region (NTAMS
3448 // stays at bottom), followed by the region being used as a GC
3449 // alloc region (NTAMS will move to top() and the objects
3450 // originally below it will be grayed). All objects now marked in
3451 // the region are explicitly grayed, if below the global finger,
3452 // and in fact we do not need to scan anything else. So, we simply
3453 // set _finger to be limit to ensure that the bitmap iteration
3454 // doesn't do anything.
3455 _finger = limit;
3456 }
3458 _region_limit = limit;
3459 }
3461 void CMTask::giveup_current_region() {
3462 assert(_curr_region != NULL, "invariant");
3463 if (_cm->verbose_low()) {
3464 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3465 _worker_id, _curr_region);
3466 }
3467 clear_region_fields();
3468 }
3470 void CMTask::clear_region_fields() {
3471 // Values for these three fields that indicate that we're not
3472 // holding on to a region.
3473 _curr_region = NULL;
3474 _finger = NULL;
3475 _region_limit = NULL;
3476 }
3478 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3479 if (cm_oop_closure == NULL) {
3480 assert(_cm_oop_closure != NULL, "invariant");
3481 } else {
3482 assert(_cm_oop_closure == NULL, "invariant");
3483 }
3484 _cm_oop_closure = cm_oop_closure;
3485 }
3487 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3488 guarantee(nextMarkBitMap != NULL, "invariant");
3490 if (_cm->verbose_low()) {
3491 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3492 }
3494 _nextMarkBitMap = nextMarkBitMap;
3495 clear_region_fields();
3497 _calls = 0;
3498 _elapsed_time_ms = 0.0;
3499 _termination_time_ms = 0.0;
3500 _termination_start_time_ms = 0.0;
3502 #if _MARKING_STATS_
3503 _local_pushes = 0;
3504 _local_pops = 0;
3505 _local_max_size = 0;
3506 _objs_scanned = 0;
3507 _global_pushes = 0;
3508 _global_pops = 0;
3509 _global_max_size = 0;
3510 _global_transfers_to = 0;
3511 _global_transfers_from = 0;
3512 _regions_claimed = 0;
3513 _objs_found_on_bitmap = 0;
3514 _satb_buffers_processed = 0;
3515 _steal_attempts = 0;
3516 _steals = 0;
3517 _aborted = 0;
3518 _aborted_overflow = 0;
3519 _aborted_cm_aborted = 0;
3520 _aborted_yield = 0;
3521 _aborted_timed_out = 0;
3522 _aborted_satb = 0;
3523 _aborted_termination = 0;
3524 #endif // _MARKING_STATS_
3525 }
3527 bool CMTask::should_exit_termination() {
3528 regular_clock_call();
3529 // This is called when we are in the termination protocol. We should
3530 // quit if, for some reason, this task wants to abort or the global
3531 // stack is not empty (this means that we can get work from it).
3532 return !_cm->mark_stack_empty() || has_aborted();
3533 }
3535 void CMTask::reached_limit() {
3536 assert(_words_scanned >= _words_scanned_limit ||
3537 _refs_reached >= _refs_reached_limit ,
3538 "shouldn't have been called otherwise");
3539 regular_clock_call();
3540 }
3542 void CMTask::regular_clock_call() {
3543 if (has_aborted()) return;
3545 // First, we need to recalculate the words scanned and refs reached
3546 // limits for the next clock call.
3547 recalculate_limits();
3549 // During the regular clock call we do the following
3551 // (1) If an overflow has been flagged, then we abort.
3552 if (_cm->has_overflown()) {
3553 set_has_aborted();
3554 return;
3555 }
3557 // If we are not concurrent (i.e. we're doing remark) we don't need
3558 // to check anything else. The other steps are only needed during
3559 // the concurrent marking phase.
3560 if (!concurrent()) return;
3562 // (2) If marking has been aborted for a Full GC, then we also abort.
3563 if (_cm->has_aborted()) {
3564 set_has_aborted();
3565 statsOnly( ++_aborted_cm_aborted );
3566 return;
3567 }
3569 double curr_time_ms = os::elapsedVTime() * 1000.0;
3571 // (3) If marking stats are enabled, then we update the step history.
3572 #if _MARKING_STATS_
3573 if (_words_scanned >= _words_scanned_limit) {
3574 ++_clock_due_to_scanning;
3575 }
3576 if (_refs_reached >= _refs_reached_limit) {
3577 ++_clock_due_to_marking;
3578 }
3580 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3581 _interval_start_time_ms = curr_time_ms;
3582 _all_clock_intervals_ms.add(last_interval_ms);
3584 if (_cm->verbose_medium()) {
3585 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3586 "scanned = %d%s, refs reached = %d%s",
3587 _worker_id, last_interval_ms,
3588 _words_scanned,
3589 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3590 _refs_reached,
3591 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3592 }
3593 #endif // _MARKING_STATS_
3595 // (4) We check whether we should yield. If we have to, then we abort.
3596 if (_cm->should_yield()) {
3597 // We should yield. To do this we abort the task. The caller is
3598 // responsible for yielding.
3599 set_has_aborted();
3600 statsOnly( ++_aborted_yield );
3601 return;
3602 }
3604 // (5) We check whether we've reached our time quota. If we have,
3605 // then we abort.
3606 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3607 if (elapsed_time_ms > _time_target_ms) {
3608 set_has_aborted();
3609 _has_timed_out = true;
3610 statsOnly( ++_aborted_timed_out );
3611 return;
3612 }
3614 // (6) Finally, we check whether there are enough completed SATB
3615 // buffers available for processing. If there are, we abort.
3616 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3617 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3618 if (_cm->verbose_low()) {
3619 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3620 _worker_id);
3621 }
3622 // we do need to process SATB buffers, so we'll abort and restart
3623 // the marking task to do so
3624 set_has_aborted();
3625 statsOnly( ++_aborted_satb );
3626 return;
3627 }
3628 }
3630 void CMTask::recalculate_limits() {
3631 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3632 _words_scanned_limit = _real_words_scanned_limit;
3634 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3635 _refs_reached_limit = _real_refs_reached_limit;
3636 }
3638 void CMTask::decrease_limits() {
3639 // This is called when we believe that we're going to do an infrequent
3640 // operation which will increase the per-byte scanned cost (i.e. move
3641 // entries to/from the global stack). It basically tries to decrease the
3642 // scanning limit so that the clock is called earlier.
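// For example, with a (hypothetical) words_scanned_period of 12*1024
// words, the limit is pulled back by 3 * 12*1024 / 4 == 9*1024 words,
// so the clock fires after only a quarter of the usual period.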
3644 if (_cm->verbose_medium()) {
3645 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3646 }
3648 _words_scanned_limit = _real_words_scanned_limit -
3649 3 * words_scanned_period / 4;
3650 _refs_reached_limit = _real_refs_reached_limit -
3651 3 * refs_reached_period / 4;
3652 }
3654 void CMTask::move_entries_to_global_stack() {
3655 // local array where we'll store the entries that will be popped
3656 // from the local queue
3657 oop buffer[global_stack_transfer_size];
3659 int n = 0;
3660 oop obj;
3661 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3662 buffer[n] = obj;
3663 ++n;
3664 }
3666 if (n > 0) {
3667 // we popped at least one entry from the local queue
3669 statsOnly( ++_global_transfers_to; _local_pops += n );
3671 if (!_cm->mark_stack_push(buffer, n)) {
3672 if (_cm->verbose_low()) {
3673 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3674 _worker_id);
3675 }
3676 set_has_aborted();
3677 } else {
3678 // the transfer was successful
3680 if (_cm->verbose_medium()) {
3681 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3682 _worker_id, n);
3683 }
3684 statsOnly( int tmp_size = _cm->mark_stack_size();
3685 if (tmp_size > _global_max_size) {
3686 _global_max_size = tmp_size;
3687 }
3688 _global_pushes += n );
3689 }
3690 }
3692 // this operation was quite expensive, so decrease the limits
3693 decrease_limits();
3694 }
3696 void CMTask::get_entries_from_global_stack() {
3697 // local array where we'll store the entries that will be popped
3698 // from the global stack.
3699 oop buffer[global_stack_transfer_size];
3700 int n;
3701 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3702 assert(n <= global_stack_transfer_size,
3703 "we should not pop more than the given limit");
3704 if (n > 0) {
3705 // yes, we did actually pop at least one entry
3707 statsOnly( ++_global_transfers_from; _global_pops += n );
3708 if (_cm->verbose_medium()) {
3709 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3710 _worker_id, n);
3711 }
3712 for (int i = 0; i < n; ++i) {
3713 bool success = _task_queue->push(buffer[i]);
3714 // We only call this when the local queue is empty or under a
3715 // given target limit. So, we do not expect this push to fail.
3716 assert(success, "invariant");
3717 }
3719 statsOnly( int tmp_size = _task_queue->size();
3720 if (tmp_size > _local_max_size) {
3721 _local_max_size = tmp_size;
3722 }
3723 _local_pushes += n );
3724 }
3726 // this operation was quite expensive, so decrease the limits
3727 decrease_limits();
3728 }
3730 void CMTask::drain_local_queue(bool partially) {
3731 if (has_aborted()) return;
3733 // Decide what the target size is, depending on whether we're going to
3734 // drain it partially (so that other tasks can steal if they run out
3735 // of things to do) or totally (at the very end).
3736 size_t target_size;
3737 if (partially) {
3738 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3739 } else {
3740 target_size = 0;
3741 }
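// For example, if the queue holds at most 16K entries and
// GCDrainStackTargetSize has its (assumed) default value of 64, a
// partial drain stops once at most 64 entries remain, leaving entries
// available for other tasks to steal.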
3743 if (_task_queue->size() > target_size) {
3744 if (_cm->verbose_high()) {
3745 gclog_or_tty->print_cr("[%u] draining local queue, target size = "SIZE_FORMAT,
3746 _worker_id, target_size);
3747 }
3749 oop obj;
3750 bool ret = _task_queue->pop_local(obj);
3751 while (ret) {
3752 statsOnly( ++_local_pops );
3754 if (_cm->verbose_high()) {
3755 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3756 (void*) obj);
3757 }
3759 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3760 assert(!_g1h->is_on_master_free_list(
3761 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3763 scan_object(obj);
3765 if (_task_queue->size() <= target_size || has_aborted()) {
3766 ret = false;
3767 } else {
3768 ret = _task_queue->pop_local(obj);
3769 }
3770 }
3772 if (_cm->verbose_high()) {
3773 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3774 _worker_id, _task_queue->size());
3775 }
3776 }
3777 }
3779 void CMTask::drain_global_stack(bool partially) {
3780 if (has_aborted()) return;
3782 // We have a policy to drain the local queue before we attempt to
3783 // drain the global stack.
3784 assert(partially || _task_queue->size() == 0, "invariant");
3786 // Decide what the target size is, depending on whether we're going to
3787 // drain it partially (so that other tasks can steal if they run out
3788 // of things to do) or totally (at the very end). Notice that,
3789 // because we move entries from the global stack in chunks or
3790 // because another task might be doing the same, we might in fact
3791 // drop below the target. But, this is not a problem.
3792 size_t target_size;
3793 if (partially) {
3794 target_size = _cm->partial_mark_stack_size_target();
3795 } else {
3796 target_size = 0;
3797 }
3799 if (_cm->mark_stack_size() > target_size) {
3800 if (_cm->verbose_low()) {
3801 gclog_or_tty->print_cr("[%u] draining global stack, target size "SIZE_FORMAT,
3802 _worker_id, target_size);
3803 }
3805 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3806 get_entries_from_global_stack();
3807 drain_local_queue(partially);
3808 }
3810 if (_cm->verbose_low()) {
3811 gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3812 _worker_id, _cm->mark_stack_size());
3813 }
3814 }
3815 }
3817 // The SATB queue set makes several assumptions about whether to call
3818 // the par or non-par versions of its methods; this is why some of the
3819 // code is replicated. We should really get rid of the single-threaded
3820 // version of the code to simplify things.
3821 void CMTask::drain_satb_buffers() {
3822 if (has_aborted()) return;
3824 // We set this so that the regular clock knows that we're in the
3825 // middle of draining buffers and doesn't set the abort flag when it
3826 // notices that SATB buffers are available for draining. It'd be
3827 // very counterproductive if it did that. :-)
3828 _draining_satb_buffers = true;
3830 CMObjectClosure oc(this);
3831 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3832 if (G1CollectedHeap::use_parallel_gc_threads()) {
3833 satb_mq_set.set_par_closure(_worker_id, &oc);
3834 } else {
3835 satb_mq_set.set_closure(&oc);
3836 }
3838 // This keeps claiming and applying the closure to completed buffers
3839 // until we run out of buffers or we need to abort.
3840 if (G1CollectedHeap::use_parallel_gc_threads()) {
3841 while (!has_aborted() &&
3842 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3843 if (_cm->verbose_medium()) {
3844 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3845 }
3846 statsOnly( ++_satb_buffers_processed );
3847 regular_clock_call();
3848 }
3849 } else {
3850 while (!has_aborted() &&
3851 satb_mq_set.apply_closure_to_completed_buffer()) {
3852 if (_cm->verbose_medium()) {
3853 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3854 }
3855 statsOnly( ++_satb_buffers_processed );
3856 regular_clock_call();
3857 }
3858 }
3860 if (!concurrent() && !has_aborted()) {
3861 // We should only do this during remark.
3862 if (G1CollectedHeap::use_parallel_gc_threads()) {
3863 satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3864 } else {
3865 satb_mq_set.iterate_closure_all_threads();
3866 }
3867 }
3869 _draining_satb_buffers = false;
3871 assert(has_aborted() ||
3872 concurrent() ||
3873 satb_mq_set.completed_buffers_num() == 0, "invariant");
3875 if (G1CollectedHeap::use_parallel_gc_threads()) {
3876 satb_mq_set.set_par_closure(_worker_id, NULL);
3877 } else {
3878 satb_mq_set.set_closure(NULL);
3879 }
3881 // Again, this was a potentially expensive operation, so decrease the
3882 // limits to get the regular clock call early.
3883 decrease_limits();
3884 }
3886 void CMTask::print_stats() {
3887 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3888 _worker_id, _calls);
3889 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3890 _elapsed_time_ms, _termination_time_ms);
3891 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3892 _step_times_ms.num(), _step_times_ms.avg(),
3893 _step_times_ms.sd());
3894 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3895 _step_times_ms.maximum(), _step_times_ms.sum());
3897 #if _MARKING_STATS_
3898 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3899 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3900 _all_clock_intervals_ms.sd());
3901 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3902 _all_clock_intervals_ms.maximum(),
3903 _all_clock_intervals_ms.sum());
3904 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3905 _clock_due_to_scanning, _clock_due_to_marking);
3906 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3907 _objs_scanned, _objs_found_on_bitmap);
3908 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3909 _local_pushes, _local_pops, _local_max_size);
3910 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3911 _global_pushes, _global_pops, _global_max_size);
3912 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3913 _global_transfers_to,_global_transfers_from);
3914 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
3915 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3916 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3917 _steal_attempts, _steals);
3918 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3919 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3920 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3921 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3922 _aborted_timed_out, _aborted_satb, _aborted_termination);
3923 #endif // _MARKING_STATS_
3924 }
3926 /*****************************************************************************
3928 The do_marking_step(time_target_ms, ...) method is the building
3929 block of the parallel marking framework. It can be called in parallel
3930 with other invocations of do_marking_step() on different tasks
3931 (but only one per task, obviously) and concurrently with the
3932 mutator threads, or during remark, hence it eliminates the need
3933 for two versions of the code. When called during remark, it will
3934 pick up from where the task left off during the concurrent marking
3935 phase. Interestingly, tasks are also claimable during evacuation
3936 pauses, since do_marking_step() ensures that it aborts before
3937 it needs to yield.
3939 The data structures that it uses to do marking work are the
3940 following:
3942 (1) Marking Bitmap. If there are gray objects that appear only
3943 on the bitmap (this happens either when dealing with an overflow
3944 or when the initial marking phase has simply marked the roots
3945 and didn't push them on the stack), then tasks claim heap
3946 regions whose bitmap they then scan to find gray objects. A
3947 global finger indicates where the end of the last claimed region
3948 is. A local finger indicates how far into the region a task has
3949 scanned. The two fingers are used to determine how to gray an
3950 object (i.e. whether simply marking it is OK, as it will be
3951 visited by a task in the future, or whether it needs to be also
3952 pushed on a stack).
3954 (2) Local Queue. The local queue of the task which is accessed
3955 reasonably efficiently by the task. Other tasks can steal from
3956 it when they run out of work. Throughout the marking phase, a
3957 task attempts to keep its local queue short but not totally
3958 empty, so that entries are available for stealing by other
3959 tasks. Only when there is no more work will a task totally
3960 drain its local queue.
3962 (3) Global Mark Stack. This handles local queue overflow. During
3963 marking only sets of entries are moved between it and the local
3964 queues, as access to it requires a mutex and more fine-grained
3965 interaction with it, which might cause contention. If it
3966 overflows, then the marking phase should restart and iterate
3967 over the bitmap to identify gray objects. Throughout the marking
3968 phase, tasks attempt to keep the global mark stack at a small
3969 length but not totally empty, so that entries are available for
3970 popping by other tasks. Only when there is no more work will
3971 tasks totally drain the global mark stack.
3973 (4) SATB Buffer Queue. This is where completed SATB buffers are
3974 made available. Buffers are regularly removed from this queue
3975 and scanned for roots, so that the queue doesn't get too
3976 long. During remark, all completed buffers are processed, as
3977 well as the filled-in parts of any uncompleted buffers.
3979 The do_marking_step() method tries to abort when the time target
3980 has been reached. There are a few other cases when the
3981 do_marking_step() method also aborts:
3983 (1) When the marking phase has been aborted (after a Full GC).
3985 (2) When a global overflow (on the global stack) has been
3986 triggered. Before the task aborts, it will actually sync up with
3987 the other tasks to ensure that all the marking data structures
3988 (local queues, stacks, fingers etc.) are re-initialized so that
3989 when do_marking_step() completes, the marking phase can
3990 immediately restart.
3992 (3) When enough completed SATB buffers are available. The
3993 do_marking_step() method only tries to drain SATB buffers right
3994 at the beginning. So, if enough buffers are available, the
3995 marking step aborts and the SATB buffers are processed at
3996 the beginning of the next invocation.
3998 (4) To yield. When we have to yield, we abort and yield right
3999 at the end of do_marking_step(). This saves us a lot of hassle
4000 as, by yielding, we might allow a Full GC. If this happens then
4001 objects will be compacted underneath our feet, the heap might
4002 shrink, etc. We avoid having to check for this by just aborting
4003 and doing the yield right at the end.
4005 From the above it follows that the do_marking_step() method should
4006 be called in a loop (or, otherwise, regularly) until it completes.
4008 If a marking step completes without its has_aborted() flag being
4009 true, it means it has completed the current marking phase (and
4010 also all other marking tasks have done so and have all synced up).
4012 A method called regular_clock_call() is invoked "regularly" (in
4013 sub ms intervals) throughout marking. It is this clock method that
4014 checks all the abort conditions which were mentioned above and
4015 decides when the task should abort. A work-based scheme is used to
4016 trigger this clock method: when the number of object words the
4017 marking phase has scanned or the number of references the marking
4018 phase has visited reach a given limit. Additional invocations of
4019 the clock method have been planted in a few other strategic places
4020 too. The initial reason for the clock method was to avoid calling
4021 vtime too regularly, as it is quite expensive. So, once it was in
4022 place, it was natural to piggy-back all the other conditions on it
4023 too and not constantly check them throughout the code.
4025 If do_termination is true then do_marking_step will enter its
4026 termination protocol.
4028 The value of is_serial must be true when do_marking_step is being
4029 called serially (i.e. by the VMThread) and do_marking_step should
4030 skip any synchronization in the termination and overflow code.
4031 Examples include the serial remark code and the serial reference
4032 processing closures.
4034 The value of is_serial must be false when do_marking_step is
4035 being called by any of the worker threads in a work gang.
4036 Examples include the concurrent marking code (CMMarkingTask),
4037 the MT remark code, and the MT reference processing closures.
4039 *****************************************************************************/
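// A minimal sketch (not part of the build) of the driver loop implied
// above; the names, the 10ms time target and the yield call are
// illustrative assumptions in the spirit of the concurrent marking task:
//
//   do {
//     task->do_marking_step(10.0  /* time_target_ms */,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     cm->do_yield_check(worker_id); // give a pending safepoint a chance
//   } while (task->has_aborted() && !cm->has_aborted());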
4041 void CMTask::do_marking_step(double time_target_ms,
4042 bool do_termination,
4043 bool is_serial) {
4044 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4045 assert(concurrent() == _cm->concurrent(), "they should be the same");
4047 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4048 assert(_task_queues != NULL, "invariant");
4049 assert(_task_queue != NULL, "invariant");
4050 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4052 assert(!_claimed,
4053 "only one thread should claim this task at any one time");
4055 // OK, this doesn't safeguard against all possible scenarios, as it is
4056 // possible for two threads to set the _claimed flag at the same
4057 // time. But it is only for debugging purposes anyway and it will
4058 // catch most problems.
4059 _claimed = true;
4061 _start_time_ms = os::elapsedVTime() * 1000.0;
4062 statsOnly( _interval_start_time_ms = _start_time_ms );
4064 // If do_stealing is true then do_marking_step will attempt to
4065 // steal work from the other CMTasks. It only makes sense to
4066 // enable stealing when the termination protocol is enabled
4067 // and do_marking_step() is not being called serially.
4068 bool do_stealing = do_termination && !is_serial;
4070 double diff_prediction_ms =
4071 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4072 _time_target_ms = time_target_ms - diff_prediction_ms;
4074 // set up the variables that are used in the work-based scheme to
4075 // call the regular clock method
4076 _words_scanned = 0;
4077 _refs_reached = 0;
4078 recalculate_limits();
4080 // clear all flags
4081 clear_has_aborted();
4082 _has_timed_out = false;
4083 _draining_satb_buffers = false;
4085 ++_calls;
4087 if (_cm->verbose_low()) {
4088 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4089 "target = %1.2lfms >>>>>>>>>>",
4090 _worker_id, _calls, _time_target_ms);
4091 }
4093 // Set up the bitmap and oop closures. Anything that uses them is
4094 // eventually called from this method, so it is OK to allocate these
4095 // statically.
4096 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4097 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4098 set_cm_oop_closure(&cm_oop_closure);
4100 if (_cm->has_overflown()) {
4101 // This can happen if the mark stack overflows during a GC pause
4102 // and this task, after a yield point, restarts. We have to abort
4103 // as we need to get into the overflow protocol which happens
4104 // right at the end of this task.
4105 set_has_aborted();
4106 }
4108 // First drain any available SATB buffers. After this, we will not
4109 // look at SATB buffers before the next invocation of this method.
4110 // If enough completed SATB buffers are queued up, the regular clock
4111 // will abort this task so that it restarts.
4112 drain_satb_buffers();
4113 // ...then partially drain the local queue and the global stack
4114 drain_local_queue(true);
4115 drain_global_stack(true);
4117 do {
4118 if (!has_aborted() && _curr_region != NULL) {
4119 // This means that we're already holding on to a region.
4120 assert(_finger != NULL, "if region is not NULL, then the finger "
4121 "should not be NULL either");
4123 // We might have restarted this task after an evacuation pause
4124 // which might have evacuated the region we're holding on to
4125 // underneath our feet. Let's read its limit again to make sure
4126 // that we do not iterate over a region of the heap that
4127 // contains garbage (update_region_limit() will also move
4128 // _finger to the start of the region if it is found empty).
4129 update_region_limit();
4130 // We will start from _finger not from the start of the region,
4131 // as we might be restarting this task after aborting half-way
4132 // through scanning this region. In this case, _finger points to
4133 // the address where we last found a marked object. If this is a
4134 // fresh region, _finger points to start().
4135 MemRegion mr = MemRegion(_finger, _region_limit);
4137 if (_cm->verbose_low()) {
4138 gclog_or_tty->print_cr("[%u] we're scanning part "
4139 "["PTR_FORMAT", "PTR_FORMAT") "
4140 "of region "HR_FORMAT,
4141 _worker_id, _finger, _region_limit,
4142 HR_FORMAT_PARAMS(_curr_region));
4143 }
4145 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4146 "humongous regions should go around loop once only");
4148 // Some special cases:
4149 // If the memory region is empty, we can just give up the region.
4150 // If the current region is humongous then we only need to check
4151 // the bitmap for the bit associated with the start of the object,
4152 // scan the object if it's live, and give up the region.
4153 // Otherwise, let's iterate over the bitmap of the part of the region
4154 // that is left.
4155 // If the iteration is successful, give up the region.
4156 if (mr.is_empty()) {
4157 giveup_current_region();
4158 regular_clock_call();
4159 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4160 if (_nextMarkBitMap->isMarked(mr.start())) {
4161 // The object is marked - apply the closure
4162 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4163 bitmap_closure.do_bit(offset);
4164 }
4165 // Even if this task aborted while scanning the humongous object
4166 // we can (and should) give up the current region.
4167 giveup_current_region();
4168 regular_clock_call();
4169 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4170 giveup_current_region();
4171 regular_clock_call();
4172 } else {
4173 assert(has_aborted(), "currently the only way to do so");
4174 // The only way to abort the bitmap iteration is to return
4175 // false from the do_bit() method. However, inside the
4176 // do_bit() method we move the _finger to point to the
4177 // object currently being looked at. So, if we bail out, we
4178 // have definitely set _finger to something non-null.
4179 assert(_finger != NULL, "invariant");
4181 // Region iteration was actually aborted. So now _finger
4182 // points to the address of the object we last scanned. If we
4183 // leave it there, when we restart this task, we will rescan
4184 // the object. It is easy to avoid this. We move the finger by
4185 // enough to point to the next possible object header (the
4186 // bitmap knows by how much we need to move it as it knows its
4187 // granularity).
4188 assert(_finger < _region_limit, "invariant");
4189 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4190 // Check if bitmap iteration was aborted while scanning the last object
4191 if (new_finger >= _region_limit) {
4192 giveup_current_region();
4193 } else {
4194 move_finger_to(new_finger);
4195 }
4196 }
4197 }
4198 // At this point we have either completed iterating over the
4199 // region we were holding on to, or we have aborted.
4201 // We then partially drain the local queue and the global stack.
4202 // (Do we really need this?)
4203 drain_local_queue(true);
4204 drain_global_stack(true);
4206 // Read the note on the claim_region() method on why it might
4207 // return NULL with potentially more regions available for
4208 // claiming and why we have to check out_of_regions() to determine
4209 // whether we're done or not.
4210 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4211 // We are going to try to claim a new region. We should have
4212 // given up on the previous one.
4213 // Separated the asserts so that we know which one fires.
4214 assert(_curr_region == NULL, "invariant");
4215 assert(_finger == NULL, "invariant");
4216 assert(_region_limit == NULL, "invariant");
4217 if (_cm->verbose_low()) {
4218 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4219 }
4220 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4221 if (claimed_region != NULL) {
4222 // Yes, we managed to claim one
4223 statsOnly( ++_regions_claimed );
4225 if (_cm->verbose_low()) {
4226 gclog_or_tty->print_cr("[%u] we successfully claimed "
4227 "region "PTR_FORMAT,
4228 _worker_id, claimed_region);
4229 }
4231 setup_for_region(claimed_region);
4232 assert(_curr_region == claimed_region, "invariant");
4233 }
4234 // It is important to call the regular clock here. It might take
4235 // a while to claim a region if, for example, we hit a large
4236 // block of empty regions. So we need to call the regular clock
4237 // method once round the loop to make sure it's called
4238 // frequently enough.
4239 regular_clock_call();
4240 }
4242 if (!has_aborted() && _curr_region == NULL) {
4243 assert(_cm->out_of_regions(),
4244 "at this point we should be out of regions");
4245 }
4246 } while ( _curr_region != NULL && !has_aborted());
4248 if (!has_aborted()) {
4249 // We cannot check whether the global stack is empty, since other
4250 // tasks might be pushing objects to it concurrently.
4251 assert(_cm->out_of_regions(),
4252 "at this point we should be out of regions");
4254 if (_cm->verbose_low()) {
4255 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4256 }
4258 // Try to reduce the number of available SATB buffers so that
4259 // remark has less work to do.
4260 drain_satb_buffers();
4261 }
4263 // Since we've done everything else, we can now totally drain the
4264 // local queue and global stack.
4265 drain_local_queue(false);
4266 drain_global_stack(false);
4268 // Attempt at work stealing from other tasks' queues.
4269 if (do_stealing && !has_aborted()) {
4270 // We have not aborted. This means that we have finished all that
4271 // we could. Let's try to do some stealing...
4273 // We cannot check whether the global stack is empty, since other
4274 // tasks might be pushing objects to it concurrently.
4275 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4276 "only way to reach here");
4278 if (_cm->verbose_low()) {
4279 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4280 }
4282 while (!has_aborted()) {
4283 oop obj;
4284 statsOnly( ++_steal_attempts );
4286 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4287 if (_cm->verbose_medium()) {
4288 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4289 _worker_id, (void*) obj);
4290 }
4292 statsOnly( ++_steals );
4294 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4295 "any stolen object should be marked");
4296 scan_object(obj);
4298 // And since we're towards the end, let's totally drain the
4299 // local queue and global stack.
4300 drain_local_queue(false);
4301 drain_global_stack(false);
4302 } else {
4303 break;
4304 }
4305 }
4306 }
4308 // If we are about to wrap up and go into termination, check if we
4309 // should raise the overflow flag.
4310 if (do_termination && !has_aborted()) {
4311 if (_cm->force_overflow()->should_force()) {
4312 _cm->set_has_overflown();
4313 regular_clock_call();
4314 }
4315 }
4317 // We still haven't aborted. Now, let's try to get into the
4318 // termination protocol.
4319 if (do_termination && !has_aborted()) {
4320 // We cannot check whether the global stack is empty, since other
4321 // tasks might be concurrently pushing objects on it.
4322 // Separated the asserts so that we know which one fires.
4323 assert(_cm->out_of_regions(), "only way to reach here");
4324 assert(_task_queue->size() == 0, "only way to reach here");
4326 if (_cm->verbose_low()) {
4327 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4328 }
4330 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4332 // The CMTask class also extends the TerminatorTerminator class,
4333 // hence its should_exit_termination() method will also decide
4334 // whether to exit the termination protocol or not.
4335 bool finished = (is_serial ||
4336 _cm->terminator()->offer_termination(this));
4337 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4338 _termination_time_ms +=
4339 termination_end_time_ms - _termination_start_time_ms;
4341 if (finished) {
4342 // We're all done.
4344 if (_worker_id == 0) {
4345 // let's allow task 0 to do this
4346 if (concurrent()) {
4347 assert(_cm->concurrent_marking_in_progress(), "invariant");
4348 // we need to set this to false before the next
4349 // safepoint. This way we ensure that the marking phase
4350 // doesn't observe any more heap expansions.
4351 _cm->clear_concurrent_marking_in_progress();
4352 }
4353 }
4355 // We can now guarantee that the global stack is empty, since
4356 // all other tasks have finished. We separated the guarantees so
4357 // that, if a condition is false, we can immediately find out
4358 // which one.
4359 guarantee(_cm->out_of_regions(), "only way to reach here");
4360 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4361 guarantee(_task_queue->size() == 0, "only way to reach here");
4362 guarantee(!_cm->has_overflown(), "only way to reach here");
4363 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4365 if (_cm->verbose_low()) {
4366 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4367 }
4368 } else {
4369 // Apparently there's more work to do. Let's abort this task. The
4370 // caller will restart it and we can hopefully find more things to do.
4372 if (_cm->verbose_low()) {
4373 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4374 _worker_id);
4375 }
4377 set_has_aborted();
4378 statsOnly( ++_aborted_termination );
4379 }
4380 }
4382 // Mainly for debugging purposes to make sure that a pointer to the
4383 // closure which was statically allocated in this frame doesn't
4384 // escape it by accident.
4385 set_cm_oop_closure(NULL);
4386 double end_time_ms = os::elapsedVTime() * 1000.0;
4387 double elapsed_time_ms = end_time_ms - _start_time_ms;
4388 // Update the step history.
4389 _step_times_ms.add(elapsed_time_ms);
4391 if (has_aborted()) {
4392 // The task was aborted for some reason.
4394 statsOnly( ++_aborted );
4396 if (_has_timed_out) {
4397 double diff_ms = elapsed_time_ms - _time_target_ms;
4398 // Keep statistics of how well we did with respect to hitting
4399 // our target only if we actually timed out (if we aborted for
4400 // other reasons, then the results might get skewed).
4401 _marking_step_diffs_ms.add(diff_ms);
4402 }
4404 if (_cm->has_overflown()) {
4405 // This is the interesting one. We aborted because a global
4406 // overflow was raised. This means we have to restart the
4407 // marking phase and start iterating over regions. However, in
4408 // order to do this we have to make sure that all tasks stop
4409 // what they are doing and re-initialise in a safe manner. We
4410 // will achieve this with the use of two barrier sync points.
4412 if (_cm->verbose_low()) {
4413 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4414 }
4416 if (!is_serial) {
4417 // We only need to enter the sync barrier if being called
4418 // from a parallel context
4419 _cm->enter_first_sync_barrier(_worker_id);
4421 // When we exit this sync barrier we know that all tasks have
4422 // stopped doing marking work. So, it's now safe to
4423 // re-initialise our data structures. At the end of this method,
4424 // task 0 will clear the global data structures.
4425 }
4427 statsOnly( ++_aborted_overflow );
4429 // We clear the local state of this task...
4430 clear_region_fields();
4432 if (!is_serial) {
4433 // ...and enter the second barrier.
4434 _cm->enter_second_sync_barrier(_worker_id);
4435 }
4436 // At this point, if we're during the concurrent phase of
4437 // marking, everything has been re-initialized and we're
4438 // ready to restart.
4439 }
4441 if (_cm->verbose_low()) {
4442 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4443 "elapsed = %1.2lfms <<<<<<<<<<",
4444 _worker_id, _time_target_ms, elapsed_time_ms);
4445 if (_cm->has_aborted()) {
4446 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4447 _worker_id);
4448 }
4449 }
4450 } else {
4451 if (_cm->verbose_low()) {
4452 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4453 "elapsed = %1.2lfms <<<<<<<<<<",
4454 _worker_id, _time_target_ms, elapsed_time_ms);
4455 }
4456 }
4458 _claimed = false;
4459 }
4461 CMTask::CMTask(uint worker_id,
4462 ConcurrentMark* cm,
4463 size_t* marked_bytes,
4464 BitMap* card_bm,
4465 CMTaskQueue* task_queue,
4466 CMTaskQueueSet* task_queues)
4467 : _g1h(G1CollectedHeap::heap()),
4468 _worker_id(worker_id), _cm(cm),
4469 _claimed(false),
4470 _nextMarkBitMap(NULL), _hash_seed(17),
4471 _task_queue(task_queue),
4472 _task_queues(task_queues),
4473 _cm_oop_closure(NULL),
4474 _marked_bytes_array(marked_bytes),
4475 _card_bm(card_bm) {
4476 guarantee(task_queue != NULL, "invariant");
4477 guarantee(task_queues != NULL, "invariant");
4479 statsOnly( _clock_due_to_scanning = 0;
4480 _clock_due_to_marking = 0 );
4482 _marking_step_diffs_ms.add(0.5);
4483 }
4485 // These are formatting macros that are used below to ensure
4486 // consistent formatting. The *_H_* versions are used to format the
4487 // header for a particular value and they should be kept consistent
4488 // with the corresponding macro. Also note that most of the macros add
4489 // the necessary white space (as a prefix) which makes them a bit
4490 // easier to compose.
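// For instance (purely illustrative), G1PPRL_LINE_PREFIX
// G1PPRL_TYPE_FORMAT G1PPRL_BYTE_FORMAT concatenates, via adjacent
// string literals, into the single format string
// "### %-4s "SIZE_FORMAT_W(9), with the prefix and the separating
// spaces already in place.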
4492 // All the output lines are prefixed with this string to be able to
4493 // identify them easily in a large log file.
4494 #define G1PPRL_LINE_PREFIX "###"
4496 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4497 #ifdef _LP64
4498 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4499 #else // _LP64
4500 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4501 #endif // _LP64
4503 // For per-region info
4504 #define G1PPRL_TYPE_FORMAT " %-4s"
4505 #define G1PPRL_TYPE_H_FORMAT " %4s"
4506 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4507 #define G1PPRL_BYTE_H_FORMAT " %9s"
4508 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4509 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4511 // For summary info
4512 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4513 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4514 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4515 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4517 G1PrintRegionLivenessInfoClosure::
4518 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4519 : _out(out),
4520 _total_used_bytes(0), _total_capacity_bytes(0),
4521 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4522 _hum_used_bytes(0), _hum_capacity_bytes(0),
4523 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4524 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4525 MemRegion g1_committed = g1h->g1_committed();
4526 MemRegion g1_reserved = g1h->g1_reserved();
4527 double now = os::elapsedTime();
4529 // Print the header of the output.
4530 _out->cr();
4531 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4532 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4533 G1PPRL_SUM_ADDR_FORMAT("committed")
4534 G1PPRL_SUM_ADDR_FORMAT("reserved")
4535 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4536 g1_committed.start(), g1_committed.end(),
4537 g1_reserved.start(), g1_reserved.end(),
4538 HeapRegion::GrainBytes);
4539 _out->print_cr(G1PPRL_LINE_PREFIX);
4540 _out->print_cr(G1PPRL_LINE_PREFIX
4541 G1PPRL_TYPE_H_FORMAT
4542 G1PPRL_ADDR_BASE_H_FORMAT
4543 G1PPRL_BYTE_H_FORMAT
4544 G1PPRL_BYTE_H_FORMAT
4545 G1PPRL_BYTE_H_FORMAT
4546 G1PPRL_DOUBLE_H_FORMAT,
4547 "type", "address-range",
4548 "used", "prev-live", "next-live", "gc-eff");
4549 _out->print_cr(G1PPRL_LINE_PREFIX
4550 G1PPRL_TYPE_H_FORMAT
4551 G1PPRL_ADDR_BASE_H_FORMAT
4552 G1PPRL_BYTE_H_FORMAT
4553 G1PPRL_BYTE_H_FORMAT
4554 G1PPRL_BYTE_H_FORMAT
4555 G1PPRL_DOUBLE_H_FORMAT,
4556 "", "",
4557 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4558 }
4560 // It takes as a parameter a pointer to one of the _hum_* fields, deduces
4561 // the corresponding value for a region in a humongous region series
4562 // (either the region size, or what's left if the _hum_* field is < the
4563 // region size), and updates the _hum_* field accordingly.
4564 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4565 size_t bytes = 0;
4566 // The > 0 check is to deal with the prev and next live bytes which
4567 // could be 0.
4568 if (*hum_bytes > 0) {
4569 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4570 *hum_bytes -= bytes;
4571 }
4572 return bytes;
4573 }
4575 // It deduces the values for a region in a humongous region series
4576 // from the _hum_* fields and updates those accordingly. It assumes
4577 // that the _hum_* fields have already been set up from the "starts
4578 // humongous" region and that we visit the regions in address order.
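// For example (figures purely illustrative): if the series' used bytes
// were set up as 2.5 * GrainBytes, the "starts humongous" region
// reports GrainBytes, the first "continues humongous" region reports
// GrainBytes, and the second reports the remaining half region, after
// which _hum_used_bytes is back to 0.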
4579 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4580 size_t* capacity_bytes,
4581 size_t* prev_live_bytes,
4582 size_t* next_live_bytes) {
4583 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4584 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4585 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4586 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4587 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4588 }
4590 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4591 const char* type = "";
4592 HeapWord* bottom = r->bottom();
4593 HeapWord* end = r->end();
4594 size_t capacity_bytes = r->capacity();
4595 size_t used_bytes = r->used();
4596 size_t prev_live_bytes = r->live_bytes();
4597 size_t next_live_bytes = r->next_live_bytes();
4598 double gc_eff = r->gc_efficiency();
4599 if (r->used() == 0) {
4600 type = "FREE";
4601 } else if (r->is_survivor()) {
4602 type = "SURV";
4603 } else if (r->is_young()) {
4604 type = "EDEN";
4605 } else if (r->startsHumongous()) {
4606 type = "HUMS";
4608 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4609 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4610 "they should have been zeroed after the last time we used them");
4611 // Set up the _hum_* fields.
4612 _hum_capacity_bytes = capacity_bytes;
4613 _hum_used_bytes = used_bytes;
4614 _hum_prev_live_bytes = prev_live_bytes;
4615 _hum_next_live_bytes = next_live_bytes;
4616 get_hum_bytes(&used_bytes, &capacity_bytes,
4617 &prev_live_bytes, &next_live_bytes);
4618 end = bottom + HeapRegion::GrainWords;
4619 } else if (r->continuesHumongous()) {
4620 type = "HUMC";
4621 get_hum_bytes(&used_bytes, &capacity_bytes,
4622 &prev_live_bytes, &next_live_bytes);
4623 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4624 } else {
4625 type = "OLD";
4626 }
4628 _total_used_bytes += used_bytes;
4629 _total_capacity_bytes += capacity_bytes;
4630 _total_prev_live_bytes += prev_live_bytes;
4631 _total_next_live_bytes += next_live_bytes;
4633 // Print a line for this particular region.
4634 _out->print_cr(G1PPRL_LINE_PREFIX
4635 G1PPRL_TYPE_FORMAT
4636 G1PPRL_ADDR_BASE_FORMAT
4637 G1PPRL_BYTE_FORMAT
4638 G1PPRL_BYTE_FORMAT
4639 G1PPRL_BYTE_FORMAT
4640 G1PPRL_DOUBLE_FORMAT,
4641 type, bottom, end,
4642 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4644 return false;
4645 }
4647 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4648 // Print the footer of the output.
4649 _out->print_cr(G1PPRL_LINE_PREFIX);
4650 _out->print_cr(G1PPRL_LINE_PREFIX
4651 " SUMMARY"
4652 G1PPRL_SUM_MB_FORMAT("capacity")
4653 G1PPRL_SUM_MB_PERC_FORMAT("used")
4654 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4655 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4656 bytes_to_mb(_total_capacity_bytes),
4657 bytes_to_mb(_total_used_bytes),
4658 perc(_total_used_bytes, _total_capacity_bytes),
4659 bytes_to_mb(_total_prev_live_bytes),
4660 perc(_total_prev_live_bytes, _total_capacity_bytes),
4661 bytes_to_mb(_total_next_live_bytes),
4662 perc(_total_next_live_bytes, _total_capacity_bytes));
4663 _out->cr();
4664 }