Wed, 19 Jan 2011 09:35:17 -0500
7011379: G1: overly long concurrent marking cycles
Summary: This changeset introduces filtering of SATB buffers at the point when they are about to be enqueued. If the filtering clears enough entries from a buffer, the buffer can be re-used rather than enqueued. This cuts down the number of SATB buffers that the concurrent marking threads need to process.
Reviewed-by: johnc, ysr
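A minimal sketch of the filtering idea, for orientation only (the names
retain_entry, reuse_buffer and enqueue_completed_buffer are hypothetical,
and the actual change lives in the SATB queue code rather than in the
file below):

    // Filter a SATB buffer in place just before it would be enqueued.
    size_t kept = 0;
    for (size_t i = 0; i < entries; i++) {
      if (retain_entry(buf[i])) {  // entry still useful to marking
        buf[kept++] = buf[i];
      }
    }
    if (kept * 2 < entries) {
      // Filtering cleared enough entries: re-use the buffer in place
      // instead of handing it to the concurrent marking threads.
      reuse_buffer(buf, kept);
    } else {
      enqueue_completed_buffer(buf);
    }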
1 /*
2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/symbolTable.hpp"
27 #include "gc_implementation/g1/concurrentMark.hpp"
28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 #include "gc_implementation/g1/g1RemSet.hpp"
32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
34 #include "gc_implementation/shared/vmGCOperations.hpp"
35 #include "memory/genOopClosures.inline.hpp"
36 #include "memory/referencePolicy.hpp"
37 #include "memory/resourceArea.hpp"
38 #include "oops/oop.inline.hpp"
39 #include "runtime/handles.inline.hpp"
40 #include "runtime/java.hpp"
42 //
43 // CMS Bit Map Wrapper
45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
46 _bm((uintptr_t*)NULL,0),
47 _shifter(shifter) {
48 _bmStartWord = (HeapWord*)(rs.base());
49 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
50 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
51 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
53 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
54 // For now we'll just commit all of the bit map up front.
55 // Later on we'll try to be more parsimonious with swap.
56 guarantee(_virtual_space.initialize(brs, brs.size()),
57 "couldn't reseve backing store for CMS bit map");
58 assert(_virtual_space.committed_size() == brs.size(),
59 "didn't reserve backing store for all of CMS bit map?");
60 _bm.set_map((uintptr_t*)_virtual_space.low());
61 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
62 _bmWordSize, "inconsistency in bit map sizing");
63 _bm.set_size(_bmWordSize >> _shifter);
64 }
66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
67 HeapWord* limit) const {
68 // First we must round addr *up* to a possible object boundary.
69 addr = (HeapWord*)align_size_up((intptr_t)addr,
70 HeapWordSize << _shifter);
71 size_t addrOffset = heapWordToOffset(addr);
72 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
73 size_t limitOffset = heapWordToOffset(limit);
74 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
75 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
76 assert(nextAddr >= addr, "get_next_one postcondition");
77 assert(nextAddr == limit || isMarked(nextAddr),
78 "get_next_one postcondition");
79 return nextAddr;
80 }
82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
83 HeapWord* limit) const {
84 size_t addrOffset = heapWordToOffset(addr);
85 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
86 size_t limitOffset = heapWordToOffset(limit);
87 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
88 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
89 assert(nextAddr >= addr, "get_next_one postcondition");
90 assert(nextAddr == limit || !isMarked(nextAddr),
91 "get_next_one postcondition");
92 return nextAddr;
93 }
95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
96 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
97 return (int) (diff >> _shifter);
98 }
100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
101 HeapWord* left = MAX2(_bmStartWord, mr.start());
102 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
103 if (right > left) {
104 // Right-open interval [leftOffset, rightOffset).
105 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
106 } else {
107 return true;
108 }
109 }
111 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
112 size_t from_start_index,
113 HeapWord* to_start_word,
114 size_t word_num) {
115 _bm.mostly_disjoint_range_union(from_bitmap,
116 from_start_index,
117 heapWordToOffset(to_start_word),
118 word_num);
119 }
121 #ifndef PRODUCT
122 bool CMBitMapRO::covers(ReservedSpace rs) const {
123 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
124 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
125 "size inconsistency");
126 return _bmStartWord == (HeapWord*)(rs.base()) &&
127 _bmWordSize == rs.size()>>LogHeapWordSize;
128 }
129 #endif
131 void CMBitMap::clearAll() {
132 _bm.clear();
133 return;
134 }
136 void CMBitMap::markRange(MemRegion mr) {
137 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
138 assert(!mr.is_empty(), "unexpected empty region");
139 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
140 ((HeapWord *) mr.end())),
141 "markRange memory region end is not card aligned");
142 // convert address range into offset range
143 _bm.at_put_range(heapWordToOffset(mr.start()),
144 heapWordToOffset(mr.end()), true);
145 }
147 void CMBitMap::clearRange(MemRegion mr) {
148 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
149 assert(!mr.is_empty(), "unexpected empty region");
150 // convert address range into offset range
151 _bm.at_put_range(heapWordToOffset(mr.start()),
152 heapWordToOffset(mr.end()), false);
153 }
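// Finds the first run of consecutive marked words in [addr, end_addr),
// clears those bits, and returns the run as a MemRegion (an empty
// region means no marked words remain in the range).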
155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
156 HeapWord* end_addr) {
157 HeapWord* start = getNextMarkedWordAddress(addr);
158 start = MIN2(start, end_addr);
159 HeapWord* end = getNextUnmarkedWordAddress(start);
160 end = MIN2(end, end_addr);
161 assert(start <= end, "Consistency check");
162 MemRegion mr(start, end);
163 if (!mr.is_empty()) {
164 clearRange(mr);
165 }
166 return mr;
167 }
169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
170 _base(NULL), _cm(cm)
171 #ifdef ASSERT
172 , _drain_in_progress(false)
173 , _drain_in_progress_yields(false)
174 #endif
175 {}
177 void CMMarkStack::allocate(size_t size) {
178 _base = NEW_C_HEAP_ARRAY(oop, size);
179 if (_base == NULL)
180 vm_exit_during_initialization("Failed to allocate "
181 "CM region mark stack");
182 _index = 0;
183 // TODO: revisit this narrowing cast (size_t -> jint)
184 _capacity = (jint) size;
185 _oops_do_bound = -1;
186 NOT_PRODUCT(_max_depth = 0);
187 }
189 CMMarkStack::~CMMarkStack() {
190 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
191 }
193 void CMMarkStack::par_push(oop ptr) {
194 while (true) {
195 if (isFull()) {
196 _overflow = true;
197 return;
198 }
199 // Otherwise...
200 jint index = _index;
201 jint next_index = index+1;
202 jint res = Atomic::cmpxchg(next_index, &_index, index);
203 if (res == index) {
204 _base[index] = ptr;
205 // Note that we don't maintain this atomically. We could, but it
206 // doesn't seem necessary.
207 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
208 return;
209 }
210 // Otherwise, we need to try again.
211 }
212 }
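// Note: par_push (and par_adjoin_arr below) claim slots by CAS-ing
// _index forward and only then write the entries, so a concurrent
// reader cannot assume that every slot below _index has been filled
// yet. Scanning (e.g. oops_do) is expected to happen only once the
// pushers have quiesced, such as at a safepoint.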
214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
215 while (true) {
216 if (isFull()) {
217 _overflow = true;
218 return;
219 }
220 // Otherwise...
221 jint index = _index;
222 jint next_index = index + n;
223 if (next_index > _capacity) {
224 _overflow = true;
225 return;
226 }
227 jint res = Atomic::cmpxchg(next_index, &_index, index);
228 if (res == index) {
229 for (int i = 0; i < n; i++) {
230 int ind = index + i;
231 assert(ind < _capacity, "By overflow test above.");
232 _base[ind] = ptr_arr[i];
233 }
234 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
235 return;
236 }
237 // Otherwise, we need to try again.
238 }
239 }
242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
243 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
244 jint start = _index;
245 jint next_index = start + n;
246 if (next_index > _capacity) {
247 _overflow = true;
248 return;
249 }
250 // Otherwise.
251 _index = next_index;
252 for (int i = 0; i < n; i++) {
253 int ind = start + i;
254 assert(ind < _capacity, "By overflow test above.");
255 _base[ind] = ptr_arr[i];
256 }
257 }
260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
261 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
262 jint index = _index;
263 if (index == 0) {
264 *n = 0;
265 return false;
266 } else {
267 int k = MIN2(max, index);
268 jint new_ind = index - k;
269 for (int j = 0; j < k; j++) {
270 ptr_arr[j] = _base[new_ind + j];
271 }
272 _index = new_ind;
273 *n = k;
274 return true;
275 }
276 }
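// The multi-entry transfers above (par_push_arr/par_pop_arr) take
// ParGCRareEvent_lock rather than using CAS: moving n entries is not
// a single atomic step, and the lock's name suggests these bulk
// transfers are expected to be infrequent.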
279 CMRegionStack::CMRegionStack() : _base(NULL) {}
281 void CMRegionStack::allocate(size_t size) {
282 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
283 if (_base == NULL)
284 vm_exit_during_initialization("Failed to allocate "
285 "CM region mark stack");
286 _index = 0;
287 // TODO: revisit this narrowing cast (size_t -> jint)
288 _capacity = (jint) size;
289 }
291 CMRegionStack::~CMRegionStack() {
292 if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
293 }
295 void CMRegionStack::push_lock_free(MemRegion mr) {
296 assert(mr.word_size() > 0, "Precondition");
297 while (true) {
298 jint index = _index;
300 if (index >= _capacity) {
301 _overflow = true;
302 return;
303 }
304 // Otherwise...
305 jint next_index = index+1;
306 jint res = Atomic::cmpxchg(next_index, &_index, index);
307 if (res == index) {
308 _base[index] = mr;
309 return;
310 }
311 // Otherwise, we need to try again.
312 }
313 }
315 // Lock-free pop of the region stack. Called during the concurrent
316 // marking / remark phases. Should only be called in tandem with
317 // other lock-free pops.
318 MemRegion CMRegionStack::pop_lock_free() {
319 while (true) {
320 jint index = _index;
322 if (index == 0) {
323 return MemRegion();
324 }
325 // Otherwise...
326 jint next_index = index-1;
327 jint res = Atomic::cmpxchg(next_index, &_index, index);
328 if (res == index) {
329 MemRegion mr = _base[next_index];
330 if (mr.start() != NULL) {
331 assert(mr.end() != NULL, "invariant");
332 assert(mr.word_size() > 0, "invariant");
333 return mr;
334 } else {
335 // that entry was invalidated... let's skip it
336 assert(mr.end() == NULL, "invariant");
337 }
338 }
339 // Otherwise, we need to try again.
340 }
341 }
343 #if 0
344 // The routines that manipulate the region stack with a lock are
345 // not currently used. They should be retained, however, as a
346 // diagnostic aid.
348 void CMRegionStack::push_with_lock(MemRegion mr) {
349 assert(mr.word_size() > 0, "Precondition");
350 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
352 if (isFull()) {
353 _overflow = true;
354 return;
355 }
357 _base[_index] = mr;
358 _index += 1;
359 }
361 MemRegion CMRegionStack::pop_with_lock() {
362 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
364 while (true) {
365 if (_index == 0) {
366 return MemRegion();
367 }
368 _index -= 1;
370 MemRegion mr = _base[_index];
371 if (mr.start() != NULL) {
372 assert(mr.end() != NULL, "invariant");
373 assert(mr.word_size() > 0, "invariant");
374 return mr;
375 } else {
376 // that entry was invalidated... let's skip it
377 assert(mr.end() == NULL, "invariant");
378 }
379 }
380 }
381 #endif
383 bool CMRegionStack::invalidate_entries_into_cset() {
384 bool result = false;
385 G1CollectedHeap* g1h = G1CollectedHeap::heap();
386 for (int i = 0; i < _oops_do_bound; ++i) {
387 MemRegion mr = _base[i];
388 if (mr.start() != NULL) {
389 assert(mr.end() != NULL, "invariant");
390 assert(mr.word_size() > 0, "invariant");
391 HeapRegion* hr = g1h->heap_region_containing(mr.start());
392 assert(hr != NULL, "invariant");
393 if (hr->in_collection_set()) {
394 // The region points into the collection set
395 _base[i] = MemRegion();
396 result = true;
397 }
398 } else {
399 // that entry was invalidated... let's skip it
400 assert(mr.end() == NULL, "invariant");
401 }
402 }
403 return result;
404 }
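// Note that invalidation leaves the invalidated entries in place as
// empty (NULL-based) MemRegions rather than compacting the stack,
// which is why pop_lock_free() above must be prepared to skip them.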
406 template<class OopClosureClass>
407 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
408 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
409 || SafepointSynchronize::is_at_safepoint(),
410 "Drain recursion must be yield-safe.");
411 bool res = true;
412 debug_only(_drain_in_progress = true);
413 debug_only(_drain_in_progress_yields = yield_after);
414 while (!isEmpty()) {
415 oop newOop = pop();
416 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
417 assert(newOop->is_oop(), "Expected an oop");
418 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
419 "only grey objects on this stack");
420 // iterate over the oops in this oop, marking and pushing
421 // the ones in the G1 heap.
422 newOop->oop_iterate(cl);
423 if (yield_after && _cm->do_yield_check()) {
424 res = false; break;
425 }
426 }
427 debug_only(_drain_in_progress = false);
428 return res;
429 }
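// drain() is templated on the closure type, presumably so the
// per-object oop_iterate() calls can be inlined; passing bm == NULL
// skips the grey-object assertion.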
431 void CMMarkStack::oops_do(OopClosure* f) {
432 if (_index == 0) return;
433 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
434 "Bound must be set.");
435 for (int i = 0; i < _oops_do_bound; i++) {
436 f->do_oop(&_base[i]);
437 }
438 _oops_do_bound = -1;
439 }
441 bool ConcurrentMark::not_yet_marked(oop obj) const {
442 return (_g1h->is_obj_ill(obj)
443 || (_g1h->is_in_permanent(obj)
444 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
445 }
447 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
448 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
449 #endif // _MSC_VER
451 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
452 int max_regions) :
453 _markBitMap1(rs, MinObjAlignment - 1),
454 _markBitMap2(rs, MinObjAlignment - 1),
456 _parallel_marking_threads(0),
457 _sleep_factor(0.0),
458 _marking_task_overhead(1.0),
459 _cleanup_sleep_factor(0.0),
460 _cleanup_task_overhead(1.0),
461 _region_bm(max_regions, false /* in_resource_area*/),
462 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
463 CardTableModRefBS::card_shift,
464 false /* in_resource_area*/),
465 _prevMarkBitMap(&_markBitMap1),
466 _nextMarkBitMap(&_markBitMap2),
467 _at_least_one_mark_complete(false),
469 _markStack(this),
470 _regionStack(),
471 // _finger set in set_non_marking_state
473 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
474 // _active_tasks set in set_non_marking_state
475 // _tasks set inside the constructor
476 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
477 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
479 _has_overflown(false),
480 _concurrent(false),
481 _has_aborted(false),
482 _restart_for_overflow(false),
483 _concurrent_marking_in_progress(false),
484 _should_gray_objects(false),
486 // _verbose_level set below
488 _init_times(),
489 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
490 _cleanup_times(),
491 _total_counting_time(0.0),
492 _total_rs_scrub_time(0.0),
494 _parallel_workers(NULL)
495 {
496 CMVerboseLevel verbose_level =
497 (CMVerboseLevel) G1MarkingVerboseLevel;
498 if (verbose_level < no_verbose)
499 verbose_level = no_verbose;
500 if (verbose_level > high_verbose)
501 verbose_level = high_verbose;
502 _verbose_level = verbose_level;
504 if (verbose_low())
505 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
506 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
508 _markStack.allocate(MarkStackSize);
509 _regionStack.allocate(G1MarkRegionStackSize);
511 // Create & start a ConcurrentMark thread.
512 _cmThread = new ConcurrentMarkThread(this);
513 assert(cmThread() != NULL, "CM Thread should have been created");
514 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
516 _g1h = G1CollectedHeap::heap();
517 assert(CGC_lock != NULL, "Where's the CGC_lock?");
518 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
519 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
521 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
522 satb_qs.set_buffer_size(G1SATBBufferSize);
524 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
525 _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
526 for (int i = 0 ; i < size; i++) {
527 _par_cleanup_thread_state[i] = new ParCleanupThreadState;
528 }
530 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
531 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
533 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
534 _active_tasks = _max_task_num;
535 for (int i = 0; i < (int) _max_task_num; ++i) {
536 CMTaskQueue* task_queue = new CMTaskQueue();
537 task_queue->initialize();
538 _task_queues->register_queue(i, task_queue);
540 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
541 _accum_task_vtime[i] = 0.0;
542 }
544 if (ConcGCThreads > ParallelGCThreads) {
545 vm_exit_during_initialization("Can't have more ConcGCThreads "
546 "than ParallelGCThreads.");
547 }
548 if (ParallelGCThreads == 0) {
549 // if we are not running with any parallel GC threads we will not
550 // spawn any marking threads either
551 _parallel_marking_threads = 0;
552 _sleep_factor = 0.0;
553 _marking_task_overhead = 1.0;
554 } else {
555 if (ConcGCThreads > 0) {
556 // notice that ConcGCThreads overrides G1MarkingOverheadPercent
557 // if both are set
559 _parallel_marking_threads = ConcGCThreads;
560 _sleep_factor = 0.0;
561 _marking_task_overhead = 1.0;
562 } else if (G1MarkingOverheadPercent > 0) {
563 // we will calculate the number of parallel marking threads
564 // based on a target overhead with respect to the soft real-time
565 // goal
567 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
568 double overall_cm_overhead =
569 (double) MaxGCPauseMillis * marking_overhead /
570 (double) GCPauseIntervalMillis;
571 double cpu_ratio = 1.0 / (double) os::processor_count();
572 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
573 double marking_task_overhead =
574 overall_cm_overhead / marking_thread_num *
575 (double) os::processor_count();
576 double sleep_factor =
577 (1.0 - marking_task_overhead) / marking_task_overhead;
579 _parallel_marking_threads = (size_t) marking_thread_num;
580 _sleep_factor = sleep_factor;
581 _marking_task_overhead = marking_task_overhead;
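// Worked example (illustrative): with MaxGCPauseMillis = 200,
// GCPauseIntervalMillis = 1000, G1MarkingOverheadPercent = 10 and
// 8 processors: overall_cm_overhead = 200 * 0.1 / 1000 = 0.02,
// cpu_ratio = 1/8 = 0.125, marking_thread_num = ceil(0.02 / 0.125) = 1,
// marking_task_overhead = 0.02 / 1 * 8 = 0.16, and
// sleep_factor = (1 - 0.16) / 0.16 = 5.25.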
582 } else {
583 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
584 _sleep_factor = 0.0;
585 _marking_task_overhead = 1.0;
586 }
588 if (parallel_marking_threads() > 1)
589 _cleanup_task_overhead = 1.0;
590 else
591 _cleanup_task_overhead = marking_task_overhead();
592 _cleanup_sleep_factor =
593 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
595 #if 0
596 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
597 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
598 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
599 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
600 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
601 #endif
603 guarantee(parallel_marking_threads() > 0, "peace of mind");
604 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
605 (int) _parallel_marking_threads, false, true);
606 if (_parallel_workers == NULL) {
607 vm_exit_during_initialization("Failed necessary allocation.");
608 } else {
609 _parallel_workers->initialize_workers();
610 }
611 }
613 // so that the call below can read a sensible value
614 _heap_start = (HeapWord*) rs.base();
615 set_non_marking_state();
616 }
618 void ConcurrentMark::update_g1_committed(bool force) {
619 // If concurrent marking is not in progress, then we do not need to
620 // update _heap_end. This has a subtle and important
621 // side-effect. Imagine that two evacuation pauses happen between
622 // marking completion and remark. The first one can grow the
623 // heap (so the finger is now below the heap end). Then, the
624 // second one could unnecessarily push regions on the region
625 // stack. This causes the invariant that the region stack is empty
626 // at the beginning of remark to be false. By ensuring that we do
627 // not observe heap expansions after marking is complete, we avoid
628 // this problem.
629 if (!concurrent_marking_in_progress() && !force)
630 return;
632 MemRegion committed = _g1h->g1_committed();
633 assert(committed.start() == _heap_start, "start shouldn't change");
634 HeapWord* new_end = committed.end();
635 if (new_end > _heap_end) {
636 // The heap has been expanded.
638 _heap_end = new_end;
639 }
640 // Notice that the heap can also shrink. However, this only happens
641 // during a Full GC (at least currently) and the entire marking
642 // phase will bail out and the task will not be restarted. So, let's
643 // do nothing.
644 }
646 void ConcurrentMark::reset() {
647 // Starting values for these two. This should be called in a STW
648 // phase. CM will be notified of any future g1_committed expansions;
649 // these will happen at the end of evacuation pauses, when tasks are
650 // inactive.
651 MemRegion committed = _g1h->g1_committed();
652 _heap_start = committed.start();
653 _heap_end = committed.end();
655 // Separated the asserts so that we know which one fires.
656 assert(_heap_start != NULL, "heap bounds should look ok");
657 assert(_heap_end != NULL, "heap bounds should look ok");
658 assert(_heap_start < _heap_end, "heap bounds should look ok");
660 // reset all the marking data structures and any necessary flags
661 clear_marking_state();
663 if (verbose_low())
664 gclog_or_tty->print_cr("[global] resetting");
666 // We do reset all of them, since different phases will use
667 // different numbers of active threads. So, it's easiest to have all
668 // of them ready.
669 for (int i = 0; i < (int) _max_task_num; ++i) {
670 _tasks[i]->reset(_nextMarkBitMap);
671 }
673 // we need this to make sure that the flag is on during the evac
674 // pause with initial mark piggy-backed
675 set_concurrent_marking_in_progress();
676 }
678 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
679 assert(active_tasks <= _max_task_num, "we should not have more");
681 _active_tasks = active_tasks;
682 // Need to update the three data structures below according to the
683 // number of active threads for this phase.
684 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
685 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
686 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
688 _concurrent = concurrent;
689 // We propagate this to all tasks, not just the active ones.
690 for (int i = 0; i < (int) _max_task_num; ++i)
691 _tasks[i]->set_concurrent(concurrent);
693 if (concurrent) {
694 set_concurrent_marking_in_progress();
695 } else {
696 // We currently assume that the concurrent flag has been set to
697 // false before we start remark. At this point we should also be
698 // in a STW phase.
699 assert(!concurrent_marking_in_progress(), "invariant");
700 assert(_finger == _heap_end, "only way to get here");
701 update_g1_committed(true);
702 }
703 }
705 void ConcurrentMark::set_non_marking_state() {
706 // We set the global marking state to some default values when we're
707 // not doing marking.
708 clear_marking_state();
709 _active_tasks = 0;
710 clear_concurrent_marking_in_progress();
711 }
713 ConcurrentMark::~ConcurrentMark() {
714 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
715 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
716 FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
717 _par_cleanup_thread_state);
719 for (int i = 0; i < (int) _max_task_num; ++i) {
720 delete _task_queues->queue(i);
721 delete _tasks[i];
722 }
723 delete _task_queues;
724 FREE_C_HEAP_ARRAY(CMTask*, _tasks);
725 }
727 // The closure used to mark refs into the G1 heap from external
728 // roots (CMMarkRootsClosure, below) records marks in the next
729 // marking bit map. It is applied at the first checkpoint.
730 //
732 void ConcurrentMark::clearNextBitmap() {
733 G1CollectedHeap* g1h = G1CollectedHeap::heap();
734 G1CollectorPolicy* g1p = g1h->g1_policy();
736 // Make sure that the concurrent mark thread still appears to be in
737 // the current cycle.
738 guarantee(cmThread()->during_cycle(), "invariant");
740 // We are finishing up the current cycle by clearing the next
741 // marking bitmap and getting it ready for the next cycle. During
742 // this time no other cycle can start. So, let's make sure that this
743 // is the case.
744 guarantee(!g1h->mark_in_progress(), "invariant");
746 // clear the mark bitmap (no grey objects to start with).
747 // We need to do this in chunks and offer to yield in between
748 // each chunk.
749 HeapWord* start = _nextMarkBitMap->startWord();
750 HeapWord* end = _nextMarkBitMap->endWord();
751 HeapWord* cur = start;
752 size_t chunkSize = M;
753 while (cur < end) {
754 HeapWord* next = cur + chunkSize;
755 if (next > end)
756 next = end;
757 MemRegion mr(cur,next);
758 _nextMarkBitMap->clearRange(mr);
759 cur = next;
760 do_yield_check();
762 // Repeat the asserts from above. We do them as asserts here to
763 // minimize their overhead in product builds. However, we keep
764 // them as guarantees at the beginning / end of the bitmap
765 // clearing to get some checking in product builds.
766 assert(cmThread()->during_cycle(), "invariant");
767 assert(!g1h->mark_in_progress(), "invariant");
768 }
770 // Repeat the asserts from above.
771 guarantee(cmThread()->during_cycle(), "invariant");
772 guarantee(!g1h->mark_in_progress(), "invariant");
773 }
775 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
776 public:
777 bool doHeapRegion(HeapRegion* r) {
778 if (!r->continuesHumongous()) {
779 r->note_start_of_marking(true);
780 }
781 return false;
782 }
783 };
785 void ConcurrentMark::checkpointRootsInitialPre() {
786 G1CollectedHeap* g1h = G1CollectedHeap::heap();
787 G1CollectorPolicy* g1p = g1h->g1_policy();
789 _has_aborted = false;
791 #ifndef PRODUCT
792 if (G1PrintReachableAtInitialMark) {
793 print_reachable("at-cycle-start",
794 true /* use_prev_marking */, true /* all */);
795 }
796 #endif
798 // Initialise marking structures. This has to be done in a STW phase.
799 reset();
800 }
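// Root-scanning closure for the initial-mark pause: any root that
// points into the G1-reserved heap is greyed via grayRoot(). The
// instance created with do_barrier = true (used for the perm gen
// roots in checkpointRootsInitial) additionally applies do_barrier()
// to the root location.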
802 class CMMarkRootsClosure: public OopsInGenClosure {
803 private:
804 ConcurrentMark* _cm;
805 G1CollectedHeap* _g1h;
806 bool _do_barrier;
808 public:
809 CMMarkRootsClosure(ConcurrentMark* cm,
810 G1CollectedHeap* g1h,
811 bool do_barrier) : _cm(cm), _g1h(g1h),
812 _do_barrier(do_barrier) { }
814 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
815 virtual void do_oop( oop* p) { do_oop_work(p); }
817 template <class T> void do_oop_work(T* p) {
818 T heap_oop = oopDesc::load_heap_oop(p);
819 if (!oopDesc::is_null(heap_oop)) {
820 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
821 assert(obj->is_oop() || obj->mark() == NULL,
822 "expected an oop, possibly with mark word displaced");
823 HeapWord* addr = (HeapWord*)obj;
824 if (_g1h->is_in_g1_reserved(addr)) {
825 _cm->grayRoot(obj);
826 }
827 }
828 if (_do_barrier) {
829 assert(!_g1h->is_in_g1_reserved(p),
830 "Should be called on external roots");
831 do_barrier(p);
832 }
833 }
834 };
836 void ConcurrentMark::checkpointRootsInitialPost() {
837 G1CollectedHeap* g1h = G1CollectedHeap::heap();
839 // For each region note start of marking.
840 NoteStartOfMarkHRClosure startcl;
841 g1h->heap_region_iterate(&startcl);
843 // Start weak-reference discovery.
844 ReferenceProcessor* rp = g1h->ref_processor();
845 rp->verify_no_references_recorded();
846 rp->enable_discovery(); // enable ("weak") refs discovery
847 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
849 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
850 // This is the start of the marking cycle; we expect all
851 // threads to have SATB queues with active set to false.
852 satb_mq_set.set_active_all_threads(true, /* new active value */
853 false /* expected_active */);
855 // update_g1_committed() will be called at the end of an evac pause
856 // when marking is on. So, it's also called at the end of the
857 // initial-mark pause to update the heap end, if the heap expands
858 // during it. No need to call it here.
859 }
861 // Checkpoint the roots into this generation from outside
862 // this generation. [Note this initial checkpoint need only
863 // be approximate -- we'll do a catch up phase subsequently.]
864 void ConcurrentMark::checkpointRootsInitial() {
865 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
866 G1CollectedHeap* g1h = G1CollectedHeap::heap();
868 double start = os::elapsedTime();
870 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
871 g1p->record_concurrent_mark_init_start();
872 checkpointRootsInitialPre();
874 // YSR: when concurrent precleaning is in place, we'll
875 // need to clear the cached card table here
877 ResourceMark rm;
878 HandleMark hm;
880 g1h->ensure_parsability(false);
881 g1h->perm_gen()->save_marks();
883 CMMarkRootsClosure notOlder(this, g1h, false);
884 CMMarkRootsClosure older(this, g1h, true);
886 g1h->set_marking_started();
887 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
889 g1h->process_strong_roots(true, // activate StrongRootsScope
890 false, // fake perm gen collection
891 SharedHeap::SO_AllClasses,
892 &notOlder, // Regular roots
893 NULL, // do not visit active blobs
894 &older // Perm Gen Roots
895 );
896 checkpointRootsInitialPost();
898 // Statistics.
899 double end = os::elapsedTime();
900 _init_times.add((end - start) * 1000.0);
902 g1p->record_concurrent_mark_init_end();
903 }
905 /*
906 Notice that in the next two methods, we actually leave the STS
907 during the barrier sync and join it immediately afterwards. If we
908 do not do this, then the following deadlock can occur: one
909 thread could be in the barrier sync code, waiting for the other
910 thread to also sync up, whereas another one could be trying to
911 yield, while also waiting for the other threads to sync up too.
913 Because the thread that does the sync barrier has left the STS, it
914 is possible for a Full GC or an evacuation pause to occur while it
915 is suspended. This is actually safe, since entering the sync
916 barrier is one of the last things do_marking_step() does, and it
917 doesn't manipulate any data structures afterwards.
918 */
920 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
921 if (verbose_low())
922 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
924 ConcurrentGCThread::stsLeave();
925 _first_overflow_barrier_sync.enter();
926 ConcurrentGCThread::stsJoin();
927 // at this point everyone should have synced up and not be doing any
928 // more work
930 if (verbose_low())
931 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
933 // let task 0 do this
934 if (task_num == 0) {
935 // task 0 is responsible for clearing the global data structures
936 clear_marking_state();
938 if (PrintGC) {
939 gclog_or_tty->date_stamp(PrintGCDateStamps);
940 gclog_or_tty->stamp(PrintGCTimeStamps);
941 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
942 }
943 }
945 // after this, each task should reset its own data structures and
946 // then go into the second barrier
947 }
949 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
950 if (verbose_low())
951 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
953 ConcurrentGCThread::stsLeave();
954 _second_overflow_barrier_sync.enter();
955 ConcurrentGCThread::stsJoin();
956 // at this point everything should be re-initialised and ready to go
958 if (verbose_low())
959 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
960 }
962 void ConcurrentMark::grayRoot(oop p) {
963 HeapWord* addr = (HeapWord*) p;
964 // We can't really check against _heap_start and _heap_end, since it
965 // is possible during an evacuation pause with piggy-backed
966 // initial-mark that the committed space is expanded during the
967 // pause without CM observing this change. So the assertion below
968 // is a bit conservative, but better than nothing.
969 assert(_g1h->g1_committed().contains(addr),
970 "address should be within the heap bounds");
972 if (!_nextMarkBitMap->isMarked(addr))
973 _nextMarkBitMap->parMark(addr);
974 }
976 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
977 // The objects on the region have already been marked "in bulk" by
978 // the caller. We only need to decide whether to push the region on
979 // the region stack or not.
981 if (!concurrent_marking_in_progress() || !_should_gray_objects)
982 // We're done with marking and waiting for remark. We do not need to
983 // push anything else on the region stack.
984 return;
986 HeapWord* finger = _finger;
988 if (verbose_low())
989 gclog_or_tty->print_cr("[global] attempting to push "
990 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
991 PTR_FORMAT, mr.start(), mr.end(), finger);
993 if (mr.start() < finger) {
994 // The finger is always heap region aligned and it is not possible
995 // for mr to span heap regions.
996 assert(mr.end() <= finger, "invariant");
998 // Separated the asserts so that we know which one fires.
999 assert(mr.start() <= mr.end(),
1000 "region boundaries should fall within the committed space");
1001 assert(_heap_start <= mr.start(),
1002 "region boundaries should fall within the committed space");
1003 assert(mr.end() <= _heap_end,
1004 "region boundaries should fall within the committed space");
1005 if (verbose_low())
1006 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1007 "below the finger, pushing it",
1008 mr.start(), mr.end());
1010 if (!region_stack_push_lock_free(mr)) {
1011 if (verbose_low())
1012 gclog_or_tty->print_cr("[global] region stack has overflown.");
1013 }
1014 }
1015 }
1017 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1018 // The object is not marked by the caller. We need to at least mark
1019 // it and maybe push it on the stack.
1021 HeapWord* addr = (HeapWord*)p;
1022 if (!_nextMarkBitMap->isMarked(addr)) {
1023 // We definitely need to mark it, irrespective of whether we bail out
1024 // because we're done with marking.
1025 if (_nextMarkBitMap->parMark(addr)) {
1026 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1027 // If we're done with concurrent marking and we're waiting for
1028 // remark, then we're not pushing anything on the stack.
1029 return;
1031 // No OrderAccess::store_load() is needed. It is implicit in the
1032 // CAS done in parMark(addr) above
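// Only objects below the global finger need to be pushed
// explicitly: objects at or above it will be visited when the
// finger sweeps over them.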
1033 HeapWord* finger = _finger;
1035 if (addr < finger) {
1036 if (!mark_stack_push(oop(addr))) {
1037 if (verbose_low())
1038 gclog_or_tty->print_cr("[global] global stack overflow "
1039 "during parMark");
1040 }
1041 }
1042 }
1043 }
1044 }
1046 class CMConcurrentMarkingTask: public AbstractGangTask {
1047 private:
1048 ConcurrentMark* _cm;
1049 ConcurrentMarkThread* _cmt;
1051 public:
1052 void work(int worker_i) {
1053 assert(Thread::current()->is_ConcurrentGC_thread(),
1054 "this should only be done by a conc GC thread");
1055 ResourceMark rm;
1057 double start_vtime = os::elapsedVTime();
1059 ConcurrentGCThread::stsJoin();
1061 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1062 CMTask* the_task = _cm->task(worker_i);
1063 the_task->record_start_time();
1064 if (!_cm->has_aborted()) {
1065 do {
1066 double start_vtime_sec = os::elapsedVTime();
1067 double start_time_sec = os::elapsedTime();
1068 the_task->do_marking_step(10.0);
1069 double end_time_sec = os::elapsedTime();
1070 double end_vtime_sec = os::elapsedVTime();
1071 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1072 double elapsed_time_sec = end_time_sec - start_time_sec;
1073 _cm->clear_has_overflown();
1075 bool ret = _cm->do_yield_check(worker_i);
1077 jlong sleep_time_ms;
1078 if (!_cm->has_aborted() && the_task->has_aborted()) {
1079 sleep_time_ms =
1080 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1081 ConcurrentGCThread::stsLeave();
1082 os::sleep(Thread::current(), sleep_time_ms, false);
1083 ConcurrentGCThread::stsJoin();
1084 }
1085 double end_time2_sec = os::elapsedTime();
1086 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1088 #if 0
1089 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1090 "overhead %1.4lf",
1091 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1092 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1093 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1094 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1095 #endif
1096 } while (!_cm->has_aborted() && the_task->has_aborted());
1097 }
1098 the_task->record_end_time();
1099 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1101 ConcurrentGCThread::stsLeave();
1103 double end_vtime = os::elapsedVTime();
1104 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1105 }
1107 CMConcurrentMarkingTask(ConcurrentMark* cm,
1108 ConcurrentMarkThread* cmt) :
1109 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1111 ~CMConcurrentMarkingTask() { }
1112 };
1114 void ConcurrentMark::markFromRoots() {
1115 // we might be tempted to assert that:
1116 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1117 // "inconsistent argument?");
1118 // However that wouldn't be right, because it's possible that
1119 // a safepoint is indeed in progress as a younger generation
1120 // stop-the-world GC happens even as we mark in this generation.
1122 _restart_for_overflow = false;
1124 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1126 CMConcurrentMarkingTask markingTask(this, cmThread());
1127 if (parallel_marking_threads() > 0)
1128 _parallel_workers->run_task(&markingTask);
1129 else
1130 markingTask.work(0);
1131 print_stats();
1132 }
1134 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1135 // world is stopped at this checkpoint
1136 assert(SafepointSynchronize::is_at_safepoint(),
1137 "world should be stopped");
1138 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1140 // If a full collection has happened, we shouldn't do this.
1141 if (has_aborted()) {
1142 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1143 return;
1144 }
1146 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1148 if (VerifyDuringGC) {
1149 HandleMark hm; // handle scope
1150 gclog_or_tty->print(" VerifyDuringGC:(before)");
1151 Universe::heap()->prepare_for_verify();
1152 Universe::verify(true, false, true);
1153 }
1155 G1CollectorPolicy* g1p = g1h->g1_policy();
1156 g1p->record_concurrent_mark_remark_start();
1158 double start = os::elapsedTime();
1160 checkpointRootsFinalWork();
1162 double mark_work_end = os::elapsedTime();
1164 weakRefsWork(clear_all_soft_refs);
1166 if (has_overflown()) {
1167 // Oops. We overflowed. Restart concurrent marking.
1168 _restart_for_overflow = true;
1169 // Clear the flag. We do not need it any more.
1170 clear_has_overflown();
1171 if (G1TraceMarkStackOverflow)
1172 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1173 } else {
1174 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1175 // We're done with marking.
1176 // This is the end of the marking cycle; we expect all
1177 // threads to have SATB queues with active set to true.
1178 satb_mq_set.set_active_all_threads(false, /* new active value */
1179 true /* expected_active */);
1181 if (VerifyDuringGC) {
1182 HandleMark hm; // handle scope
1183 gclog_or_tty->print(" VerifyDuringGC:(after)");
1184 Universe::heap()->prepare_for_verify();
1185 Universe::heap()->verify(/* allow_dirty */ true,
1186 /* silent */ false,
1187 /* use_prev_marking */ false);
1188 }
1189 }
1191 #if VERIFY_OBJS_PROCESSED
1192 _scan_obj_cl.objs_processed = 0;
1193 ThreadLocalObjQueue::objs_enqueued = 0;
1194 #endif
1196 // Statistics
1197 double now = os::elapsedTime();
1198 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1199 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1200 _remark_times.add((now - start) * 1000.0);
1202 g1p->record_concurrent_mark_remark_end();
1203 }
1206 #define CARD_BM_TEST_MODE 0
1208 class CalcLiveObjectsClosure: public HeapRegionClosure {
1210 CMBitMapRO* _bm;
1211 ConcurrentMark* _cm;
1212 bool _changed;
1213 bool _yield;
1214 size_t _words_done;
1215 size_t _tot_live;
1216 size_t _tot_used;
1217 size_t _regions_done;
1218 double _start_vtime_sec;
1220 BitMap* _region_bm;
1221 BitMap* _card_bm;
1222 intptr_t _bottom_card_num;
1223 bool _final;
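// Marks the cards in the inclusive range [start_card_num,
// last_card_num], translating global card numbers into card bitmap
// indices by subtracting the card number of the heap's bottom.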
1225 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1226 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1227 #if CARD_BM_TEST_MODE
1228 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1229 #else
1230 _card_bm->par_at_put(i - _bottom_card_num, 1);
1231 #endif
1232 }
1233 }
1235 public:
1236 CalcLiveObjectsClosure(bool final,
1237 CMBitMapRO *bm, ConcurrentMark *cm,
1238 BitMap* region_bm, BitMap* card_bm) :
1239 _bm(bm), _cm(cm), _changed(false), _yield(true),
1240 _words_done(0), _tot_live(0), _tot_used(0),
1241 _region_bm(region_bm), _card_bm(card_bm),_final(final),
1242 _regions_done(0), _start_vtime_sec(0.0)
1243 {
1244 _bottom_card_num =
1245 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1246 CardTableModRefBS::card_shift);
1247 }
1249 // It takes a region that's not empty (i.e., it has at least one
1250 // live object in it) and sets its corresponding bit on the region
1251 // bitmap to 1. If the region is "starts humongous" it will also set
1252 // to 1 the bits on the region bitmap that correspond to its
1253 // associated "continues humongous" regions.
1254 void set_bit_for_region(HeapRegion* hr) {
1255 assert(!hr->continuesHumongous(), "should have filtered those out");
1257 size_t index = hr->hrs_index();
1258 if (!hr->startsHumongous()) {
1259 // Normal (non-humongous) case: just set the bit.
1260 _region_bm->par_at_put((BitMap::idx_t) index, true);
1261 } else {
1262 // Starts humongous case: calculate how many regions are part of
1263 // this humongous region and then set the bit range. It might
1264 // have been a bit more efficient to look at the object that
1265 // spans these humongous regions to calculate their number from
1266 // the object's size. However, it's a good idea to calculate
1267 // this based on the metadata itself, and not the region
1268 // contents, so that this code is not aware of what goes into
1269 // the humongous regions (in case this changes in the future).
1270 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1271 size_t end_index = index + 1;
1272 while (end_index < g1h->n_regions()) {
1273 HeapRegion* chr = g1h->region_at(end_index);
1274 if (!chr->continuesHumongous()) {
1275 break;
1276 }
1277 end_index += 1;
1278 }
1279 _region_bm->par_at_put_range((BitMap::idx_t) index,
1280 (BitMap::idx_t) end_index, true);
1281 }
1282 }
1284 bool doHeapRegion(HeapRegion* hr) {
1285 if (!_final && _regions_done == 0)
1286 _start_vtime_sec = os::elapsedVTime();
1288 if (hr->continuesHumongous()) {
1289 // We will ignore these here and process them when their
1290 // associated "starts humongous" region is processed (see
1291 // set_bit_for_region()). Note that we cannot rely on their
1292 // associated "starts humongous" region to have their bit set to
1293 // 1 since, due to the region chunking in the parallel region
1294 // iteration, a "continues humongous" region might be visited
1295 // before its associated "starts humongous".
1296 return false;
1297 }
1299 HeapWord* nextTop = hr->next_top_at_mark_start();
1300 HeapWord* start = hr->top_at_conc_mark_count();
1301 assert(hr->bottom() <= start && start <= hr->end() &&
1302 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1303 start <= nextTop,
1304 "Preconditions.");
1305 // Record the number of words we'll examine.
1306 size_t words_done = (nextTop - start);
1307 // Find the first marked object at or after "start".
1308 start = _bm->getNextMarkedWordAddress(start, nextTop);
1309 size_t marked_bytes = 0;
1311 // Below, the term "card num" means the result of shifting an address
1312 // by the card shift -- address 0 corresponds to card number 0. One
1313 // must subtract the card num of the bottom of the heap to obtain a
1314 // card table index.
1315 // The first card num of the sequence of live cards currently being
1316 // constructed. -1 ==> no sequence.
1317 intptr_t start_card_num = -1;
1318 // The last card num of the sequence of live cards currently being
1319 // constructed. -1 ==> no sequence.
1320 intptr_t last_card_num = -1;
1322 while (start < nextTop) {
1323 if (_yield && _cm->do_yield_check()) {
1324 // We yielded. It might be for a full collection, in which case
1325 // all bets are off; terminate the traversal.
1326 if (_cm->has_aborted()) {
1327 _changed = false;
1328 return true;
1329 } else {
1330 // Otherwise, it might be a collection pause, and the region
1331 // we're looking at might be in the collection set. We'll
1332 // abandon this region.
1333 return false;
1334 }
1335 }
1336 oop obj = oop(start);
1337 int obj_sz = obj->size();
1338 // The card num of the start of the current object.
1339 intptr_t obj_card_num =
1340 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1342 HeapWord* obj_last = start + obj_sz - 1;
1343 intptr_t obj_last_card_num =
1344 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1346 if (obj_card_num != last_card_num) {
1347 if (start_card_num == -1) {
1348 assert(last_card_num == -1, "Both or neither.");
1349 start_card_num = obj_card_num;
1350 } else {
1351 assert(last_card_num != -1, "Both or neither.");
1352 assert(obj_card_num >= last_card_num, "Inv");
1353 if ((obj_card_num - last_card_num) > 1) {
1354 // Mark the last run, and start a new one.
1355 mark_card_num_range(start_card_num, last_card_num);
1356 start_card_num = obj_card_num;
1357 }
1358 }
1359 #if CARD_BM_TEST_MODE
1360 /*
1361 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1362 obj_card_num - _bottom_card_num,
1363 obj_last_card_num - _bottom_card_num);
1364 */
1365 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1366 _card_bm->par_at_put(j - _bottom_card_num, 1);
1367 }
1368 #endif
1369 }
1370 // In any case, we set the last card num.
1371 last_card_num = obj_last_card_num;
1373 marked_bytes += (size_t)obj_sz * HeapWordSize;
1374 // Find the next marked object after this one.
1375 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1376 _changed = true;
1377 }
1378 // Handle the last range, if any.
1379 if (start_card_num != -1)
1380 mark_card_num_range(start_card_num, last_card_num);
1381 if (_final) {
1382 // Mark the allocated-since-marking portion...
1383 HeapWord* tp = hr->top();
1384 if (nextTop < tp) {
1385 start_card_num =
1386 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1387 last_card_num =
1388 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1389 mark_card_num_range(start_card_num, last_card_num);
1390 // This definitely means the region has live objects.
1391 set_bit_for_region(hr);
1392 }
1393 }
1395 hr->add_to_marked_bytes(marked_bytes);
1396 // Update the live region bitmap.
1397 if (marked_bytes > 0) {
1398 set_bit_for_region(hr);
1399 }
1400 hr->set_top_at_conc_mark_count(nextTop);
1401 _tot_live += hr->next_live_bytes();
1402 _tot_used += hr->used();
1403 _words_done = words_done;
1405 if (!_final) {
1406 ++_regions_done;
1407 if (_regions_done % 10 == 0) {
1408 double end_vtime_sec = os::elapsedVTime();
1409 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1410 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1411 jlong sleep_time_ms =
1412 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1413 os::sleep(Thread::current(), sleep_time_ms, false);
1414 _start_vtime_sec = end_vtime_sec;
1415 }
1416 }
1417 }
1419 return false;
1420 }
1422 bool changed() { return _changed; }
1423 void reset() { _changed = false; _words_done = 0; }
1424 void no_yield() { _yield = false; }
1425 size_t words_done() { return _words_done; }
1426 size_t tot_live() { return _tot_live; }
1427 size_t tot_used() { return _tot_used; }
1428 };
1431 void ConcurrentMark::calcDesiredRegions() {
1432 _region_bm.clear();
1433 _card_bm.clear();
1434 CalcLiveObjectsClosure calccl(false /*final*/,
1435 nextMarkBitMap(), this,
1436 &_region_bm, &_card_bm);
1437 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1438 g1h->heap_region_iterate(&calccl);
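// Iterate to a fixed point: counting runs while marking is still in
// progress and yields along the way, so a single pass may be
// incomplete; repeat until a pass observes no newly-counted objects.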
1440 do {
1441 calccl.reset();
1442 g1h->heap_region_iterate(&calccl);
1443 } while (calccl.changed());
1444 }
1446 class G1ParFinalCountTask: public AbstractGangTask {
1447 protected:
1448 G1CollectedHeap* _g1h;
1449 CMBitMap* _bm;
1450 size_t _n_workers;
1451 size_t *_live_bytes;
1452 size_t *_used_bytes;
1453 BitMap* _region_bm;
1454 BitMap* _card_bm;
1455 public:
1456 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1457 BitMap* region_bm, BitMap* card_bm) :
1458 AbstractGangTask("G1 final counting"), _g1h(g1h),
1459 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1460 {
1461 if (ParallelGCThreads > 0)
1462 _n_workers = _g1h->workers()->total_workers();
1463 else
1464 _n_workers = 1;
1465 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1466 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1467 }
1469 ~G1ParFinalCountTask() {
1470 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1471 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1472 }
1474 void work(int i) {
1475 CalcLiveObjectsClosure calccl(true /*final*/,
1476 _bm, _g1h->concurrent_mark(),
1477 _region_bm, _card_bm);
1478 calccl.no_yield();
1479 if (G1CollectedHeap::use_parallel_gc_threads()) {
1480 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1481 HeapRegion::FinalCountClaimValue);
1482 } else {
1483 _g1h->heap_region_iterate(&calccl);
1484 }
1485 assert(calccl.complete(), "Shouldn't have yielded!");
1487 assert((size_t) i < _n_workers, "invariant");
1488 _live_bytes[i] = calccl.tot_live();
1489 _used_bytes[i] = calccl.tot_used();
1490 }
1491 size_t live_bytes() {
1492 size_t live_bytes = 0;
1493 for (size_t i = 0; i < _n_workers; ++i)
1494 live_bytes += _live_bytes[i];
1495 return live_bytes;
1496 }
1497 size_t used_bytes() {
1498 size_t used_bytes = 0;
1499 for (size_t i = 0; i < _n_workers; ++i)
1500 used_bytes += _used_bytes[i];
1501 return used_bytes;
1502 }
1503 };
1505 class G1ParNoteEndTask;
1507 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1508 G1CollectedHeap* _g1;
1509 int _worker_num;
1510 size_t _max_live_bytes;
1511 size_t _regions_claimed;
1512 size_t _freed_bytes;
1513 size_t _cleared_h_regions;
1514 size_t _freed_regions;
1515 UncleanRegionList* _unclean_region_list;
1516 double _claimed_region_time;
1517 double _max_region_time;
1519 public:
1520 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1521 UncleanRegionList* list,
1522 int worker_num);
1523 size_t freed_bytes() { return _freed_bytes; }
1524 size_t cleared_h_regions() { return _cleared_h_regions; }
1525 size_t freed_regions() { return _freed_regions; }
1526 UncleanRegionList* unclean_region_list() {
1527 return _unclean_region_list;
1528 }
1530 bool doHeapRegion(HeapRegion *r);
1532 size_t max_live_bytes() { return _max_live_bytes; }
1533 size_t regions_claimed() { return _regions_claimed; }
1534 double claimed_region_time_sec() { return _claimed_region_time; }
1535 double max_region_time_sec() { return _max_region_time; }
1536 };
1538 class G1ParNoteEndTask: public AbstractGangTask {
1539 friend class G1NoteEndOfConcMarkClosure;
1540 protected:
1541 G1CollectedHeap* _g1h;
1542 size_t _max_live_bytes;
1543 size_t _freed_bytes;
1544 ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1545 public:
1546 G1ParNoteEndTask(G1CollectedHeap* g1h,
1547 ConcurrentMark::ParCleanupThreadState**
1548 par_cleanup_thread_state) :
1549 AbstractGangTask("G1 note end"), _g1h(g1h),
1550 _max_live_bytes(0), _freed_bytes(0),
1551 _par_cleanup_thread_state(par_cleanup_thread_state)
1552 {}
1554 void work(int i) {
1555 double start = os::elapsedTime();
1556 G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1557 &_par_cleanup_thread_state[i]->list,
1558 i);
1559 if (G1CollectedHeap::use_parallel_gc_threads()) {
1560 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1561 HeapRegion::NoteEndClaimValue);
1562 } else {
1563 _g1h->heap_region_iterate(&g1_note_end);
1564 }
1565 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1567 // Now finish up freeing the current thread's regions.
1568 _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1569 g1_note_end.cleared_h_regions(),
1570 0, NULL);
1571 {
1572 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1573 _max_live_bytes += g1_note_end.max_live_bytes();
1574 _freed_bytes += g1_note_end.freed_bytes();
1575 }
1576 double end = os::elapsedTime();
1577 if (G1PrintParCleanupStats) {
1578 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1579 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1580 i, start, end, (end-start)*1000.0,
1581 g1_note_end.regions_claimed(),
1582 g1_note_end.claimed_region_time_sec()*1000.0,
1583 g1_note_end.max_region_time_sec()*1000.0);
1584 }
1585 }
1586 size_t max_live_bytes() { return _max_live_bytes; }
1587 size_t freed_bytes() { return _freed_bytes; }
1588 };
1590 class G1ParScrubRemSetTask: public AbstractGangTask {
1591 protected:
1592 G1RemSet* _g1rs;
1593 BitMap* _region_bm;
1594 BitMap* _card_bm;
1595 public:
1596 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1597 BitMap* region_bm, BitMap* card_bm) :
1598 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1599 _region_bm(region_bm), _card_bm(card_bm)
1600 {}
1602 void work(int i) {
1603 if (G1CollectedHeap::use_parallel_gc_threads()) {
1604 _g1rs->scrub_par(_region_bm, _card_bm, i,
1605 HeapRegion::ScrubRemSetClaimValue);
1606 } else {
1607 _g1rs->scrub(_region_bm, _card_bm);
1608 }
1609 }
1611 };
1613 G1NoteEndOfConcMarkClosure::
1614 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1615 UncleanRegionList* list,
1616 int worker_num)
1617 : _g1(g1), _worker_num(worker_num),
1618 _max_live_bytes(0), _regions_claimed(0),
1619 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1620 _claimed_region_time(0.0), _max_region_time(0.0),
1621 _unclean_region_list(list)
1622 {}
1624 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1625 // We use a claim value of zero here because all regions
1626 // were claimed with value 1 in the FinalCount task.
1627 r->reset_gc_time_stamp();
1628 if (!r->continuesHumongous()) {
1629 double start = os::elapsedTime();
1630 _regions_claimed++;
1631 r->note_end_of_marking();
1632 _max_live_bytes += r->max_live_bytes();
1633 _g1->free_region_if_totally_empty_work(r,
1634 _freed_bytes,
1635 _cleared_h_regions,
1636 _freed_regions,
1637 _unclean_region_list,
1638 true /*par*/);
1639 double region_time = (os::elapsedTime() - start);
1640 _claimed_region_time += region_time;
1641 if (region_time > _max_region_time) _max_region_time = region_time;
1642 }
1643 return false;
1644 }
1646 void ConcurrentMark::cleanup() {
1647 // world is stopped at this checkpoint
1648 assert(SafepointSynchronize::is_at_safepoint(),
1649 "world should be stopped");
1650 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1652 // If a full collection has happened, we shouldn't do this.
1653 if (has_aborted()) {
1654 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1655 return;
1656 }
1658 if (VerifyDuringGC) {
1659 HandleMark hm; // handle scope
1660 gclog_or_tty->print(" VerifyDuringGC:(before)");
1661 Universe::heap()->prepare_for_verify();
1662 Universe::verify(/* allow dirty */ true,
1663 /* silent */ false,
1664 /* prev marking */ true);
1665 }
1667 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1668 g1p->record_concurrent_mark_cleanup_start();
1670 double start = os::elapsedTime();
1672 // Do counting once more with the world stopped for good measure.
1673 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1674 &_region_bm, &_card_bm);
1675 if (G1CollectedHeap::use_parallel_gc_threads()) {
1676 assert(g1h->check_heap_region_claim_values(
1677 HeapRegion::InitialClaimValue),
1678 "sanity check");
1680 int n_workers = g1h->workers()->total_workers();
1681 g1h->set_par_threads(n_workers);
1682 g1h->workers()->run_task(&g1_par_count_task);
1683 g1h->set_par_threads(0);
1685 assert(g1h->check_heap_region_claim_values(
1686 HeapRegion::FinalCountClaimValue),
1687 "sanity check");
1688 } else {
1689 g1_par_count_task.work(0);
1690 }
1692 size_t known_garbage_bytes =
1693 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1694 #if 0
1695 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1696 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1697 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1698 (double) known_garbage_bytes / (double) (1024 * 1024));
1699 #endif // 0
1700 g1p->set_known_garbage_bytes(known_garbage_bytes);
1702 size_t start_used_bytes = g1h->used();
1703 _at_least_one_mark_complete = true;
1704 g1h->set_marking_complete();
1706 double count_end = os::elapsedTime();
1707 double this_final_counting_time = (count_end - start);
1708 if (G1PrintParCleanupStats) {
1709 gclog_or_tty->print_cr("Cleanup:");
1710 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1711 this_final_counting_time*1000.0);
1712 }
1713 _total_counting_time += this_final_counting_time;
1715 // Install newly created mark bitMap as "prev".
1716 swapMarkBitMaps();
1718 g1h->reset_gc_time_stamp();
1720 // Note end of marking in all heap regions.
1721 double note_end_start = os::elapsedTime();
1722 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1723 if (G1CollectedHeap::use_parallel_gc_threads()) {
1724 int n_workers = g1h->workers()->total_workers();
1725 g1h->set_par_threads(n_workers);
1726 g1h->workers()->run_task(&g1_par_note_end_task);
1727 g1h->set_par_threads(0);
1729 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1730 "sanity check");
1731 } else {
1732 g1_par_note_end_task.work(0);
1733 }
1734 g1h->set_unclean_regions_coming(true);
1735 double note_end_end = os::elapsedTime();
1736 // Tell the mutators that there might be unclean regions coming...
1737 if (G1PrintParCleanupStats) {
1738 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1739 (note_end_end - note_end_start)*1000.0);
1740 }
1742 // Scrubbing the remembered sets must happen before the cleanup-end
1743 // call below, since it affects the metric by which we sort the heap
1744 // regions.
1745 if (G1ScrubRemSets) {
1746 double rs_scrub_start = os::elapsedTime();
1747 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1748 if (G1CollectedHeap::use_parallel_gc_threads()) {
1749 int n_workers = g1h->workers()->total_workers();
1750 g1h->set_par_threads(n_workers);
1751 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1752 g1h->set_par_threads(0);
1754 assert(g1h->check_heap_region_claim_values(
1755 HeapRegion::ScrubRemSetClaimValue),
1756 "sanity check");
1757 } else {
1758 g1_par_scrub_rs_task.work(0);
1759 }
1761 double rs_scrub_end = os::elapsedTime();
1762 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1763 _total_rs_scrub_time += this_rs_scrub_time;
1764 }
1766 // this will also free any regions totally full of garbage objects,
1767 // and sort the regions.
1768 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1769 g1_par_note_end_task.freed_bytes(),
1770 g1_par_note_end_task.max_live_bytes());
1772 // Statistics.
1773 double end = os::elapsedTime();
1774 _cleanup_times.add((end - start) * 1000.0);
1776 // G1CollectedHeap::heap()->print();
1777 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1778 // G1CollectedHeap::heap()->get_gc_time_stamp());
1780 if (PrintGC || PrintGCDetails) {
1781 g1h->print_size_transition(gclog_or_tty,
1782 start_used_bytes,
1783 g1h->used(),
1784 g1h->capacity());
1785 }
1787 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1788 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1790 // We need to make this a "collection" so any collection pause that
1791 // races with it goes around and waits for completeCleanup to finish.
1792 g1h->increment_total_collections();
1794 if (VerifyDuringGC) {
1795 HandleMark hm; // handle scope
1796 gclog_or_tty->print(" VerifyDuringGC:(after)");
1797 Universe::heap()->prepare_for_verify();
1798 Universe::verify(/* allow dirty */ true,
1799 /* silent */ false,
1800 /* prev marking */ true);
1801 }
1802 }
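// In outline, cleanup() above proceeds in five steps: (1) a final
// stop-the-world count of live data per region, (2) swapping the next
// mark bitmap in as "prev", (3) noting the end of marking in every
// region and freeing the completely empty ones, (4) optionally
// scrubbing the remembered sets, and (5) recording cleanup-end in the
// policy, which also sorts the regions.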
1804 void ConcurrentMark::completeCleanup() {
1805 // A full collection intervened.
1806 if (has_aborted()) return;
1808 int first = 0;
1809 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1810 for (int t = 0; t < last; t++) {
1811 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1812 assert(list->well_formed(), "Inv");
1813 HeapRegion* hd = list->hd();
1814 while (hd != NULL) {
1815 // Now finish up the other stuff.
1816 hd->rem_set()->clear();
1817 HeapRegion* next_hd = hd->next_from_unclean_list();
1818 (void)list->pop();
1819 assert(list->hd() == next_hd, "how not?");
1820 _g1h->put_region_on_unclean_list(hd);
1821 if (!hd->isHumongous()) {
1822 // Increment the _free_regions count by 1.
1823 _g1h->finish_free_region_work(0, 0, 1, NULL);
1824 }
1825 hd = list->hd();
1826 assert(hd == next_hd, "how not?");
1827 }
1828 }
1829 }
1831 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1832 HeapWord* addr = (HeapWord*)obj;
1833 return addr != NULL &&
1834 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1835 }
1837 class G1CMKeepAliveClosure: public OopClosure {
1838 G1CollectedHeap* _g1;
1839 ConcurrentMark* _cm;
1840 CMBitMap* _bitMap;
1841 public:
1842 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1843 CMBitMap* bitMap) :
1844 _g1(g1), _cm(cm),
1845 _bitMap(bitMap) {}
1847 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1848 virtual void do_oop( oop* p) { do_oop_work(p); }
1850 template <class T> void do_oop_work(T* p) {
1851 oop thisOop = oopDesc::load_decode_heap_oop(p);
1852 HeapWord* addr = (HeapWord*)thisOop;
1853 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1854 _bitMap->mark(addr);
1855 _cm->mark_stack_push(thisOop);
1856 }
1857 }
1858 };
1860 class G1CMDrainMarkingStackClosure: public VoidClosure {
1861 CMMarkStack* _markStack;
1862 CMBitMap* _bitMap;
1863 G1CMKeepAliveClosure* _oopClosure;
1864 public:
1865 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1866 G1CMKeepAliveClosure* oopClosure) :
1867 _bitMap(bitMap),
1868 _markStack(markStack),
1869 _oopClosure(oopClosure)
1870 {}
1872 void do_void() {
1873 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1874 }
1875 };
1877 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1878 ResourceMark rm;
1879 HandleMark hm;
1880 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1881 ReferenceProcessor* rp = g1h->ref_processor();
1883 // See the comment in G1CollectedHeap::ref_processing_init()
1884 // about how reference processing currently works in G1.
1886 // Process weak references.
1887 rp->setup_policy(clear_all_soft_refs);
1888 assert(_markStack.isEmpty(), "mark stack should be empty");
1890 G1CMIsAliveClosure g1_is_alive(g1h);
1891 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
1892 G1CMDrainMarkingStackClosure
1893 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
1895 // XXXYYY Also: copy the parallel ref processing code from CMS.
1896 rp->process_discovered_references(&g1_is_alive,
1897 &g1_keep_alive,
1898 &g1_drain_mark_stack,
1899 NULL);
1900 assert(_markStack.overflow() || _markStack.isEmpty(),
1901 "mark stack should be empty (unless it overflowed)");
1902 if (_markStack.overflow()) {
1903 set_has_overflown();
1904 }
1906 rp->enqueue_discovered_references();
1907 rp->verify_no_references_recorded();
1908 assert(!rp->discovery_enabled(), "should have been disabled");
1910 // Now clean up stale oops in SymbolTable and StringTable
1911 SymbolTable::unlink(&g1_is_alive);
1912 StringTable::unlink(&g1_is_alive);
1913 }
1915 void ConcurrentMark::swapMarkBitMaps() {
1916 CMBitMapRO* temp = _prevMarkBitMap;
1917 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1918 _nextMarkBitMap = (CMBitMap*) temp;
1919 }
1921 class CMRemarkTask: public AbstractGangTask {
1922 private:
1923 ConcurrentMark *_cm;
1925 public:
1926 void work(int worker_i) {
1927 // Since all available tasks are actually started, we should
1928 // only proceed if we're supposed to be active.
1929 if ((size_t)worker_i < _cm->active_tasks()) {
1930 CMTask* task = _cm->task(worker_i);
1931 task->record_start_time();
1932 do {
1933 task->do_marking_step(1000000000.0 /* something very large */);
1934 } while (task->has_aborted() && !_cm->has_overflown());
1935 // If we overflow, then we do not want to restart. We instead
1936 // want to abort remark and do concurrent marking again.
1937 task->record_end_time();
1938 }
1939 }
1941 CMRemarkTask(ConcurrentMark* cm) :
1942 AbstractGangTask("Par Remark"), _cm(cm) { }
1943 };
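// A minimal sketch of the retry loop in work() above (illustrative
// only, hence compiled out): a task re-runs its marking step after
// voluntary aborts, but gives up once a global overflow has been
// signalled, so that remark can be abandoned and concurrent marking
// restarted.
#if 0
static void remark_until_done(CMTask* task, ConcurrentMark* cm) {
  do {
    task->do_marking_step(1000000000.0 /* effectively unbounded budget */);
  } while (task->has_aborted() && !cm->has_overflown());
}
#endif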
1945 void ConcurrentMark::checkpointRootsFinalWork() {
1946 ResourceMark rm;
1947 HandleMark hm;
1948 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1950 g1h->ensure_parsability(false);
1952 if (G1CollectedHeap::use_parallel_gc_threads()) {
1953 G1CollectedHeap::StrongRootsScope srs(g1h);
1954 // this is remark, so we'll use up all available threads
1955 int active_workers = ParallelGCThreads;
1956 set_phase(active_workers, false);
1958 CMRemarkTask remarkTask(this);
1959 // We will start all available threads, even if we decide that the
1960 // active_workers will be fewer. The extra ones will just bail out
1961 // immediately.
1962 int n_workers = g1h->workers()->total_workers();
1963 g1h->set_par_threads(n_workers);
1964 g1h->workers()->run_task(&remarkTask);
1965 g1h->set_par_threads(0);
1966 } else {
1967 G1CollectedHeap::StrongRootsScope srs(g1h);
1968 // this is remark, so we'll use up all available threads
1969 int active_workers = 1;
1970 set_phase(active_workers, false);
1972 CMRemarkTask remarkTask(this);
1973 // We will start all available threads, even if we decide that the
1974 // active_workers will be fewer. The extra ones will just bail out
1975 // immediately.
1976 remarkTask.work(0);
1977 }
1978 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1979 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1981 print_stats();
1983 if (!restart_for_overflow())
1984 set_non_marking_state();
1986 #if VERIFY_OBJS_PROCESSED
1987 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1988 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1989 _scan_obj_cl.objs_processed,
1990 ThreadLocalObjQueue::objs_enqueued);
1991 guarantee(_scan_obj_cl.objs_processed ==
1992 ThreadLocalObjQueue::objs_enqueued,
1993 "Different number of objs processed and enqueued.");
1994 }
1995 #endif
1996 }
1998 #ifndef PRODUCT
2000 class PrintReachableOopClosure: public OopClosure {
2001 private:
2002 G1CollectedHeap* _g1h;
2003 CMBitMapRO* _bitmap;
2004 outputStream* _out;
2005 bool _use_prev_marking;
2006 bool _all;
2008 public:
2009 PrintReachableOopClosure(CMBitMapRO* bitmap,
2010 outputStream* out,
2011 bool use_prev_marking,
2012 bool all) :
2013 _g1h(G1CollectedHeap::heap()),
2014 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2016 void do_oop(narrowOop* p) { do_oop_work(p); }
2017 void do_oop( oop* p) { do_oop_work(p); }
2019 template <class T> void do_oop_work(T* p) {
2020 oop obj = oopDesc::load_decode_heap_oop(p);
2021 const char* str = NULL;
2022 const char* str2 = "";
2024 if (obj == NULL) {
2025 str = "";
2026 } else if (!_g1h->is_in_g1_reserved(obj)) {
2027 str = " O";
2028 } else {
2029 HeapRegion* hr = _g1h->heap_region_containing(obj);
2030 guarantee(hr != NULL, "invariant");
2031 bool over_tams = false;
2032 if (_use_prev_marking) {
2033 over_tams = hr->obj_allocated_since_prev_marking(obj);
2034 } else {
2035 over_tams = hr->obj_allocated_since_next_marking(obj);
2036 }
2037 bool marked = _bitmap->isMarked((HeapWord*) obj);
2039 if (over_tams) {
2040 str = " >";
2041 if (marked) {
2042 str2 = " AND MARKED";
2043 }
2044 } else if (marked) {
2045 str = " M";
2046 } else {
2047 str = " NOT";
2048 }
2049 }
2051 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2052 p, (void*) obj, str, str2);
2053 }
2054 };
2056 class PrintReachableObjectClosure : public ObjectClosure {
2057 private:
2058 CMBitMapRO* _bitmap;
2059 outputStream* _out;
2060 bool _use_prev_marking;
2061 bool _all;
2062 HeapRegion* _hr;
2064 public:
2065 PrintReachableObjectClosure(CMBitMapRO* bitmap,
2066 outputStream* out,
2067 bool use_prev_marking,
2068 bool all,
2069 HeapRegion* hr) :
2070 _bitmap(bitmap), _out(out),
2071 _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2073 void do_object(oop o) {
2074 bool over_tams;
2075 if (_use_prev_marking) {
2076 over_tams = _hr->obj_allocated_since_prev_marking(o);
2077 } else {
2078 over_tams = _hr->obj_allocated_since_next_marking(o);
2079 }
2080 bool marked = _bitmap->isMarked((HeapWord*) o);
2081 bool print_it = _all || over_tams || marked;
2083 if (print_it) {
2084 _out->print_cr(" "PTR_FORMAT"%s",
2085 o, (over_tams) ? " >" : (marked) ? " M" : "");
2086 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2087 o->oop_iterate(&oopCl);
2088 }
2089 }
2090 };
2092 class PrintReachableRegionClosure : public HeapRegionClosure {
2093 private:
2094 CMBitMapRO* _bitmap;
2095 outputStream* _out;
2096 bool _use_prev_marking;
2097 bool _all;
2099 public:
2100 bool doHeapRegion(HeapRegion* hr) {
2101 HeapWord* b = hr->bottom();
2102 HeapWord* e = hr->end();
2103 HeapWord* t = hr->top();
2104 HeapWord* p = NULL;
2105 if (_use_prev_marking) {
2106 p = hr->prev_top_at_mark_start();
2107 } else {
2108 p = hr->next_top_at_mark_start();
2109 }
2110 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2111 "TAMS: "PTR_FORMAT, b, e, t, p);
2112 _out->cr();
2114 HeapWord* from = b;
2115 HeapWord* to = t;
2117 if (to > from) {
2118 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2119 _out->cr();
2120 PrintReachableObjectClosure ocl(_bitmap, _out,
2121 _use_prev_marking, _all, hr);
2122 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2123 _out->cr();
2124 }
2126 return false;
2127 }
2129 PrintReachableRegionClosure(CMBitMapRO* bitmap,
2130 outputStream* out,
2131 bool use_prev_marking,
2132 bool all) :
2133 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2134 };
2136 void ConcurrentMark::print_reachable(const char* str,
2137 bool use_prev_marking,
2138 bool all) {
2139 gclog_or_tty->cr();
2140 gclog_or_tty->print_cr("== Doing heap dump... ");
2142 if (G1PrintReachableBaseFile == NULL) {
2143 gclog_or_tty->print_cr(" #### error: no base file defined");
2144 return;
2145 }
2147 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2148 (JVM_MAXPATHLEN - 1)) {
2149 gclog_or_tty->print_cr(" #### error: file name too long");
2150 return;
2151 }
2153 char file_name[JVM_MAXPATHLEN];
2154 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2155 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2157 fileStream fout(file_name);
2158 if (!fout.is_open()) {
2159 gclog_or_tty->print_cr(" #### error: could not open file");
2160 return;
2161 }
2163 outputStream* out = &fout;
2165 CMBitMapRO* bitmap = NULL;
2166 if (use_prev_marking) {
2167 bitmap = _prevMarkBitMap;
2168 } else {
2169 bitmap = _nextMarkBitMap;
2170 }
2172 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2173 out->cr();
2175 out->print_cr("--- ITERATING OVER REGIONS");
2176 out->cr();
2177 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2178 _g1h->heap_region_iterate(&rcl);
2179 out->cr();
2181 gclog_or_tty->print_cr(" done");
2182 gclog_or_tty->flush();
2183 }
2185 #endif // PRODUCT
2187 // This note applies to drainAllSATBBuffers and the code between
2188 // here and that method. In the future we could reuse a task to do
2189 // this work during an evacuation pause (since tasks are now inactive
2190 // during pauses and can be claimed then). This was a late change to
2191 // the code and is currently not being taken advantage of.
2193 class CMGlobalObjectClosure : public ObjectClosure {
2194 private:
2195 ConcurrentMark* _cm;
2197 public:
2198 void do_object(oop obj) {
2199 _cm->deal_with_reference(obj);
2200 }
2202 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2203 };
2205 void ConcurrentMark::deal_with_reference(oop obj) {
2206 if (verbose_high())
2207 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2208 (void*) obj);
2211 HeapWord* objAddr = (HeapWord*) obj;
2212 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2213 if (_g1h->is_in_g1_reserved(objAddr)) {
2214 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2215 HeapRegion* hr = _g1h->heap_region_containing(obj);
2216 if (_g1h->is_obj_ill(obj, hr)) {
2217 if (verbose_high())
2218 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2219 "marked", (void*) obj);
2221 // we need to mark it first
2222 if (_nextMarkBitMap->parMark(objAddr)) {
2223 // No OrderAccess::store_load() is needed. It is implicit in the
2224 // CAS done in parMark(objAddr) above
2225 HeapWord* finger = _finger;
2226 if (objAddr < finger) {
2227 if (verbose_high())
2228 gclog_or_tty->print_cr("[global] below the global finger "
2229 "("PTR_FORMAT"), pushing it", finger);
2230 if (!mark_stack_push(obj)) {
2231 if (verbose_low())
2232 gclog_or_tty->print_cr("[global] global stack overflow during "
2233 "deal_with_reference");
2234 }
2235 }
2236 }
2237 }
2238 }
2239 }
2241 void ConcurrentMark::drainAllSATBBuffers() {
2242 CMGlobalObjectClosure oc(this);
2243 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2244 satb_mq_set.set_closure(&oc);
2246 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2247 if (verbose_medium())
2248 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2249 }
2251 // no need to check whether we should do this, as this is only
2252 // called during an evacuation pause
2253 satb_mq_set.iterate_closure_all_threads();
2255 satb_mq_set.set_closure(NULL);
2256 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2257 }
2259 void ConcurrentMark::markPrev(oop p) {
2260 // Note we are overriding the read-only view of the prev map here, via
2261 // the cast.
2262 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2263 }
2265 void ConcurrentMark::clear(oop p) {
2266 assert(p != NULL && p->is_oop(), "expected an oop");
2267 HeapWord* addr = (HeapWord*)p;
2268 assert(addr >= _nextMarkBitMap->startWord() &&
2269 addr < _nextMarkBitMap->endWord(), "in a region");
2271 _nextMarkBitMap->clear(addr);
2272 }
2274 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2275 // Note we are overriding the read-only view of the prev map here, via
2276 // the cast.
2277 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2278 _nextMarkBitMap->clearRange(mr);
2279 }
2281 HeapRegion*
2282 ConcurrentMark::claim_region(int task_num) {
2283 // "checkpoint" the finger
2284 HeapWord* finger = _finger;
2286 // _heap_end will not change underneath our feet; it only changes at
2287 // yield points.
2288 while (finger < _heap_end) {
2289 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2291 // is the gap between reading the finger and doing the CAS too long?
2293 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2294 HeapWord* bottom = curr_region->bottom();
2295 HeapWord* end = curr_region->end();
2296 HeapWord* limit = curr_region->next_top_at_mark_start();
2298 if (verbose_low())
2299 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2300 "["PTR_FORMAT", "PTR_FORMAT"), "
2301 "limit = "PTR_FORMAT,
2302 task_num, curr_region, bottom, end, limit);
2304 HeapWord* res =
2305 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2306 if (res == finger) {
2307 // we succeeded
2309 // notice that _finger == end cannot be guaranteed here since
2310 // someone else might have moved the finger even further
2311 assert(_finger >= end, "the finger should have moved forward");
2313 if (verbose_low())
2314 gclog_or_tty->print_cr("[%d] we were successful with region = "
2315 PTR_FORMAT, task_num, curr_region);
2317 if (limit > bottom) {
2318 if (verbose_low())
2319 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2320 "returning it ", task_num, curr_region);
2321 return curr_region;
2322 } else {
2323 assert(limit == bottom,
2324 "the region limit should be at bottom");
2325 if (verbose_low())
2326 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2327 "returning NULL", task_num, curr_region);
2328 // we return NULL and the caller should try calling
2329 // claim_region() again.
2330 return NULL;
2331 }
2332 } else {
2333 assert(_finger > finger, "the finger should have moved forward");
2334 if (verbose_low())
2335 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2336 "global finger = "PTR_FORMAT", "
2337 "our finger = "PTR_FORMAT,
2338 task_num, _finger, finger);
2340 // read it again
2341 finger = _finger;
2342 }
2343 }
2345 return NULL;
2346 }
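// The claim protocol above in miniature (an illustrative, compiled-out
// sketch; claim_unit is a hypothetical name): tasks race to advance the
// shared finger past a region with a CAS. A NULL result means another
// task won the race and the caller should re-read the finger and retry.
#if 0
static HeapWord* claim_unit(HeapWord* volatile* finger_addr,
                            HeapWord* observed,
                            HeapWord* new_finger) {
  HeapWord* res =
    (HeapWord*) Atomic::cmpxchg_ptr(new_finger, finger_addr, observed);
  return (res == observed) ? observed : NULL;
}
#endif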
2348 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2349 bool result = false;
2350 for (int i = 0; i < (int)_max_task_num; ++i) {
2351 CMTask* the_task = _tasks[i];
2352 MemRegion mr = the_task->aborted_region();
2353 if (mr.start() != NULL) {
2354 assert(mr.end() != NULL, "invariant");
2355 assert(mr.word_size() > 0, "invariant");
2356 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2357 assert(hr != NULL, "invariant");
2358 if (hr->in_collection_set()) {
2359 // The region points into the collection set
2360 the_task->set_aborted_region(MemRegion());
2361 result = true;
2362 }
2363 }
2364 }
2365 return result;
2366 }
2368 bool ConcurrentMark::has_aborted_regions() {
2369 for (int i = 0; i < (int)_max_task_num; ++i) {
2370 CMTask* the_task = _tasks[i];
2371 MemRegion mr = the_task->aborted_region();
2372 if (mr.start() != NULL) {
2373 assert(mr.end() != NULL, "invariant");
2374 assert(mr.word_size() > 0, "invariant");
2375 return true;
2376 }
2377 }
2378 return false;
2379 }
2381 void ConcurrentMark::oops_do(OopClosure* cl) {
2382 if (_markStack.size() > 0 && verbose_low())
2383 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2384 "size = %d", _markStack.size());
2385 // we first iterate over the contents of the mark stack...
2386 _markStack.oops_do(cl);
2388 for (int i = 0; i < (int)_max_task_num; ++i) {
2389 OopTaskQueue* queue = _task_queues->queue((int)i);
2391 if (queue->size() > 0 && verbose_low())
2392 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2393 "size = %d", i, queue->size());
2395 // ...then over the contents of all the task queues.
2396 queue->oops_do(cl);
2397 }
2399 // Invalidate any entries in the region stack that point into
2400 // the collection set.
2401 if (_regionStack.invalidate_entries_into_cset()) {
2402 // otherwise, any gray objects copied during the evacuation pause
2403 // might not be visited.
2404 assert(_should_gray_objects, "invariant");
2405 }
2407 // Invalidate any aborted regions, recorded in the individual CM
2408 // tasks, that point into the collection set.
2409 if (invalidate_aborted_regions_in_cset()) {
2410 // otherwise, any gray objects copied during the evacuation pause
2411 // might not be visited.
2412 assert(_should_gray_objects, "invariant");
2413 }
2415 }
2417 void ConcurrentMark::clear_marking_state() {
2418 _markStack.setEmpty();
2419 _markStack.clear_overflow();
2420 _regionStack.setEmpty();
2421 _regionStack.clear_overflow();
2422 clear_has_overflown();
2423 _finger = _heap_start;
2425 for (int i = 0; i < (int)_max_task_num; ++i) {
2426 OopTaskQueue* queue = _task_queues->queue(i);
2427 queue->set_empty();
2428 // Clear any partial regions from the CMTasks
2429 _tasks[i]->clear_aborted_region();
2430 }
2431 }
2433 void ConcurrentMark::print_stats() {
2434 if (verbose_stats()) {
2435 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2436 for (size_t i = 0; i < _active_tasks; ++i) {
2437 _tasks[i]->print_stats();
2438 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2439 }
2440 }
2441 }
2443 class CSMarkOopClosure: public OopClosure {
2444 friend class CSMarkBitMapClosure;
2446 G1CollectedHeap* _g1h;
2447 CMBitMap* _bm;
2448 ConcurrentMark* _cm;
2449 oop* _ms;
2450 jint* _array_ind_stack;
2451 int _ms_size;
2452 int _ms_ind;
2453 int _array_increment;
2455 bool push(oop obj, int arr_ind = 0) {
2456 if (_ms_ind == _ms_size) {
2457 gclog_or_tty->print_cr("Mark stack is full.");
2458 return false;
2459 }
2460 _ms[_ms_ind] = obj;
2461 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2462 _ms_ind++;
2463 return true;
2464 }
2466 oop pop() {
2467 if (_ms_ind == 0) return NULL;
2468 else {
2469 _ms_ind--;
2470 return _ms[_ms_ind];
2471 }
2472 }
2474 template <class T> bool drain() {
2475 while (_ms_ind > 0) {
2476 oop obj = pop();
2477 assert(obj != NULL, "Since index was non-zero.");
2478 if (obj->is_objArray()) {
2479 jint arr_ind = _array_ind_stack[_ms_ind];
2480 objArrayOop aobj = objArrayOop(obj);
2481 jint len = aobj->length();
2482 jint next_arr_ind = arr_ind + _array_increment;
2483 if (next_arr_ind < len) {
2484 push(obj, next_arr_ind);
2485 }
2486 // Now process this portion of this one.
2487 int lim = MIN2(next_arr_ind, len);
2488 for (int j = arr_ind; j < lim; j++) {
2489 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2490 }
2492 } else {
2493 obj->oop_iterate(this);
2494 }
2495 if (abort()) return false;
2496 }
2497 return true;
2498 }
2500 public:
2501 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2502 _g1h(G1CollectedHeap::heap()),
2503 _cm(cm),
2504 _bm(cm->nextMarkBitMap()),
2505 _ms_size(ms_size), _ms_ind(0),
2506 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2507 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2508 _array_increment(MAX2(ms_size/8, 16))
2509 {}
2511 ~CSMarkOopClosure() {
2512 FREE_C_HEAP_ARRAY(oop, _ms);
2513 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2514 }
2516 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2517 virtual void do_oop( oop* p) { do_oop_work(p); }
2519 template <class T> void do_oop_work(T* p) {
2520 T heap_oop = oopDesc::load_heap_oop(p);
2521 if (oopDesc::is_null(heap_oop)) return;
2522 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2523 if (obj->is_forwarded()) {
2524 // If the object has already been forwarded, we have to make sure
2525 // that it's marked. So follow the forwarding pointer. Note that
2526 // this does the right thing for self-forwarding pointers in the
2527 // evacuation failure case.
2528 obj = obj->forwardee();
2529 }
2530 HeapRegion* hr = _g1h->heap_region_containing(obj);
2531 if (hr != NULL) {
2532 if (hr->in_collection_set()) {
2533 if (_g1h->is_obj_ill(obj)) {
2534 _bm->mark((HeapWord*)obj);
2535 if (!push(obj)) {
2536 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2537 set_abort();
2538 }
2539 }
2540 } else {
2541 // Outside the collection set; we need to gray it
2542 _cm->deal_with_reference(obj);
2543 }
2544 }
2545 }
2546 };
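// An illustrative, compiled-out sketch of the objArray chunking that
// drain() above relies on (scan_array_chunk is a hypothetical name):
// large arrays are scanned _array_increment elements per pop, with the
// array re-pushed alongside the next start index, which bounds the work
// done between abort checks.
#if 0
template <class T>
static void scan_array_chunk(objArrayOop aobj, jint from, jint increment,
                             OopClosure* cl) {
  jint len = aobj->length();
  jint lim = MIN2(from + increment, len);
  for (jint j = from; j < lim; j++) {
    cl->do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
  }
  // if lim < len, the caller re-pushes (aobj, lim) for a later pop
}
#endif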
2548 class CSMarkBitMapClosure: public BitMapClosure {
2549 G1CollectedHeap* _g1h;
2550 CMBitMap* _bitMap;
2551 ConcurrentMark* _cm;
2552 CSMarkOopClosure _oop_cl;
2553 public:
2554 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2555 _g1h(G1CollectedHeap::heap()),
2556 _bitMap(cm->nextMarkBitMap()),
2557 _oop_cl(cm, ms_size)
2558 {}
2560 ~CSMarkBitMapClosure() {}
2562 bool do_bit(size_t offset) {
2563 // convert offset into a HeapWord*
2564 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2565 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2566 "address out of range");
2567 assert(_bitMap->isMarked(addr), "tautology");
2568 oop obj = oop(addr);
2569 if (!obj->is_forwarded()) {
2570 if (!_oop_cl.push(obj)) return false;
2571 if (UseCompressedOops) {
2572 if (!_oop_cl.drain<narrowOop>()) return false;
2573 } else {
2574 if (!_oop_cl.drain<oop>()) return false;
2575 }
2576 }
2577 // Otherwise...
2578 return true;
2579 }
2580 };
2583 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2584 CMBitMap* _bm;
2585 CSMarkBitMapClosure _bit_cl;
2586 enum SomePrivateConstants {
2587 MSSize = 1000
2588 };
2589 bool _completed;
2590 public:
2591 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2592 _bm(cm->nextMarkBitMap()),
2593 _bit_cl(cm, MSSize),
2594 _completed(true)
2595 {}
2597 ~CompleteMarkingInCSHRClosure() {}
2599 bool doHeapRegion(HeapRegion* r) {
2600 if (!r->evacuation_failed()) {
2601 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2602 if (!mr.is_empty()) {
2603 if (!_bm->iterate(&_bit_cl, mr)) {
2604 _completed = false;
2605 return true;
2606 }
2607 }
2608 }
2609 return false;
2610 }
2612 bool completed() { return _completed; }
2613 };
2615 class ClearMarksInHRClosure: public HeapRegionClosure {
2616 CMBitMap* _bm;
2617 public:
2618 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2620 bool doHeapRegion(HeapRegion* r) {
2621 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2622 MemRegion usedMR = r->used_region();
2623 _bm->clearRange(usedMR);
2624 }
2625 return false;
2626 }
2627 };
2629 void ConcurrentMark::complete_marking_in_collection_set() {
2630 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2632 if (!g1h->mark_in_progress()) {
2633 g1h->g1_policy()->record_mark_closure_time(0.0);
2634 return;
2635 }
2637 int i = 1;
2638 double start = os::elapsedTime();
2639 while (true) {
2640 i++;
2641 CompleteMarkingInCSHRClosure cmplt(this);
2642 g1h->collection_set_iterate(&cmplt);
2643 if (cmplt.completed()) break;
2644 }
2645 double end_time = os::elapsedTime();
2646 double elapsed_time_ms = (end_time - start) * 1000.0;
2647 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2649 ClearMarksInHRClosure clr(nextMarkBitMap());
2650 g1h->collection_set_iterate(&clr);
2651 }
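// Note on the retry loop above: each pass constructs a fresh
// CompleteMarkingInCSHRClosure, and a pass can come back incomplete
// when the closure's fixed-size mark stack fills up (its push() fails
// and sets the abort flag). Re-running the pass is safe because objects
// already marked in the bitmap are cheap to revisit.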
2653 // The next two methods deal with the following optimisation. Some
2654 // objects are gray by being marked and located above the finger. If
2655 // they are copied, during an evacuation pause, below the finger then
2656 // they need to be pushed on the stack. The observation is that, if
2657 // there are no regions in the collection set located above the
2658 // finger, then the above cannot happen, hence we do not need to
2659 // explicitly gray any objects when copying them to below the
2660 // finger. The global stack will be scanned to ensure that, if it
2661 // points to objects being copied, it will update their
2662 // location. There is a tricky situation with the gray objects in
2663 // the region stack that are being copied, however. See the comment
2664 // in newCSet().
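// A compiled-out illustration of that invariant (the helper name is
// hypothetical): with min_finger the lowest of the global and per-task
// fingers, an evacuated object can only be missed, and hence needs
// explicit graying, if some collection set region ends above that
// finger. This is exactly the check registerCSetRegion() performs.
#if 0
static bool evacuation_may_need_graying(HeapWord* min_finger,
                                        HeapWord* cset_region_end) {
  return cset_region_end > min_finger;
}
#endif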
2666 void ConcurrentMark::newCSet() {
2667 if (!concurrent_marking_in_progress())
2668 // nothing to do if marking is not in progress
2669 return;
2671 // find what the lowest finger is among the global and local fingers
2672 _min_finger = _finger;
2673 for (int i = 0; i < (int)_max_task_num; ++i) {
2674 CMTask* task = _tasks[i];
2675 HeapWord* task_finger = task->finger();
2676 if (task_finger != NULL && task_finger < _min_finger)
2677 _min_finger = task_finger;
2678 }
2680 _should_gray_objects = false;
2682 // This fixes a very subtle and frustrating bug. It might be the case
2683 // that, during an evacuation pause, heap regions that contain
2684 // objects that are gray (by being in regions contained in the
2685 // region stack) are included in the collection set. Since such gray
2686 // objects will be moved, and because it's not easy to redirect
2687 // region stack entries to point to a new location (because objects
2688 // in one region might be scattered to multiple regions after they
2689 // are copied), one option is to ensure that all marked objects
2690 // copied during a pause are pushed on the stack. Notice, however,
2691 // that this problem can only happen when the region stack is not
2692 // empty during an evacuation pause. So, we make the fix a bit less
2693 // conservative and ensure that regions are pushed on the stack,
2694 // irrespective of whether all collection set regions are below the
2695 // finger, if the region stack is not empty. This is expected to be
2696 // a rare case, so I don't think it's necessary to be smarter about it.
2697 if (!region_stack_empty() || has_aborted_regions())
2698 _should_gray_objects = true;
2699 }
2701 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2702 if (!concurrent_marking_in_progress())
2703 return;
2705 HeapWord* region_end = hr->end();
2706 if (region_end > _min_finger)
2707 _should_gray_objects = true;
2708 }
2710 // abandon current marking iteration due to a Full GC
2711 void ConcurrentMark::abort() {
2712 // Clear all marks to force marking thread to do nothing
2713 _nextMarkBitMap->clearAll();
2714 // Empty mark stack
2715 clear_marking_state();
2716 for (int i = 0; i < (int)_max_task_num; ++i) {
2717 _tasks[i]->clear_region_fields();
2718 }
2719 _has_aborted = true;
2721 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2722 satb_mq_set.abandon_partial_marking();
2723 // This can be called either during or outside marking; we'll read
2724 // the expected_active value from the SATB queue set.
2725 satb_mq_set.set_active_all_threads(
2726 false, /* new active value */
2727 satb_mq_set.is_active() /* expected_active */);
2728 }
2730 static void print_ms_time_info(const char* prefix, const char* name,
2731 NumberSeq& ns) {
2732 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2733 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2734 if (ns.num() > 0) {
2735 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2736 prefix, ns.sd(), ns.maximum());
2737 }
2738 }
2740 void ConcurrentMark::print_summary_info() {
2741 gclog_or_tty->print_cr(" Concurrent marking:");
2742 print_ms_time_info(" ", "init marks", _init_times);
2743 print_ms_time_info(" ", "remarks", _remark_times);
2744 {
2745 print_ms_time_info(" ", "final marks", _remark_mark_times);
2746 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2748 }
2749 print_ms_time_info(" ", "cleanups", _cleanup_times);
2750 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2751 _total_counting_time,
2752 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2753 (double)_cleanup_times.num()
2754 : 0.0));
2755 if (G1ScrubRemSets) {
2756 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2757 _total_rs_scrub_time,
2758 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2759 (double)_cleanup_times.num()
2760 : 0.0));
2761 }
2762 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2763 (_init_times.sum() + _remark_times.sum() +
2764 _cleanup_times.sum())/1000.0);
2765 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2766 "(%8.2f s marking, %8.2f s counting).",
2767 cmThread()->vtime_accum(),
2768 cmThread()->vtime_mark_accum(),
2769 cmThread()->vtime_count_accum());
2770 }
2772 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2773 _parallel_workers->print_worker_threads_on(st);
2774 }
2776 // Closures
2777 // XXX: there seems to be a lot of code duplication here;
2778 // should refactor and consolidate the shared code.
2783 // We take a break if someone is trying to stop the world.
2784 bool ConcurrentMark::do_yield_check(int worker_i) {
2785 if (should_yield()) {
2786 if (worker_i == 0)
2787 _g1h->g1_policy()->record_concurrent_pause();
2788 cmThread()->yield();
2789 if (worker_i == 0)
2790 _g1h->g1_policy()->record_concurrent_pause_end();
2791 return true;
2792 } else {
2793 return false;
2794 }
2795 }
2797 bool ConcurrentMark::should_yield() {
2798 return cmThread()->should_yield();
2799 }
2801 bool ConcurrentMark::containing_card_is_marked(void* p) {
2802 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2803 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2804 }
2806 bool ConcurrentMark::containing_cards_are_marked(void* start,
2807 void* last) {
2808 return
2809 containing_card_is_marked(start) &&
2810 containing_card_is_marked(last);
2811 }
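// Worked example (assuming the usual 512-byte cards, i.e. a card shift
// of 9): a pointer 0x1300 bytes past the start of the reserved region
// has byte offset 0x1300 == 4864, so it maps to card 4864 >> 9 == 9 in
// _card_bm.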
2813 #ifndef PRODUCT
2814 // for debugging purposes
2815 void ConcurrentMark::print_finger() {
2816 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2817 _heap_start, _heap_end, _finger);
2818 for (int i = 0; i < (int) _max_task_num; ++i) {
2819 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2820 }
2821 gclog_or_tty->print_cr("");
2822 }
2823 #endif
2825 // Closure for iteration over bitmaps
2826 class CMBitMapClosure : public BitMapClosure {
2827 private:
2828 // the bitmap that is being iterated over
2829 CMBitMap* _nextMarkBitMap;
2830 ConcurrentMark* _cm;
2831 CMTask* _task;
2832 // true if we're scanning a heap region claimed by the task (so that
2833 // we move the finger along), false if we're not, i.e. currently when
2834 // scanning a heap region popped from the region stack (so that we
2835 // do not move the task finger along; it'd be a mistake if we did so).
2836 bool _scanning_heap_region;
2838 public:
2839 CMBitMapClosure(CMTask *task,
2840 ConcurrentMark* cm,
2841 CMBitMap* nextMarkBitMap)
2842 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2844 void set_scanning_heap_region(bool scanning_heap_region) {
2845 _scanning_heap_region = scanning_heap_region;
2846 }
2848 bool do_bit(size_t offset) {
2849 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2850 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2851 assert( addr < _cm->finger(), "invariant");
2853 if (_scanning_heap_region) {
2854 statsOnly( _task->increase_objs_found_on_bitmap() );
2855 assert(addr >= _task->finger(), "invariant");
2856 // We move that task's local finger along.
2857 _task->move_finger_to(addr);
2858 } else {
2859 // We move the task's region finger along.
2860 _task->move_region_finger_to(addr);
2861 }
2863 _task->scan_object(oop(addr));
2864 // we only partially drain the local queue and global stack
2865 _task->drain_local_queue(true);
2866 _task->drain_global_stack(true);
2868 // if the has_aborted flag has been raised, we need to bail out of
2869 // the iteration
2870 return !_task->has_aborted();
2871 }
2872 };
2874 // Closure for iterating over objects, currently only used for
2875 // processing SATB buffers.
2876 class CMObjectClosure : public ObjectClosure {
2877 private:
2878 CMTask* _task;
2880 public:
2881 void do_object(oop obj) {
2882 _task->deal_with_reference(obj);
2883 }
2885 CMObjectClosure(CMTask* task) : _task(task) { }
2886 };
2888 // Closure for iterating over object fields
2889 class CMOopClosure : public OopClosure {
2890 private:
2891 G1CollectedHeap* _g1h;
2892 ConcurrentMark* _cm;
2893 CMTask* _task;
2895 public:
2896 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2897 virtual void do_oop( oop* p) { do_oop_work(p); }
2899 template <class T> void do_oop_work(T* p) {
2900 assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2901 assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2902 "invariant");
2904 oop obj = oopDesc::load_decode_heap_oop(p);
2905 if (_cm->verbose_high())
2906 gclog_or_tty->print_cr("[%d] we're looking at location "
2907 "*"PTR_FORMAT" = "PTR_FORMAT,
2908 _task->task_id(), p, (void*) obj);
2909 _task->deal_with_reference(obj);
2910 }
2912 CMOopClosure(G1CollectedHeap* g1h,
2913 ConcurrentMark* cm,
2914 CMTask* task)
2915 : _g1h(g1h), _cm(cm), _task(task)
2916 {
2917 _ref_processor = g1h->ref_processor();
2918 assert(_ref_processor != NULL, "should not be NULL");
2919 }
2920 };
2922 void CMTask::setup_for_region(HeapRegion* hr) {
2923 // Separated the asserts so that we know which one fires.
2924 assert(hr != NULL,
2925 "claim_region() should have filtered out continues humongous regions");
2926 assert(!hr->continuesHumongous(),
2927 "claim_region() should have filtered out continues humongous regions");
2929 if (_cm->verbose_low())
2930 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2931 _task_id, hr);
2933 _curr_region = hr;
2934 _finger = hr->bottom();
2935 update_region_limit();
2936 }
2938 void CMTask::update_region_limit() {
2939 HeapRegion* hr = _curr_region;
2940 HeapWord* bottom = hr->bottom();
2941 HeapWord* limit = hr->next_top_at_mark_start();
2943 if (limit == bottom) {
2944 if (_cm->verbose_low())
2945 gclog_or_tty->print_cr("[%d] found an empty region "
2946 "["PTR_FORMAT", "PTR_FORMAT")",
2947 _task_id, bottom, limit);
2948 // The region was collected underneath our feet.
2949 // We set the finger to bottom to ensure that the bitmap
2950 // iteration that will follow this will not do anything.
2951 // (this is not a condition that holds when we set the region up,
2952 // as the region is not supposed to be empty in the first place)
2953 _finger = bottom;
2954 } else if (limit >= _region_limit) {
2955 assert(limit >= _finger, "peace of mind");
2956 } else {
2957 assert(limit < _region_limit, "only way to get here");
2958 // This can happen under some pretty unusual circumstances. An
2959 // evacuation pause empties the region underneath our feet (NTAMS
2960 // at bottom). We then do some allocation in the region (NTAMS
2961 // stays at bottom), followed by the region being used as a GC
2962 // alloc region (NTAMS will move to top() and the objects
2963 // originally below it will be grayed). All objects now marked in
2964 // the region are explicitly grayed, if below the global finger,
2965 // and we do not need in fact to scan anything else. So, we simply
2966 // set _finger to be limit to ensure that the bitmap iteration
2967 // doesn't do anything.
2968 _finger = limit;
2969 }
2971 _region_limit = limit;
2972 }
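// The three cases above in miniature (a compiled-out sketch with a
// hypothetical helper; ntams is the region's next_top_at_mark_start()):
#if 0
static HeapWord* adjusted_finger(HeapWord* bottom, HeapWord* ntams,
                                 HeapWord* finger, HeapWord* old_limit) {
  if (ntams == bottom)    return bottom; // region emptied under our feet
  if (ntams >= old_limit) return finger; // normal case: keep scanning
  return ntams;                          // re-used region: already grayed,
                                         // make the bitmap scan a no-op
}
#endif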
2974 void CMTask::giveup_current_region() {
2975 assert(_curr_region != NULL, "invariant");
2976 if (_cm->verbose_low())
2977 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2978 _task_id, _curr_region);
2979 clear_region_fields();
2980 }
2982 void CMTask::clear_region_fields() {
2983 // Values for these three fields that indicate that we're not
2984 // holding on to a region.
2985 _curr_region = NULL;
2986 _finger = NULL;
2987 _region_limit = NULL;
2989 _region_finger = NULL;
2990 }
2992 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2993 guarantee(nextMarkBitMap != NULL, "invariant");
2995 if (_cm->verbose_low())
2996 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2998 _nextMarkBitMap = nextMarkBitMap;
2999 clear_region_fields();
3000 assert(_aborted_region.is_empty(), "should have been cleared");
3002 _calls = 0;
3003 _elapsed_time_ms = 0.0;
3004 _termination_time_ms = 0.0;
3005 _termination_start_time_ms = 0.0;
3007 #if _MARKING_STATS_
3008 _local_pushes = 0;
3009 _local_pops = 0;
3010 _local_max_size = 0;
3011 _objs_scanned = 0;
3012 _global_pushes = 0;
3013 _global_pops = 0;
3014 _global_max_size = 0;
3015 _global_transfers_to = 0;
3016 _global_transfers_from = 0;
3017 _region_stack_pops = 0;
3018 _regions_claimed = 0;
3019 _objs_found_on_bitmap = 0;
3020 _satb_buffers_processed = 0;
3021 _steal_attempts = 0;
3022 _steals = 0;
3023 _aborted = 0;
3024 _aborted_overflow = 0;
3025 _aborted_cm_aborted = 0;
3026 _aborted_yield = 0;
3027 _aborted_timed_out = 0;
3028 _aborted_satb = 0;
3029 _aborted_termination = 0;
3030 #endif // _MARKING_STATS_
3031 }
3033 bool CMTask::should_exit_termination() {
3034 regular_clock_call();
3035 // This is called when we are in the termination protocol. We should
3036 // quit if, for some reason, this task wants to abort or the global
3037 // stack is not empty (this means that we can get work from it).
3038 return !_cm->mark_stack_empty() || has_aborted();
3039 }
3041 // This determines whether the method below will check both the
3042 // local and global fingers when deciding whether to push a gray
3043 // object on the stack (value 1) or whether it will only check the
3044 // global one (value 0). The tradeoff is that the former will be a
3045 // bit more accurate and possibly push fewer entries on the stack,
3046 // but it might also be a little bit slower.
3048 #define _CHECK_BOTH_FINGERS_ 1
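// A simplified, compiled-out sketch of the two policies the macro
// selects between (should_push is a hypothetical name; the real code in
// deal_with_reference() below also skips objects that fall inside the
// task's current region):
#if 0
static bool should_push(HeapWord* addr, HeapWord* local_finger,
                        HeapWord* global_finger, bool check_both) {
  if (check_both && local_finger != NULL && addr < local_finger) {
    return true;               // below the local finger
  }
  return addr < global_finger; // below the global finger
}
#endif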
3050 void CMTask::deal_with_reference(oop obj) {
3051 if (_cm->verbose_high())
3052 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3053 _task_id, (void*) obj);
3055 ++_refs_reached;
3057 HeapWord* objAddr = (HeapWord*) obj;
3058 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3059 if (_g1h->is_in_g1_reserved(objAddr)) {
3060 assert(obj != NULL, "is_in_g1_reserved should ensure this");
3061 HeapRegion* hr = _g1h->heap_region_containing(obj);
3062 if (_g1h->is_obj_ill(obj, hr)) {
3063 if (_cm->verbose_high())
3064 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3065 _task_id, (void*) obj);
3067 // we need to mark it first
3068 if (_nextMarkBitMap->parMark(objAddr)) {
3069 // No OrderAccess::store_load() is needed. It is implicit in the
3070 // CAS done in parMark(objAddr) above
3071 HeapWord* global_finger = _cm->finger();
3073 #if _CHECK_BOTH_FINGERS_
3074 // we will check both the local and global fingers
3076 if (_finger != NULL && objAddr < _finger) {
3077 if (_cm->verbose_high())
3078 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3079 "pushing it", _task_id, _finger);
3080 push(obj);
3081 } else if (_curr_region != NULL && objAddr < _region_limit) {
3082 // do nothing
3083 } else if (objAddr < global_finger) {
3084 // Notice that the global finger might be moving forward
3085 // concurrently. This is not a problem. In the worst case, we
3086 // mark the object while it is above the global finger and, by
3087 // the time we read the global finger, it has moved forward
3088 // past this object. In this case, the object will probably
3089 // be visited when a task is scanning the region and will also
3090 // be pushed on the stack. So, some duplicate work, but no
3091 // correctness problems.
3093 if (_cm->verbose_high())
3094 gclog_or_tty->print_cr("[%d] below the global finger "
3095 "("PTR_FORMAT"), pushing it",
3096 _task_id, global_finger);
3097 push(obj);
3098 } else {
3099 // do nothing
3100 }
3101 #else // _CHECK_BOTH_FINGERS_
3102 // we will only check the global finger
3104 if (objAddr < global_finger) {
3105 // see long comment above
3107 if (_cm->verbose_high())
3108 gclog_or_tty->print_cr("[%d] below the global finger "
3109 "("PTR_FORMAT"), pushing it",
3110 _task_id, global_finger);
3111 push(obj);
3112 }
3113 #endif // _CHECK_BOTH_FINGERS_
3114 }
3115 }
3116 }
3117 }
3119 void CMTask::push(oop obj) {
3120 HeapWord* objAddr = (HeapWord*) obj;
3121 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3122 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3123 "invariant");
3124 assert(!_g1h->is_obj_ill(obj), "invariant");
3125 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3127 if (_cm->verbose_high())
3128 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3130 if (!_task_queue->push(obj)) {
3131 // The local task queue looks full. We need to push some entries
3132 // to the global stack.
3134 if (_cm->verbose_medium())
3135 gclog_or_tty->print_cr("[%d] task queue overflow, "
3136 "moving entries to the global stack",
3137 _task_id);
3138 move_entries_to_global_stack();
3140 // this should succeed since, even if we overflow the global
3141 // stack, we should have definitely removed some entries from the
3142 // local queue. So, there must be space on it.
3143 bool success = _task_queue->push(obj);
3144 assert(success, "invariant");
3145 }
3147 statsOnly( int tmp_size = _task_queue->size();
3148 if (tmp_size > _local_max_size)
3149 _local_max_size = tmp_size;
3150 ++_local_pushes );
3151 }
3153 void CMTask::reached_limit() {
3154 assert(_words_scanned >= _words_scanned_limit ||
3155 _refs_reached >= _refs_reached_limit ,
3156 "shouldn't have been called otherwise");
3157 regular_clock_call();
3158 }
3160 void CMTask::regular_clock_call() {
3161 if (has_aborted())
3162 return;
3164 // First, we need to recalculate the words scanned and refs reached
3165 // limits for the next clock call.
3166 recalculate_limits();
3168 // During the regular clock call we do the following
3170 // (1) If an overflow has been flagged, then we abort.
3171 if (_cm->has_overflown()) {
3172 set_has_aborted();
3173 return;
3174 }
3176 // If we are not concurrent (i.e. we're doing remark) we don't need
3177 // to check anything else. The other steps are only needed during
3178 // the concurrent marking phase.
3179 if (!concurrent())
3180 return;
3182 // (2) If marking has been aborted for Full GC, then we also abort.
3183 if (_cm->has_aborted()) {
3184 set_has_aborted();
3185 statsOnly( ++_aborted_cm_aborted );
3186 return;
3187 }
3189 double curr_time_ms = os::elapsedVTime() * 1000.0;
3191 // (3) If marking stats are enabled, then we update the step history.
3192 #if _MARKING_STATS_
3193 if (_words_scanned >= _words_scanned_limit)
3194 ++_clock_due_to_scanning;
3195 if (_refs_reached >= _refs_reached_limit)
3196 ++_clock_due_to_marking;
3198 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3199 _interval_start_time_ms = curr_time_ms;
3200 _all_clock_intervals_ms.add(last_interval_ms);
3202 if (_cm->verbose_medium()) {
3203 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3204 "scanned = %d%s, refs reached = %d%s",
3205 _task_id, last_interval_ms,
3206 _words_scanned,
3207 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3208 _refs_reached,
3209 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3210 }
3211 #endif // _MARKING_STATS_
3213 // (4) We check whether we should yield. If we have to, then we abort.
3214 if (_cm->should_yield()) {
3215 // We should yield. To do this we abort the task. The caller is
3216 // responsible for yielding.
3217 set_has_aborted();
3218 statsOnly( ++_aborted_yield );
3219 return;
3220 }
3222 // (5) We check whether we've reached our time quota. If we have,
3223 // then we abort.
3224 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3225 if (elapsed_time_ms > _time_target_ms) {
3226 set_has_aborted();
3227 _has_aborted_timed_out = true;
3228 statsOnly( ++_aborted_timed_out );
3229 return;
3230 }
3232 // (6) Finally, we check whether there are enough completed SATB
3233 // buffers available for processing. If there are, we abort.
3234 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3235 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3236 if (_cm->verbose_low())
3237 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3238 _task_id);
3239 // we do need to process SATB buffers, we'll abort and restart
3240 // the marking task to do so
3241 set_has_aborted();
3242 statsOnly( ++_aborted_satb );
3243 return;
3244 }
3245 }
3247 void CMTask::recalculate_limits() {
3248 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3249 _words_scanned_limit = _real_words_scanned_limit;
3251 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3252 _refs_reached_limit = _real_refs_reached_limit;
3253 }
3255 void CMTask::decrease_limits() {
3256 // This is called when we believe that we're going to do an infrequent
3257 // operation which will increase the per byte scanned cost (i.e. move
3258 // entries to/from the global stack). It basically tries to decrease the
3259 // scanning limit so that the clock is called earlier.
3261 if (_cm->verbose_medium())
3262 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3264 _words_scanned_limit = _real_words_scanned_limit -
3265 3 * words_scanned_period / 4;
3266 _refs_reached_limit = _real_refs_reached_limit -
3267 3 * refs_reached_period / 4;
3268 }
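// Worked example (with an illustrative period of 12*1024 words): after
// recalculate_limits() the next clock call would be a full period away;
// the adjustment above pulls the limit back by 3/4 of the period, so
// only 3*1024 words remain before the clock fires.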
3270 void CMTask::move_entries_to_global_stack() {
3271 // local array where we'll store the entries that will be popped
3272 // from the local queue
3273 oop buffer[global_stack_transfer_size];
3275 int n = 0;
3276 oop obj;
3277 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3278 buffer[n] = obj;
3279 ++n;
3280 }
3282 if (n > 0) {
3283 // we popped at least one entry from the local queue
3285 statsOnly( ++_global_transfers_to; _local_pops += n );
3287 if (!_cm->mark_stack_push(buffer, n)) {
3288 if (_cm->verbose_low())
3289 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3290 set_has_aborted();
3291 } else {
3292 // the transfer was successful
3294 if (_cm->verbose_medium())
3295 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3296 _task_id, n);
3297 statsOnly( int tmp_size = _cm->mark_stack_size();
3298 if (tmp_size > _global_max_size)
3299 _global_max_size = tmp_size;
3300 _global_pushes += n );
3301 }
3302 }
3304 // this operation was quite expensive, so decrease the limits
3305 decrease_limits();
3306 }
3308 void CMTask::get_entries_from_global_stack() {
3309 // local array where we'll store the entries that will be popped
3310 // from the global stack.
3311 oop buffer[global_stack_transfer_size];
3312 int n;
3313 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3314 assert(n <= global_stack_transfer_size,
3315 "we should not pop more than the given limit");
3316 if (n > 0) {
3317 // yes, we did actually pop at least one entry
3319 statsOnly( ++_global_transfers_from; _global_pops += n );
3320 if (_cm->verbose_medium())
3321 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3322 _task_id, n);
3323 for (int i = 0; i < n; ++i) {
3324 bool success = _task_queue->push(buffer[i]);
3325 // We only call this when the local queue is empty or under a
3326 // given target limit. So, we do not expect this push to fail.
3327 assert(success, "invariant");
3328 }
3330 statsOnly( int tmp_size = _task_queue->size();
3331 if (tmp_size > _local_max_size)
3332 _local_max_size = tmp_size;
3333 _local_pushes += n );
3334 }
3336 // this operation was quite expensive, so decrease the limits
3337 decrease_limits();
3338 }
3340 void CMTask::drain_local_queue(bool partially) {
3341 if (has_aborted())
3342 return;
3344 // Decide what the target size is, depending on whether we're going to
3345 // drain it partially (so that other tasks can steal if they run out
3346 // of things to do) or totally (at the very end).
3347 size_t target_size;
3348 if (partially)
3349 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3350 else
3351 target_size = 0;
3353 if (_task_queue->size() > target_size) {
3354 if (_cm->verbose_high())
3355 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3356 _task_id, target_size);
3358 oop obj;
3359 bool ret = _task_queue->pop_local(obj);
3360 while (ret) {
3361 statsOnly( ++_local_pops );
3363 if (_cm->verbose_high())
3364 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3365 (void*) obj);
3367 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3368 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3369 "invariant");
3371 scan_object(obj);
3373 if (_task_queue->size() <= target_size || has_aborted())
3374 ret = false;
3375 else
3376 ret = _task_queue->pop_local(obj);
3377 }
3379 if (_cm->verbose_high())
3380 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3381 _task_id, _task_queue->size());
3382 }
3383 }
3385 void CMTask::drain_global_stack(bool partially) {
3386 if (has_aborted())
3387 return;
3389 // We have a policy to drain the local queue before we attempt to
3390 // drain the global stack.
3391 assert(partially || _task_queue->size() == 0, "invariant");
3393 // Decide what the target size is, depending on whether we're going to
3394 // drain it partially (so that other tasks can steal if they run out
3395 // of things to do) or totally (at the very end). Notice that,
3396 // because we move entries from the global stack in chunks or
3397 // because another task might be doing the same, we might in fact
3398 // drop below the target. But, this is not a problem.
3399 size_t target_size;
3400 if (partially)
3401 target_size = _cm->partial_mark_stack_size_target();
3402 else
3403 target_size = 0;
3405 if (_cm->mark_stack_size() > target_size) {
3406 if (_cm->verbose_low())
3407       gclog_or_tty->print_cr("[%d] draining global stack, target size = %d",
3408 _task_id, target_size);
3410 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3411 get_entries_from_global_stack();
3412 drain_local_queue(partially);
3413 }
3415 if (_cm->verbose_low())
3416 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3417 _task_id, _cm->mark_stack_size());
3418 }
3419 }
3421 // The SATB queue code makes several assumptions about whether the
3422 // par or non-par versions of its methods get called. This is why
3423 // some of the code is replicated. We should really get rid of the
3424 // single-threaded version of the code to simplify things.
3425 void CMTask::drain_satb_buffers() {
3426 if (has_aborted())
3427 return;
3429 // We set this so that the regular clock knows that we're in the
3430 // middle of draining buffers and doesn't set the abort flag when it
3431 // notices that SATB buffers are available for draining. It'd be
3432   // very counterproductive if it did that. :-)
3433 _draining_satb_buffers = true;
3435 CMObjectClosure oc(this);
3436 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3437 if (G1CollectedHeap::use_parallel_gc_threads())
3438 satb_mq_set.set_par_closure(_task_id, &oc);
3439 else
3440 satb_mq_set.set_closure(&oc);
3442 // This keeps claiming and applying the closure to completed buffers
3443 // until we run out of buffers or we need to abort.
3444 if (G1CollectedHeap::use_parallel_gc_threads()) {
3445 while (!has_aborted() &&
3446 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3447 if (_cm->verbose_medium())
3448 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3449 statsOnly( ++_satb_buffers_processed );
3450 regular_clock_call();
3451 }
3452 } else {
3453 while (!has_aborted() &&
3454 satb_mq_set.apply_closure_to_completed_buffer()) {
3455 if (_cm->verbose_medium())
3456 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3457 statsOnly( ++_satb_buffers_processed );
3458 regular_clock_call();
3459 }
3460 }
3462 if (!concurrent() && !has_aborted()) {
3463 // We should only do this during remark.
3464 if (G1CollectedHeap::use_parallel_gc_threads())
3465 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3466 else
3467 satb_mq_set.iterate_closure_all_threads();
3468 }
3470 _draining_satb_buffers = false;
3472 assert(has_aborted() ||
3473 concurrent() ||
3474 satb_mq_set.completed_buffers_num() == 0, "invariant");
3476 if (G1CollectedHeap::use_parallel_gc_threads())
3477 satb_mq_set.set_par_closure(_task_id, NULL);
3478 else
3479 satb_mq_set.set_closure(NULL);
3481   // again, this was a potentially expensive operation, so decrease
3482   // the limits to get the regular clock call early
3483 decrease_limits();
3484 }
3486 void CMTask::drain_region_stack(BitMapClosure* bc) {
3487 if (has_aborted())
3488 return;
3490 assert(_region_finger == NULL,
3491 "it should be NULL when we're not scanning a region");
3493 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3494 if (_cm->verbose_low())
3495 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3496 _task_id, _cm->region_stack_size());
3498 MemRegion mr;
3500 if (!_aborted_region.is_empty()) {
3501 mr = _aborted_region;
3502 _aborted_region = MemRegion();
3504 if (_cm->verbose_low())
3505 gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
3506 _task_id, mr.start(), mr.end());
3507 } else {
3508 mr = _cm->region_stack_pop_lock_free();
3509 // it returns MemRegion() if the pop fails
3510 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3511 }
3513 while (mr.start() != NULL) {
3514 if (_cm->verbose_medium())
3515 gclog_or_tty->print_cr("[%d] we are scanning region "
3516 "["PTR_FORMAT", "PTR_FORMAT")",
3517 _task_id, mr.start(), mr.end());
3519 assert(mr.end() <= _cm->finger(),
3520 "otherwise the region shouldn't be on the stack");
3521 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3522 if (_nextMarkBitMap->iterate(bc, mr)) {
3523 assert(!has_aborted(),
3524 "cannot abort the task without aborting the bitmap iteration");
3526 // We finished iterating over the region without aborting.
3527 regular_clock_call();
3528 if (has_aborted())
3529 mr = MemRegion();
3530 else {
3531 mr = _cm->region_stack_pop_lock_free();
3532 // it returns MemRegion() if the pop fails
3533 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3534 }
3535 } else {
3536 assert(has_aborted(), "currently the only way to do so");
3538 // The only way to abort the bitmap iteration is to return
3539 // false from the do_bit() method. However, inside the
3540 // do_bit() method we move the _region_finger to point to the
3541 // object currently being looked at. So, if we bail out, we
3542 // have definitely set _region_finger to something non-null.
3543 assert(_region_finger != NULL, "invariant");
3545 // Make sure that any previously aborted region has been
3546 // cleared.
3547 assert(_aborted_region.is_empty(), "aborted region not cleared");
3549 // The iteration was actually aborted. So now _region_finger
3550 // points to the address of the object we last scanned. If we
3551 // leave it there, when we restart this task, we will rescan
3552 // the object. It is easy to avoid this. We move the finger by
3553 // enough to point to the next possible object header (the
3554 // bitmap knows by how much we need to move it as it knows its
3555 // granularity).
3556 MemRegion newRegion =
3557 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3559 if (!newRegion.is_empty()) {
3560 if (_cm->verbose_low()) {
3561             gclog_or_tty->print_cr("[%d] recording unscanned region "
3562                                    "[" PTR_FORMAT ", " PTR_FORMAT ") in CMTask",
3563 _task_id,
3564 newRegion.start(), newRegion.end());
3565 }
3566 // Now record the part of the region we didn't scan to
3567 // make sure this task scans it later.
3568 _aborted_region = newRegion;
3569 }
3570 // break from while
3571 mr = MemRegion();
3572 }
3573 _region_finger = NULL;
3574 }
3576 if (_cm->verbose_low())
3577 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3578 _task_id, _cm->region_stack_size());
3579 }
3580 }
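// When the bitmap iteration above aborts, the recorded remainder is
// [nextWord(_region_finger), mr.end()): one bitmap granule past the
// last object do_bit() looked at, so that object is not rescanned when
// the task restarts. Schematically, with hypothetical addresses and an
// 8-byte word for illustration:
//
//   region popped:    [0x1000, 0x2000)
//   _region_finger:    0x1800            // last object looked at
//   _aborted_region:  [0x1808, 0x2000)   // nextWord() skips that word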
3582 void CMTask::print_stats() {
3583 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3584 _task_id, _calls);
3585 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3586 _elapsed_time_ms, _termination_time_ms);
3587 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3588 _step_times_ms.num(), _step_times_ms.avg(),
3589 _step_times_ms.sd());
3590 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3591 _step_times_ms.maximum(), _step_times_ms.sum());
3593 #if _MARKING_STATS_
3594 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3595 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3596 _all_clock_intervals_ms.sd());
3597 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3598 _all_clock_intervals_ms.maximum(),
3599 _all_clock_intervals_ms.sum());
3600 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3601 _clock_due_to_scanning, _clock_due_to_marking);
3602 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3603 _objs_scanned, _objs_found_on_bitmap);
3604 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3605 _local_pushes, _local_pops, _local_max_size);
3606 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3607 _global_pushes, _global_pops, _global_max_size);
3608 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3609                          _global_transfers_to, _global_transfers_from);
3610 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3611 _regions_claimed, _region_stack_pops);
3612 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3613 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3614 _steal_attempts, _steals);
3615 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3616 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3617 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3618 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3619 _aborted_timed_out, _aborted_satb, _aborted_termination);
3620 #endif // _MARKING_STATS_
3621 }
3623 /*****************************************************************************
3625 The do_marking_step(time_target_ms) method is the building block
3626 of the parallel marking framework. It can be called in parallel
3627 with other invocations of do_marking_step() on different tasks
3628 (but only one per task, obviously) and concurrently with the
3629 mutator threads, or during remark, hence it eliminates the need
3630 for two versions of the code. When called during remark, it will
3631 pick up from where the task left off during the concurrent marking
3632    phase. Interestingly, tasks are also claimable during evacuation
3633    pauses, since do_marking_step() ensures that it aborts before
3634    it needs to yield.
3636    The data structures that it uses to do marking work are the
3637    following:
3639      (1) Marking Bitmap. If there are gray objects that appear only
3640      on the bitmap (this happens either when dealing with an overflow
3641      or when the initial marking phase has simply marked the roots
3642      and did not push them on the stack), then tasks claim heap
3643      regions whose bitmap they then scan to find gray objects. A
3644      global finger indicates where the end of the last claimed region
3645      is. A local finger indicates how far into the region a task has
3646      scanned. The two fingers are used to determine how to gray an
3647      object (i.e. whether simply marking it is OK, as it will be
3648      visited by a task in the future, or whether it needs to also be
3649      pushed on a stack).
3651      (2) Local Queue. The local queue of the task, which the task can
3652      access reasonably efficiently. Other tasks can steal from
3653      it when they run out of work. Throughout the marking phase, a
3654      task attempts to keep its local queue short but not totally
3655      empty, so that entries are available for stealing by other
3656      tasks. Only when there is no more work will a task totally
3657      drain its local queue.
3659      (3) Global Mark Stack. This handles local queue overflow. During
3660      marking, only sets of entries are moved between it and the local
3661      queues, as access to it requires a mutex, and finer-grained
3662      interaction with it might cause contention. If it
3663      overflows, then the marking phase should restart and iterate
3664      over the bitmap to identify gray objects. Throughout the marking
3665      phase, tasks attempt to keep the global mark stack at a small
3666      length but not totally empty, so that entries are available for
3667      popping by other tasks. Only when there is no more work will
3668      tasks totally drain the global mark stack.
3670      (4) Global Region Stack. Entries on it correspond to areas of
3671      the bitmap that need to be scanned, since they contain gray
3672      objects. Pushes on the region stack only happen during
3673      evacuation pauses and typically correspond to areas covered by
3674      GC LABs. If it overflows, then the marking phase should restart
3675      and iterate over the bitmap to identify gray objects. Tasks will
3676      try to totally drain the region stack as soon as possible.
3678      (5) SATB Buffer Queue. This is where completed SATB buffers are
3679      made available. Buffers are regularly removed from this queue
3680      and scanned for roots, so that the queue doesn't get too
3681      long. During remark, all completed buffers are processed, as
3682      well as the filled-in parts of any uncompleted buffers.
3684 The do_marking_step() method tries to abort when the time target
3685 has been reached. There are a few other cases when the
3686 do_marking_step() method also aborts:
3688 (1) When the marking phase has been aborted (after a Full GC).
3690 (2) When a global overflow (either on the global stack or the
3691 region stack) has been triggered. Before the task aborts, it
3692 will actually sync up with the other tasks to ensure that all
3693 the marking data structures (local queues, stacks, fingers etc.)
3694 are re-initialised so that when do_marking_step() completes,
3695 the marking phase can immediately restart.
3697 (3) When enough completed SATB buffers are available. The
3698 do_marking_step() method only tries to drain SATB buffers right
3699 at the beginning. So, if enough buffers are available, the
3700 marking step aborts and the SATB buffers are processed at
3701 the beginning of the next invocation.
3703      (4) To yield. When we have to yield, we abort and yield right
3704      at the end of do_marking_step(). This saves us a lot of hassle
3705      as, by yielding, we might allow a Full GC. If this happens,
3706      then objects will be compacted underneath our feet, the heap
3707      might shrink, etc. We save checking for this by just aborting
3708      and doing the yield right at the end.
3710 From the above it follows that the do_marking_step() method should
3711 be called in a loop (or, otherwise, regularly) until it completes.
3713 If a marking step completes without its has_aborted() flag being
3714 true, it means it has completed the current marking phase (and
3715 also all other marking tasks have done so and have all synced up).
3717    A method called regular_clock_call() is invoked "regularly" (at
3718    sub-millisecond intervals) throughout marking. It is this clock
3719    method that checks all the abort conditions mentioned above and
3720    decides when the task should abort. A work-based scheme is used
3721    to trigger this clock method: it is called when the number of
3722    object words the marking phase has scanned, or the number of
3723    references it has visited, reaches a given limit. Additional
3724    calls to the clock method have been planted in a few other
3725    strategic places too. The initial reason for the clock method
3726    was to avoid calling vtime too often, as it is quite expensive.
3727    Once in place, it was natural to piggy-back all the other abort
3728    conditions on it too, rather than check them throughout the code.
3730 *****************************************************************************/
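// A minimal sketch of the calling protocol described above, assuming
// only that do_marking_step() and has_aborted() are visible to the
// caller (the loop and the 10.0ms budget are illustrative, not the
// actual driver code):
//
//   do {
//     task->do_marking_step(10.0 /* hypothetical time target, ms */);
//     // on abort: yield, process SATB buffers, or sync up after an
//     // overflow, then call again to resume where the task left off
//   } while (task->has_aborted());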
3732 void CMTask::do_marking_step(double time_target_ms) {
3733 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3734 assert(concurrent() == _cm->concurrent(), "they should be the same");
3736 assert(concurrent() || _cm->region_stack_empty(),
3737 "the region stack should have been cleared before remark");
3738 assert(concurrent() || !_cm->has_aborted_regions(),
3739 "aborted regions should have been cleared before remark");
3740 assert(_region_finger == NULL,
3741 "this should be non-null only when a region is being scanned");
3743 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3744 assert(_task_queues != NULL, "invariant");
3745 assert(_task_queue != NULL, "invariant");
3746 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3748 assert(!_claimed,
3749 "only one thread should claim this task at any one time");
3751   // OK, this doesn't safeguard against all possible scenarios, as
3752   // it is possible for two threads to set the _claimed flag at the
3753   // same time. But it is only for debugging purposes anyway and it
3754   // will catch most problems.
3755 _claimed = true;
3757 _start_time_ms = os::elapsedVTime() * 1000.0;
3758 statsOnly( _interval_start_time_ms = _start_time_ms );
3760 double diff_prediction_ms =
3761 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3762 _time_target_ms = time_target_ms - diff_prediction_ms;
3764 // set up the variables that are used in the work-based scheme to
3765 // call the regular clock method
3766 _words_scanned = 0;
3767 _refs_reached = 0;
3768 recalculate_limits();
3770 // clear all flags
3771 clear_has_aborted();
3772 _has_aborted_timed_out = false;
3773 _draining_satb_buffers = false;
3775 ++_calls;
3777 if (_cm->verbose_low())
3778 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3779 "target = %1.2lfms >>>>>>>>>>",
3780 _task_id, _calls, _time_target_ms);
3782   // Set up the bitmap and oop closures. Anything that uses them is
3783   // eventually called from this method, so it is OK to allocate them
3784   // on this method's stack.
3785 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3786 CMOopClosure oop_closure(_g1h, _cm, this);
3787 set_oop_closure(&oop_closure);
3789 if (_cm->has_overflown()) {
3790 // This can happen if the region stack or the mark stack overflows
3791 // during a GC pause and this task, after a yield point,
3792 // restarts. We have to abort as we need to get into the overflow
3793 // protocol which happens right at the end of this task.
3794 set_has_aborted();
3795 }
3797 // First drain any available SATB buffers. After this, we will not
3798 // look at SATB buffers before the next invocation of this method.
3799 // If enough completed SATB buffers are queued up, the regular clock
3800 // will abort this task so that it restarts.
3801 drain_satb_buffers();
3802 // ...then partially drain the local queue and the global stack
3803 drain_local_queue(true);
3804 drain_global_stack(true);
3806 // Then totally drain the region stack. We will not look at
3807 // it again before the next invocation of this method. Entries on
3808 // the region stack are only added during evacuation pauses, for
3809 // which we have to yield. When we do, we abort the task anyway so
3810 // it will look at the region stack again when it restarts.
3811 bitmap_closure.set_scanning_heap_region(false);
3812 drain_region_stack(&bitmap_closure);
3813 // ...then partially drain the local queue and the global stack
3814 drain_local_queue(true);
3815 drain_global_stack(true);
3817 do {
3818 if (!has_aborted() && _curr_region != NULL) {
3819 // This means that we're already holding on to a region.
3820 assert(_finger != NULL, "if region is not NULL, then the finger "
3821 "should not be NULL either");
3823 // We might have restarted this task after an evacuation pause
3824 // which might have evacuated the region we're holding on to
3825 // underneath our feet. Let's read its limit again to make sure
3826 // that we do not iterate over a region of the heap that
3827 // contains garbage (update_region_limit() will also move
3828 // _finger to the start of the region if it is found empty).
3829 update_region_limit();
3830 // We will start from _finger not from the start of the region,
3831 // as we might be restarting this task after aborting half-way
3832 // through scanning this region. In this case, _finger points to
3833 // the address where we last found a marked object. If this is a
3834 // fresh region, _finger points to start().
3835 MemRegion mr = MemRegion(_finger, _region_limit);
3837 if (_cm->verbose_low())
3838 gclog_or_tty->print_cr("[%d] we're scanning part "
3839 "["PTR_FORMAT", "PTR_FORMAT") "
3840 "of region "PTR_FORMAT,
3841 _task_id, _finger, _region_limit, _curr_region);
3843 // Let's iterate over the bitmap of the part of the
3844 // region that is left.
3845 bitmap_closure.set_scanning_heap_region(true);
3846 if (mr.is_empty() ||
3847 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3848 // We successfully completed iterating over the region. Now,
3849 // let's give up the region.
3850 giveup_current_region();
3851 regular_clock_call();
3852 } else {
3853 assert(has_aborted(), "currently the only way to do so");
3854 // The only way to abort the bitmap iteration is to return
3855 // false from the do_bit() method. However, inside the
3856 // do_bit() method we move the _finger to point to the
3857 // object currently being looked at. So, if we bail out, we
3858 // have definitely set _finger to something non-null.
3859 assert(_finger != NULL, "invariant");
3861 // Region iteration was actually aborted. So now _finger
3862 // points to the address of the object we last scanned. If we
3863 // leave it there, when we restart this task, we will rescan
3864 // the object. It is easy to avoid this. We move the finger by
3865 // enough to point to the next possible object header (the
3866 // bitmap knows by how much we need to move it as it knows its
3867 // granularity).
3868 assert(_finger < _region_limit, "invariant");
3869 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3870 // Check if bitmap iteration was aborted while scanning the last object
3871 if (new_finger >= _region_limit) {
3872 giveup_current_region();
3873 } else {
3874 move_finger_to(new_finger);
3875 }
3876 }
3877 }
3878 // At this point we have either completed iterating over the
3879 // region we were holding on to, or we have aborted.
3881 // We then partially drain the local queue and the global stack.
3882 // (Do we really need this?)
3883 drain_local_queue(true);
3884 drain_global_stack(true);
3886 // Read the note on the claim_region() method on why it might
3887 // return NULL with potentially more regions available for
3888 // claiming and why we have to check out_of_regions() to determine
3889 // whether we're done or not.
3890 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3891 // We are going to try to claim a new region. We should have
3892 // given up on the previous one.
3893 // Separated the asserts so that we know which one fires.
3894 assert(_curr_region == NULL, "invariant");
3895 assert(_finger == NULL, "invariant");
3896 assert(_region_limit == NULL, "invariant");
3897 if (_cm->verbose_low())
3898 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3899 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3900 if (claimed_region != NULL) {
3901 // Yes, we managed to claim one
3902 statsOnly( ++_regions_claimed );
3904 if (_cm->verbose_low())
3905 gclog_or_tty->print_cr("[%d] we successfully claimed "
3906 "region "PTR_FORMAT,
3907 _task_id, claimed_region);
3909 setup_for_region(claimed_region);
3910 assert(_curr_region == claimed_region, "invariant");
3911 }
3912 // It is important to call the regular clock here. It might take
3913 // a while to claim a region if, for example, we hit a large
3914 // block of empty regions. So we need to call the regular clock
3915 // method once round the loop to make sure it's called
3916 // frequently enough.
3917 regular_clock_call();
3918 }
3920 if (!has_aborted() && _curr_region == NULL) {
3921 assert(_cm->out_of_regions(),
3922 "at this point we should be out of regions");
3923 }
3924   } while (_curr_region != NULL && !has_aborted());
3926 if (!has_aborted()) {
3927 // We cannot check whether the global stack is empty, since other
3928 // tasks might be pushing objects to it concurrently. We also cannot
3929 // check if the region stack is empty because if a thread is aborting
3930 // it can push a partially done region back.
3931 assert(_cm->out_of_regions(),
3932 "at this point we should be out of regions");
3934 if (_cm->verbose_low())
3935 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3937 // Try to reduce the number of available SATB buffers so that
3938 // remark has less work to do.
3939 drain_satb_buffers();
3940 }
3942 // Since we've done everything else, we can now totally drain the
3943 // local queue and global stack.
3944 drain_local_queue(false);
3945 drain_global_stack(false);
3947   // Attempt work stealing from other tasks' queues.
3948 if (!has_aborted()) {
3949 // We have not aborted. This means that we have finished all that
3950 // we could. Let's try to do some stealing...
3952 // We cannot check whether the global stack is empty, since other
3953 // tasks might be pushing objects to it concurrently. We also cannot
3954 // check if the region stack is empty because if a thread is aborting
3955 // it can push a partially done region back.
3956 assert(_cm->out_of_regions() && _task_queue->size() == 0,
3957 "only way to reach here");
3959 if (_cm->verbose_low())
3960 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3962 while (!has_aborted()) {
3963 oop obj;
3964 statsOnly( ++_steal_attempts );
3966 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3967 if (_cm->verbose_medium())
3968 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3969 _task_id, (void*) obj);
3971 statsOnly( ++_steals );
3973 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3974 "any stolen object should be marked");
3975 scan_object(obj);
3977 // And since we're towards the end, let's totally drain the
3978 // local queue and global stack.
3979 drain_local_queue(false);
3980 drain_global_stack(false);
3981 } else {
3982 break;
3983 }
3984 }
3985 }
3987 // We still haven't aborted. Now, let's try to get into the
3988 // termination protocol.
3989 if (!has_aborted()) {
3990 // We cannot check whether the global stack is empty, since other
3991 // tasks might be concurrently pushing objects on it. We also cannot
3992 // check if the region stack is empty because if a thread is aborting
3993 // it can push a partially done region back.
3994 // Separated the asserts so that we know which one fires.
3995 assert(_cm->out_of_regions(), "only way to reach here");
3996 assert(_task_queue->size() == 0, "only way to reach here");
3998 if (_cm->verbose_low())
3999 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4001 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4002 // The CMTask class also extends the TerminatorTerminator class,
4003 // hence its should_exit_termination() method will also decide
4004 // whether to exit the termination protocol or not.
4005 bool finished = _cm->terminator()->offer_termination(this);
4006 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4007 _termination_time_ms +=
4008 termination_end_time_ms - _termination_start_time_ms;
4010 if (finished) {
4011 // We're all done.
4013 if (_task_id == 0) {
4014 // let's allow task 0 to do this
4015 if (concurrent()) {
4016 assert(_cm->concurrent_marking_in_progress(), "invariant");
4017 // we need to set this to false before the next
4018 // safepoint. This way we ensure that the marking phase
4019 // doesn't observe any more heap expansions.
4020 _cm->clear_concurrent_marking_in_progress();
4021 }
4022 }
4024 // We can now guarantee that the global stack is empty, since
4025 // all other tasks have finished. We separated the guarantees so
4026 // that, if a condition is false, we can immediately find out
4027 // which one.
4028 guarantee(_cm->out_of_regions(), "only way to reach here");
4029 guarantee(_aborted_region.is_empty(), "only way to reach here");
4030 guarantee(_cm->region_stack_empty(), "only way to reach here");
4031 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4032 guarantee(_task_queue->size() == 0, "only way to reach here");
4033 guarantee(!_cm->has_overflown(), "only way to reach here");
4034 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4035 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4037 if (_cm->verbose_low())
4038 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4039 } else {
4040       // Apparently there's more work to do. Let's abort this task. The
4041       // framework will restart it and hopefully we'll find more things to do.
4043 if (_cm->verbose_low())
4044 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
4046 set_has_aborted();
4047 statsOnly( ++_aborted_termination );
4048 }
4049 }
4051   // Mainly for debugging purposes, to make sure that the pointer to
4052   // the closure which was allocated in this frame doesn't escape it
4053   // by accident.
4054 set_oop_closure(NULL);
4055 double end_time_ms = os::elapsedVTime() * 1000.0;
4056 double elapsed_time_ms = end_time_ms - _start_time_ms;
4057 // Update the step history.
4058 _step_times_ms.add(elapsed_time_ms);
4060 if (has_aborted()) {
4061 // The task was aborted for some reason.
4063 statsOnly( ++_aborted );
4065 if (_has_aborted_timed_out) {
4066 double diff_ms = elapsed_time_ms - _time_target_ms;
4067 // Keep statistics of how well we did with respect to hitting
4068 // our target only if we actually timed out (if we aborted for
4069 // other reasons, then the results might get skewed).
4070 _marking_step_diffs_ms.add(diff_ms);
4071 }
4073 if (_cm->has_overflown()) {
4074 // This is the interesting one. We aborted because a global
4075 // overflow was raised. This means we have to restart the
4076 // marking phase and start iterating over regions. However, in
4077 // order to do this we have to make sure that all tasks stop
4078 // what they are doing and re-initialise in a safe manner. We
4079 // will achieve this with the use of two barrier sync points.
4081 if (_cm->verbose_low())
4082 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4084 _cm->enter_first_sync_barrier(_task_id);
4085 // When we exit this sync barrier we know that all tasks have
4086 // stopped doing marking work. So, it's now safe to
4087 // re-initialise our data structures. At the end of this method,
4088 // task 0 will clear the global data structures.
4090 statsOnly( ++_aborted_overflow );
4092 // We clear the local state of this task...
4093 clear_region_fields();
4095 // ...and enter the second barrier.
4096 _cm->enter_second_sync_barrier(_task_id);
4097     // At this point everything has been re-initialised and we're
4098 // ready to restart.
4099 }
4101 if (_cm->verbose_low()) {
4102 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4103 "elapsed = %1.2lfms <<<<<<<<<<",
4104 _task_id, _time_target_ms, elapsed_time_ms);
4105 if (_cm->has_aborted())
4106 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4107 _task_id);
4108 }
4109 } else {
4110 if (_cm->verbose_low())
4111 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4112 "elapsed = %1.2lfms <<<<<<<<<<",
4113 _task_id, _time_target_ms, elapsed_time_ms);
4114 }
4116 _claimed = false;
4117 }
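// The overflow path in do_marking_step() is an instance of a generic
// two-barrier restart protocol: the first barrier guarantees that every
// task has stopped marking before any state is reset, and the second
// guarantees the reset is complete before any task resumes. A sketch
// with hypothetical Barrier and reset_* names:
//
//   void restart_after_overflow(int task_id) {
//     first_barrier.wait();           // all tasks quiesced
//     reset_local_state(task_id);     // safe: nobody is marking now
//     if (task_id == 0)
//       reset_global_state();         // one task clears shared structures
//     second_barrier.wait();          // nobody resumes until reset is done
//   }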
4119 CMTask::CMTask(int task_id,
4120 ConcurrentMark* cm,
4121 CMTaskQueue* task_queue,
4122 CMTaskQueueSet* task_queues)
4123 : _g1h(G1CollectedHeap::heap()),
4124 _task_id(task_id), _cm(cm),
4125 _claimed(false),
4126 _nextMarkBitMap(NULL), _hash_seed(17),
4127 _task_queue(task_queue),
4128 _task_queues(task_queues),
4129 _oop_closure(NULL),
4130 _aborted_region(MemRegion()) {
4131 guarantee(task_queue != NULL, "invariant");
4132 guarantee(task_queues != NULL, "invariant");
4134 statsOnly( _clock_due_to_scanning = 0;
4135 _clock_due_to_marking = 0 );
4137 _marking_step_diffs_ms.add(0.5);
4138 }
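// _marking_step_diffs_ms feeds g1_policy->get_new_prediction() in
// do_marking_step(): the predicted overshoot of previous steps is
// subtracted from the next step's time target. Seeding it with 0.5ms
// gives the predictor a sample before any real data exists. With
// hypothetical numbers: a 10.0ms target and a 0.5ms predicted
// overshoot yield an effective _time_target_ms of 9.5ms.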