Mon, 05 Apr 2010 12:19:22 -0400
6940310: G1: MT-unsafe calls to CM::region_stack_push() / CM::region_stack_pop()
Summary: Calling the methods region_stack_push() and region_stack_pop() concurrently is not MT-safe. The assumption is that we will only call region_stack_push() during a GC pause and region_stack_pop() during marking. Unfortunately, we also call region_stack_push() during marking, which seems to introduce subtle marking failures. This change introduces lock-based methods for pushing / popping to be called during marking.
Reviewed-by: iveresov, johnc
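
For context, a minimal sketch of the race the lock-based methods close (the
interleaving is hypothetical and the names are simplified): the CAS-based push
publishes a slot by bumping _index *before* storing into it, so a concurrent
pop can claim and read that slot before the pusher's store lands.

    // Pusher (CAS-based push)            // Popper (CAS-based pop)
    jint i = _index;
    Atomic::cmpxchg(i + 1, &_index, i);   // slot i reserved, not yet written
                                          jint j = _index;             // j == i + 1
                                          Atomic::cmpxchg(j - 1, &_index, j);
                                          MemRegion mr = _base[j - 1]; // reads stale slot!
    _base[i] = mr;                        // store lands too late

This is harmless as long as pushes happen only in a GC pause and pops only
during marking; once pushes also happen during marking, push_with_lock() /
pop_with_lock() (below) serialise the two under CMRegionStack_lock.
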
/*
 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
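  // One bit in the map covers (1 << _shifter) heap words, so the backing
  // store needs (_bmWordSize >> _shifter) bits, i.e.
  // (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes (plus one for rounding).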
  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
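
// Note: par_push() publishes the slot with the CAS *before* the store into
// _base[index] lands, so it is only safe as long as no pop can run
// concurrently -- the same store-after-CAS window that 6940310 closes for
// the region stack with the lock-based methods further below.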

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
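
// The two array variants below take ParGCRareEvent_lock rather than using a
// CAS loop: the lock makes the multi-slot update of _index and _base[]
// atomic with respect to each other, and pushing / popping whole arrays is
// rare enough that the locking overhead is not a concern.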

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}
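
// Note: as in CMMarkStack::par_push() above, the CAS publishes the slot
// before the store into _base[index]; this is only safe while pops cannot
// run concurrently, i.e. when pushing during a GC pause. The marking-time
// callers use the lock-based push_with_lock() / pop_with_lock() below.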

// Currently we do not call this at all. Normally we would call it
// during the concurrent marking / remark phases but we now call
// the lock-based version instead. But we might want to resurrect this
// code in the future. So, we'll leave it here commented out.
#if 0
MemRegion CMRegionStack::pop() {
  while (true) {
    // Otherwise...
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}
#endif // 0

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
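
// Illustrative only (not called from anywhere): the marking-time drain
// pattern for the lock-based pop, relying on an empty MemRegion as the
// "stack is empty" sentinel.
#if 0
static void drain_region_stack_example(CMRegionStack* rs) {
  while (true) {
    MemRegion mr = rs->pop_with_lock();
    if (mr.start() == NULL) {
      break; // stack is empty
    }
    // ... scan the objects spanning [mr.start(), mr.end()) ...
  }
}
#endif // 0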

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;
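
      // Worked example (illustrative figures): with MaxGCPauseMillis = 200,
      // GCPauseIntervalMillis = 1000, G1MarkingOverheadPercent = 33 and an
      // 8-CPU machine, overall_cm_overhead = 200 * 0.33 / 1000 = 0.066 and
      // cpu_ratio = 0.125, so marking_thread_num = ceil(0.066 / 0.125) = 1,
      // marking_task_overhead = 0.066 / 1 * 8 = 0.528 and
      // sleep_factor = (1 - 0.528) / 0.528 ~= 0.89, i.e. that one marking
      // thread should sleep roughly 0.89 ms for every 1 ms it runs.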

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();
  }
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  if (G1PrintReachableAtInitialMark) {
    print_reachable(true, "before");
  }

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,      // activate StrongRootsScope
                            false,     // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,      // do not visit active blobs
                            &older     // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one
   thread could be in the barrier sync code, waiting for the other
   thread to also sync up, whereas another one could be trying to
   yield, while also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible that a Full GC or an evacuation pause could occur while
   it is blocked. This is actually safe, since entering the sync
   barrier is one of the last things do_marking_step() does, and it
   doesn't manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}
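
// Note: region_stack_push() above is the CAS-based push; per 6940310 it is
// assumed to be called only during a GC pause, when no marking thread can
// concurrently pop the region stack. Pushes that happen during marking have
// to go through the lock-based CMRegionStack::push_with_lock() instead.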

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    JavaThread::satb_mark_queue_set().set_active_all_threads(
                                                  false, /* new active value */
                                                  true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};

void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);
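
  // Repeat until the computation reaches a fixed point: the closure may
  // yield to a pause or abort mid-heap, so we re-iterate while changed()
  // reports that the previous pass still processed new live objects.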
  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  size_t _cleared_h_regions;
  size_t _freed_regions;
  UncleanRegionList* _unclean_region_list;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (ParallelGCThreads > 0) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           UncleanRegionList* list,
                           int worker_num)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _unclean_region_list(list)
{}

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  r->reset_gc_time_stamp();
  if (!r->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    r->note_end_of_marking();
    _max_live_bytes += r->max_live_bytes();
    _g1->free_region_if_totally_empty_work(r,
                                           _freed_bytes,
                                           _cleared_h_regions,
                                           _freed_regions,
                                           _unclean_region_list,
                                           true /*par*/);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}
1596 void ConcurrentMark::cleanup() {
1597 // world is stopped at this checkpoint
1598 assert(SafepointSynchronize::is_at_safepoint(),
1599 "world should be stopped");
1600 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1602 // If a full collection has happened, we shouldn't do this.
1603 if (has_aborted()) {
1604 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1605 return;
1606 }
1608 if (VerifyDuringGC) {
1609 HandleMark hm; // handle scope
1610 gclog_or_tty->print(" VerifyDuringGC:(before)");
1611 Universe::heap()->prepare_for_verify();
1612 Universe::verify(/* allow dirty */ true,
1613 /* silent */ false,
1614 /* prev marking */ true);
1615 }
1617 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1618 g1p->record_concurrent_mark_cleanup_start();
1620 double start = os::elapsedTime();
1622 // Do counting once more with the world stopped for good measure.
1623 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1624 &_region_bm, &_card_bm);
1625 if (ParallelGCThreads > 0) {
1626 assert(g1h->check_heap_region_claim_values(
1627 HeapRegion::InitialClaimValue),
1628 "sanity check");
1630 int n_workers = g1h->workers()->total_workers();
1631 g1h->set_par_threads(n_workers);
1632 g1h->workers()->run_task(&g1_par_count_task);
1633 g1h->set_par_threads(0);
1635 assert(g1h->check_heap_region_claim_values(
1636 HeapRegion::FinalCountClaimValue),
1637 "sanity check");
1638 } else {
1639 g1_par_count_task.work(0);
1640 }
1642 size_t known_garbage_bytes =
1643 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1644 #if 0
1645 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1646 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1647 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1648 (double) known_garbage_bytes / (double) (1024 * 1024));
1649 #endif // 0
1650 g1p->set_known_garbage_bytes(known_garbage_bytes);
1652 size_t start_used_bytes = g1h->used();
1653 _at_least_one_mark_complete = true;
1654 g1h->set_marking_complete();
1656 double count_end = os::elapsedTime();
1657 double this_final_counting_time = (count_end - start);
1658 if (G1PrintParCleanupStats) {
1659 gclog_or_tty->print_cr("Cleanup:");
1660 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1661 this_final_counting_time*1000.0);
1662 }
1663 _total_counting_time += this_final_counting_time;
1665 // Install newly created mark bitMap as "prev".
1666 swapMarkBitMaps();
1668 g1h->reset_gc_time_stamp();
1670 // Note end of marking in all heap regions.
1671 double note_end_start = os::elapsedTime();
1672 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1673 if (ParallelGCThreads > 0) {
1674 int n_workers = g1h->workers()->total_workers();
1675 g1h->set_par_threads(n_workers);
1676 g1h->workers()->run_task(&g1_par_note_end_task);
1677 g1h->set_par_threads(0);
1679 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1680 "sanity check");
1681 } else {
1682 g1_par_note_end_task.work(0);
1683 }
1684 // Tell the mutators that there might be unclean regions coming...
1685 g1h->set_unclean_regions_coming(true);
1686 double note_end_end = os::elapsedTime();
1687 if (G1PrintParCleanupStats) {
1688 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1689 (note_end_end - note_end_start)*1000.0);
1690 }
1692 // Now we "scrub" remembered sets. Note that we must do this before the
1693 // call below, since it affects the metric by which we sort the heap
1694 // regions.
1695 if (G1ScrubRemSets) {
1696 double rs_scrub_start = os::elapsedTime();
1697 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1698 if (ParallelGCThreads > 0) {
1699 int n_workers = g1h->workers()->total_workers();
1700 g1h->set_par_threads(n_workers);
1701 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1702 g1h->set_par_threads(0);
1704 assert(g1h->check_heap_region_claim_values(
1705 HeapRegion::ScrubRemSetClaimValue),
1706 "sanity check");
1707 } else {
1708 g1_par_scrub_rs_task.work(0);
1709 }
1711 double rs_scrub_end = os::elapsedTime();
1712 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1713 _total_rs_scrub_time += this_rs_scrub_time;
1714 }
1716 // this will also free any regions totally full of garbage objects,
1717 // and sort the regions.
1718 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1719 g1_par_note_end_task.freed_bytes(),
1720 g1_par_note_end_task.max_live_bytes());
1722 // Statistics.
1723 double end = os::elapsedTime();
1724 _cleanup_times.add((end - start) * 1000.0);
1726 // G1CollectedHeap::heap()->print();
1727 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1728 // G1CollectedHeap::heap()->get_gc_time_stamp());
1730 if (PrintGC || PrintGCDetails) {
1731 g1h->print_size_transition(gclog_or_tty,
1732 start_used_bytes,
1733 g1h->used(),
1734 g1h->capacity());
1735 }
1737 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1738 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1740 // We need to make this a "collection" so any collection pause that
1741 // races with it goes around and waits for completeCleanup to finish.
1742 g1h->increment_total_collections();
1744 if (VerifyDuringGC) {
1745 HandleMark hm; // handle scope
1746 gclog_or_tty->print(" VerifyDuringGC:(after)");
1747 Universe::heap()->prepare_for_verify();
1748 Universe::verify(/* allow dirty */ true,
1749 /* silent */ false,
1750 /* prev marking */ true);
1751 }
1752 }
1754 void ConcurrentMark::completeCleanup() {
1755 // A full collection intervened.
1756 if (has_aborted()) return;
1758 int first = 0;
1759 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1760 for (int t = first; t < last; t++) {
1761 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1762 assert(list->well_formed(), "Inv");
1763 HeapRegion* hd = list->hd();
1764 while (hd != NULL) {
1765 // Clear the remembered set and move the region to the unclean list.
1766 hd->rem_set()->clear();
1767 HeapRegion* next_hd = hd->next_from_unclean_list();
1768 (void)list->pop();
1769 assert(list->hd() == next_hd, "how not?");
1770 _g1h->put_region_on_unclean_list(hd);
1771 if (!hd->isHumongous()) {
1772 // Increment the _free_regions count by one.
1773 _g1h->finish_free_region_work(0, 0, 1, NULL);
1774 }
1775 hd = list->hd();
1776 assert(hd == next_hd, "how not?");
1777 }
1778 }
1779 }
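// Note on the loop above: each per-worker cleanup list is drained one
// region at a time; a region has its remembered set cleared and is then
// handed to the heap's global unclean list. For non-humongous regions,
// finish_free_region_work(0, 0, 1, NULL) simply bumps the free-region
// count by one.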
1782 class G1CMIsAliveClosure: public BoolObjectClosure {
1783 G1CollectedHeap* _g1;
1784 public:
1785 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1786 _g1(g1)
1787 {}
1789 void do_object(oop obj) {
1790 assert(false, "not to be invoked");
1791 }
1792 bool do_object_b(oop obj) {
1793 HeapWord* addr = (HeapWord*)obj;
1794 return addr != NULL &&
1795 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1796 }
1797 };
1799 class G1CMKeepAliveClosure: public OopClosure {
1800 G1CollectedHeap* _g1;
1801 ConcurrentMark* _cm;
1802 CMBitMap* _bitMap;
1803 public:
1804 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1805 CMBitMap* bitMap) :
1806 _g1(g1), _cm(cm),
1807 _bitMap(bitMap) {}
1809 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1810 virtual void do_oop( oop* p) { do_oop_work(p); }
1812 template <class T> void do_oop_work(T* p) {
1813 oop thisOop = oopDesc::load_decode_heap_oop(p);
1814 HeapWord* addr = (HeapWord*)thisOop;
1815 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1816 _bitMap->mark(addr);
1817 _cm->mark_stack_push(thisOop);
1818 }
1819 }
1820 };
1822 class G1CMDrainMarkingStackClosure: public VoidClosure {
1823 CMMarkStack* _markStack;
1824 CMBitMap* _bitMap;
1825 G1CMKeepAliveClosure* _oopClosure;
1826 public:
1827 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1828 G1CMKeepAliveClosure* oopClosure) :
1829 _bitMap(bitMap),
1830 _markStack(markStack),
1831 _oopClosure(oopClosure)
1832 {}
1834 void do_void() {
1835 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1836 }
1837 };
1839 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1840 ResourceMark rm;
1841 HandleMark hm;
1842 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1843 ReferenceProcessor* rp = g1h->ref_processor();
1845 // Process weak references.
1846 rp->setup_policy(clear_all_soft_refs);
1847 assert(_markStack.isEmpty(), "mark stack should be empty");
1849 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1850 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1851 G1CMDrainMarkingStackClosure
1852 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1853 &g1KeepAliveClosure);
1855 // TODO: also copy the parallel ref processing code from CMS.
1856 rp->process_discovered_references(&g1IsAliveClosure,
1857 &g1KeepAliveClosure,
1858 &g1DrainMarkingStackClosure,
1859 NULL);
1860 assert(_markStack.overflow() || _markStack.isEmpty(),
1861 "mark stack should be empty (unless it overflowed)");
1862 if (_markStack.overflow()) {
1863 set_has_overflown();
1864 }
1866 rp->enqueue_discovered_references();
1867 rp->verify_no_references_recorded();
1868 assert(!rp->discovery_enabled(), "should have been disabled");
1870 // Now clean up stale oops in SymbolTable and StringTable
1871 SymbolTable::unlink(&g1IsAliveClosure);
1872 StringTable::unlink(&g1IsAliveClosure);
1873 }
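// The "prev" bitmap holds the results of the last completed marking
// cycle and is read-only from here on; the "next" bitmap is the one the
// current marking builds. A minimal sketch of the hand-over done below
// (assuming the usual two-bitmap scheme):
//
//   mark objects in _nextMarkBitMap;   // during concurrent marking
//   swapMarkBitMaps();                 // at cleanup: next becomes prev
//   // the new next bitmap is then cleared before the following cycle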
1875 void ConcurrentMark::swapMarkBitMaps() {
1876 CMBitMapRO* temp = _prevMarkBitMap;
1877 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1878 _nextMarkBitMap = (CMBitMap*) temp;
1879 }
1881 class CMRemarkTask: public AbstractGangTask {
1882 private:
1883 ConcurrentMark *_cm;
1885 public:
1886 void work(int worker_i) {
1887 // Since all available tasks are actually started, we should
1888 // only proceed if we're supposed to be active.
1889 if ((size_t)worker_i < _cm->active_tasks()) {
1890 CMTask* task = _cm->task(worker_i);
1891 task->record_start_time();
1892 do {
1893 task->do_marking_step(1000000000.0 /* something very large */);
1894 } while (task->has_aborted() && !_cm->has_overflown());
1895 // If we overflow, then we do not want to restart. We instead
1896 // want to abort remark and do concurrent marking again.
1897 task->record_end_time();
1898 }
1899 }
1901 CMRemarkTask(ConcurrentMark* cm) :
1902 AbstractGangTask("Par Remark"), _cm(cm) { }
1903 };
1905 void ConcurrentMark::checkpointRootsFinalWork() {
1906 ResourceMark rm;
1907 HandleMark hm;
1908 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1910 g1h->ensure_parsability(false);
1912 if (ParallelGCThreads > 0) {
1913 G1CollectedHeap::StrongRootsScope srs(g1h);
1914 // this is remark, so we'll use up all available threads
1915 int active_workers = ParallelGCThreads;
1916 set_phase(active_workers, false);
1918 CMRemarkTask remarkTask(this);
1919 // We will start all available threads, even if we decide that the
1920 // active_workers will be fewer. The extra ones will just bail out
1921 // immediately.
1922 int n_workers = g1h->workers()->total_workers();
1923 g1h->set_par_threads(n_workers);
1924 g1h->workers()->run_task(&remarkTask);
1925 g1h->set_par_threads(0);
1926 } else {
1927 G1CollectedHeap::StrongRootsScope srs(g1h);
1928 // this is remark, so we'll use up all available threads
1929 int active_workers = 1;
1930 set_phase(active_workers, false);
1932 CMRemarkTask remarkTask(this);
1933 // We will start all available threads, even if we decide that the
1934 // active_workers will be fewer. The extra ones will just bail out
1935 // immediately.
1936 remarkTask.work(0);
1937 }
1938 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1939 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1941 print_stats();
1943 if (!restart_for_overflow())
1944 set_non_marking_state();
1946 #if VERIFY_OBJS_PROCESSED
1947 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1948 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1949 _scan_obj_cl.objs_processed,
1950 ThreadLocalObjQueue::objs_enqueued);
1951 guarantee(_scan_obj_cl.objs_processed ==
1952 ThreadLocalObjQueue::objs_enqueued,
1953 "Different number of objs processed and enqueued.");
1954 }
1955 #endif
1956 }
1958 #ifndef PRODUCT
1960 class ReachablePrinterOopClosure: public OopClosure {
1961 private:
1962 G1CollectedHeap* _g1h;
1963 CMBitMapRO* _bitmap;
1964 outputStream* _out;
1965 bool _use_prev_marking;
1967 public:
1968 ReachablePrinterOopClosure(CMBitMapRO* bitmap,
1969 outputStream* out,
1970 bool use_prev_marking) :
1971 _g1h(G1CollectedHeap::heap()),
1972 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
1974 void do_oop(narrowOop* p) { do_oop_work(p); }
1975 void do_oop( oop* p) { do_oop_work(p); }
1977 template <class T> void do_oop_work(T* p) {
1978 oop obj = oopDesc::load_decode_heap_oop(p);
1979 const char* str = NULL;
1980 const char* str2 = "";
1982 if (!_g1h->is_in_g1_reserved(obj))
1983 str = "outside G1 reserved";
1984 else {
1985 HeapRegion* hr = _g1h->heap_region_containing(obj);
1986 guarantee(hr != NULL, "invariant");
1987 bool over_tams = false;
1988 if (_use_prev_marking) {
1989 over_tams = hr->obj_allocated_since_prev_marking(obj);
1990 } else {
1991 over_tams = hr->obj_allocated_since_next_marking(obj);
1992 }
1994 if (over_tams) {
1995 str = "over TAMS";
1996 if (_bitmap->isMarked((HeapWord*) obj)) {
1997 str2 = " AND MARKED";
1998 }
1999 } else if (_bitmap->isMarked((HeapWord*) obj)) {
2000 str = "marked";
2001 } else {
2002 str = "#### NOT MARKED ####";
2003 }
2004 }
2006 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
2007 p, (void*) obj, str, str2);
2008 }
2009 };
2011 class ReachablePrinterClosure: public BitMapClosure {
2012 private:
2013 CMBitMapRO* _bitmap;
2014 outputStream* _out;
2015 bool _use_prev_marking;
2017 public:
2018 ReachablePrinterClosure(CMBitMapRO* bitmap,
2019 outputStream* out,
2020 bool use_prev_marking) :
2021 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
2023 bool do_bit(size_t offset) {
2024 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2025 ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
2027 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2028 oop(addr)->oop_iterate(&oopCl);
2029 _out->print_cr("");
2031 return true;
2032 }
2033 };
2035 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2036 private:
2037 CMBitMapRO* _bitmap;
2038 outputStream* _out;
2039 bool _use_prev_marking;
2041 public:
2042 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap,
2043 outputStream* out,
2044 bool use_prev_marking) :
2045 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
2047 void do_object(oop o) {
2048 ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
2050 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2051 o->oop_iterate(&oopCl);
2052 _out->print_cr("");
2053 }
2054 };
2056 class RegionReachablePrinterClosure : public HeapRegionClosure {
2057 private:
2058 CMBitMapRO* _bitmap;
2059 outputStream* _out;
2060 bool _use_prev_marking;
2062 public:
2063 bool doHeapRegion(HeapRegion* hr) {
2064 HeapWord* b = hr->bottom();
2065 HeapWord* e = hr->end();
2066 HeapWord* t = hr->top();
2067 HeapWord* p = NULL;
2068 if (_use_prev_marking) {
2069 p = hr->prev_top_at_mark_start();
2070 } else {
2071 p = hr->next_top_at_mark_start();
2072 }
2073 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2074 "TAMS: "PTR_FORMAT, b, e, t, p);
2075 _out->print_cr("");
2077 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out, _use_prev_marking);
2078 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2080 return false;
2081 }
2083 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2084 outputStream* out,
2085 bool use_prev_marking) :
2086 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
2087 };
2089 void ConcurrentMark::print_reachable(bool use_prev_marking, const char* str) {
2090 gclog_or_tty->print_cr("== Doing reachable object dump... ");
2092 if (G1PrintReachableBaseFile == NULL) {
2093 gclog_or_tty->print_cr(" #### error: no base file defined");
2094 return;
2095 }
2097 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2098 (JVM_MAXPATHLEN - 1)) {
2099 gclog_or_tty->print_cr(" #### error: file name too long");
2100 return;
2101 }
2103 char file_name[JVM_MAXPATHLEN];
2104 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2105 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2107 fileStream fout(file_name);
2108 if (!fout.is_open()) {
2109 gclog_or_tty->print_cr(" #### error: could not open file");
2110 return;
2111 }
2113 outputStream* out = &fout;
2115 CMBitMapRO* bitmap = NULL;
2116 if (use_prev_marking) {
2117 bitmap = _prevMarkBitMap;
2118 } else {
2119 bitmap = _nextMarkBitMap;
2120 }
2122 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2123 out->cr();
2125 RegionReachablePrinterClosure rcl(bitmap, out, use_prev_marking);
2126 out->print_cr("--- ITERATING OVER REGIONS WITH TAMS < TOP");
2127 out->cr();
2128 _g1h->heap_region_iterate(&rcl);
2129 out->cr();
2131 ReachablePrinterClosure cl(bitmap, out, use_prev_marking);
2132 out->print_cr("--- ITERATING OVER MARKED OBJECTS ON THE BITMAP");
2133 out->cr();
2134 bitmap->iterate(&cl);
2135 out->cr();
2137 gclog_or_tty->print_cr(" done");
2138 }
2140 #endif // PRODUCT
2142 // This note is for drainAllSATBBuffers and the code in between.
2143 // In the future we could reuse a task to do this work during an
2144 // evacuation pause (since now tasks are not active and can be claimed
2145 // during an evacuation pause). This was a late change to the code and
2146 // is currently not being taken advantage of.
2148 class CMGlobalObjectClosure : public ObjectClosure {
2149 private:
2150 ConcurrentMark* _cm;
2152 public:
2153 void do_object(oop obj) {
2154 _cm->deal_with_reference(obj);
2155 }
2157 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2158 };
2160 void ConcurrentMark::deal_with_reference(oop obj) {
2161 if (verbose_high())
2162 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2163 (void*) obj);
2166 HeapWord* objAddr = (HeapWord*) obj;
2167 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2168 if (_g1h->is_in_g1_reserved(objAddr)) {
2169 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2170 HeapRegion* hr = _g1h->heap_region_containing(obj);
2171 if (_g1h->is_obj_ill(obj, hr)) {
2172 if (verbose_high())
2173 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2174 "marked", (void*) obj);
2176 // we need to mark it first
2177 if (_nextMarkBitMap->parMark(objAddr)) {
2178 // No OrderAccess::store_load() is needed. It is implicit in the
2179 // CAS done in parMark(objAddr) above
2180 HeapWord* finger = _finger;
2181 if (objAddr < finger) {
2182 if (verbose_high())
2183 gclog_or_tty->print_cr("[global] below the global finger "
2184 "("PTR_FORMAT"), pushing it", finger);
2185 if (!mark_stack_push(obj)) {
2186 if (verbose_low())
2187 gclog_or_tty->print_cr("[global] global stack overflow during "
2188 "deal_with_reference");
2189 }
2190 }
2191 }
2192 }
2193 }
2194 }
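// In outline, the body above is (sketch, logging elided):
//
//   if (in_g1_reserved(obj) && is_obj_ill(obj) && parMark(objAddr) &&
//       objAddr < _finger) {
//     mark_stack_push(obj);   // failure here means global stack overflow,
//   }                         // which is detected and handled elsewhere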
2196 void ConcurrentMark::drainAllSATBBuffers() {
2197 CMGlobalObjectClosure oc(this);
2198 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2199 satb_mq_set.set_closure(&oc);
2201 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2202 if (verbose_medium())
2203 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2204 }
2206 // no need to check whether we should do this, as this is only
2207 // called during an evacuation pause
2208 satb_mq_set.iterate_closure_all_threads();
2210 satb_mq_set.set_closure(NULL);
2211 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2212 }
2214 void ConcurrentMark::markPrev(oop p) {
2215 // Note we are overriding the read-only view of the prev map here, via
2216 // the cast.
2217 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2218 }
2220 void ConcurrentMark::clear(oop p) {
2221 assert(p != NULL && p->is_oop(), "expected an oop");
2222 HeapWord* addr = (HeapWord*)p;
2223 assert(addr >= _nextMarkBitMap->startWord() &&
2224 addr < _nextMarkBitMap->endWord(), "in a region");
2226 _nextMarkBitMap->clear(addr);
2227 }
2229 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2230 // Note we are overriding the read-only view of the prev map here, via
2231 // the cast.
2232 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2233 _nextMarkBitMap->clearRange(mr);
2234 }
2236 HeapRegion*
2237 ConcurrentMark::claim_region(int task_num) {
2238 // "checkpoint" the finger
2239 HeapWord* finger = _finger;
2241 // _heap_end will not change underneath our feet; it only changes at
2242 // yield points.
2243 while (finger < _heap_end) {
2244 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2246 // is the gap between reading the finger and doing the CAS too long?
2248 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2249 HeapWord* bottom = curr_region->bottom();
2250 HeapWord* end = curr_region->end();
2251 HeapWord* limit = curr_region->next_top_at_mark_start();
2253 if (verbose_low())
2254 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2255 "["PTR_FORMAT", "PTR_FORMAT"), "
2256 "limit = "PTR_FORMAT,
2257 task_num, curr_region, bottom, end, limit);
2259 HeapWord* res =
2260 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2261 if (res == finger) {
2262 // we succeeded
2264 // notice that _finger == end cannot be guaranteed here since
2265 // someone else might have moved the finger even further
2266 assert(_finger >= end, "the finger should have moved forward");
2268 if (verbose_low())
2269 gclog_or_tty->print_cr("[%d] we were successful with region = "
2270 PTR_FORMAT, task_num, curr_region);
2272 if (limit > bottom) {
2273 if (verbose_low())
2274 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2275 "returning it ", task_num, curr_region);
2276 return curr_region;
2277 } else {
2278 assert(limit == bottom,
2279 "the region limit should be at bottom");
2280 if (verbose_low())
2281 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2282 "returning NULL", task_num, curr_region);
2283 // we return NULL and the caller should try calling
2284 // claim_region() again.
2285 return NULL;
2286 }
2287 } else {
2288 assert(_finger > finger, "the finger should have moved forward");
2289 if (verbose_low())
2290 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2291 "global finger = "PTR_FORMAT", "
2292 "our finger = "PTR_FORMAT,
2293 task_num, _finger, finger);
2295 // read it again
2296 finger = _finger;
2297 }
2298 }
2300 return NULL;
2301 }
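// The essence of claim_region() is a CAS loop over the global finger
// (sketch; the empty-region case, where NULL is returned and the caller
// retries, is elided):
//
//   do {
//     finger = _finger;
//     curr   = heap_region_containing(finger);
//   } while (Atomic::cmpxchg_ptr(curr->end(), &_finger, finger) != finger);
//   return curr;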
2303 void ConcurrentMark::oops_do(OopClosure* cl) {
2304 if (_markStack.size() > 0 && verbose_low())
2305 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2306 "size = %d", _markStack.size());
2307 // we first iterate over the contents of the mark stack...
2308 _markStack.oops_do(cl);
2310 for (int i = 0; i < (int)_max_task_num; ++i) {
2311 OopTaskQueue* queue = _task_queues->queue((int)i);
2313 if (queue->size() > 0 && verbose_low())
2314 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2315 "size = %d", i, queue->size());
2317 // ...then over the contents of all the task queues.
2318 queue->oops_do(cl);
2319 }
2321 // finally, invalidate any entries in the region stack that
2322 // point into the collection set
2323 if (_regionStack.invalidate_entries_into_cset()) {
2324 // otherwise, any gray objects copied during the evacuation pause
2325 // might not be visited.
2326 assert(_should_gray_objects, "invariant");
2327 }
2328 }
2330 void ConcurrentMark::clear_marking_state() {
2331 _markStack.setEmpty();
2332 _markStack.clear_overflow();
2333 _regionStack.setEmpty();
2334 _regionStack.clear_overflow();
2335 clear_has_overflown();
2336 _finger = _heap_start;
2338 for (int i = 0; i < (int)_max_task_num; ++i) {
2339 OopTaskQueue* queue = _task_queues->queue(i);
2340 queue->set_empty();
2341 }
2342 }
2344 void ConcurrentMark::print_stats() {
2345 if (verbose_stats()) {
2346 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2347 for (size_t i = 0; i < _active_tasks; ++i) {
2348 _tasks[i]->print_stats();
2349 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2350 }
2351 }
2352 }
2354 class CSMarkOopClosure: public OopClosure {
2355 friend class CSMarkBitMapClosure;
2357 G1CollectedHeap* _g1h;
2358 CMBitMap* _bm;
2359 ConcurrentMark* _cm;
2360 oop* _ms;
2361 jint* _array_ind_stack;
2362 int _ms_size;
2363 int _ms_ind;
2364 int _array_increment;
2366 bool push(oop obj, int arr_ind = 0) {
2367 if (_ms_ind == _ms_size) {
2368 gclog_or_tty->print_cr("Mark stack is full.");
2369 return false;
2370 }
2371 _ms[_ms_ind] = obj;
2372 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2373 _ms_ind++;
2374 return true;
2375 }
2377 oop pop() {
2378 if (_ms_ind == 0) return NULL;
2379 else {
2380 _ms_ind--;
2381 return _ms[_ms_ind];
2382 }
2383 }
2385 template <class T> bool drain() {
2386 while (_ms_ind > 0) {
2387 oop obj = pop();
2388 assert(obj != NULL, "Since index was non-zero.");
2389 if (obj->is_objArray()) {
2390 jint arr_ind = _array_ind_stack[_ms_ind];
2391 objArrayOop aobj = objArrayOop(obj);
2392 jint len = aobj->length();
2393 jint next_arr_ind = arr_ind + _array_increment;
2394 if (next_arr_ind < len) {
2395 push(obj, next_arr_ind);
2396 }
2397 // Now process this portion of this one.
2398 int lim = MIN2(next_arr_ind, len);
2399 for (int j = arr_ind; j < lim; j++) {
2400 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2401 }
2403 } else {
2404 obj->oop_iterate(this);
2405 }
2406 if (abort()) return false;
2407 }
2408 return true;
2409 }
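// Example of the objArray chunking in drain() above, assuming
// _array_increment == 16 and an array of length 40: the array is scanned
// as chunks [0,16), [16,32) and [32,40), and after each chunk except the
// last the array is re-pushed with its next start index saved in
// _array_ind_stack. This bounds the work done per pop.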
2411 public:
2412 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2413 _g1h(G1CollectedHeap::heap()),
2414 _cm(cm),
2415 _bm(cm->nextMarkBitMap()),
2416 _ms_size(ms_size), _ms_ind(0),
2417 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2418 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2419 _array_increment(MAX2(ms_size/8, 16))
2420 {}
2422 ~CSMarkOopClosure() {
2423 FREE_C_HEAP_ARRAY(oop, _ms);
2424 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2425 }
2427 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2428 virtual void do_oop( oop* p) { do_oop_work(p); }
2430 template <class T> void do_oop_work(T* p) {
2431 T heap_oop = oopDesc::load_heap_oop(p);
2432 if (oopDesc::is_null(heap_oop)) return;
2433 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2434 if (obj->is_forwarded()) {
2435 // If the object has already been forwarded, we have to make sure
2436 // that it's marked. So follow the forwarding pointer. Note that
2437 // this does the right thing for self-forwarding pointers in the
2438 // evacuation failure case.
2439 obj = obj->forwardee();
2440 }
2441 HeapRegion* hr = _g1h->heap_region_containing(obj);
2442 if (hr != NULL) {
2443 if (hr->in_collection_set()) {
2444 if (_g1h->is_obj_ill(obj)) {
2445 _bm->mark((HeapWord*)obj);
2446 if (!push(obj)) {
2447 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2448 set_abort();
2449 }
2450 }
2451 } else {
2452 // Outside the collection set; we need to gray it
2453 _cm->deal_with_reference(obj);
2454 }
2455 }
2456 }
2457 };
2459 class CSMarkBitMapClosure: public BitMapClosure {
2460 G1CollectedHeap* _g1h;
2461 CMBitMap* _bitMap;
2462 ConcurrentMark* _cm;
2463 CSMarkOopClosure _oop_cl;
2464 public:
2465 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2466 _g1h(G1CollectedHeap::heap()),
2467 _bitMap(cm->nextMarkBitMap()),
2468 _oop_cl(cm, ms_size)
2469 {}
2471 ~CSMarkBitMapClosure() {}
2473 bool do_bit(size_t offset) {
2474 // convert offset into a HeapWord*
2475 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2476 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2477 "address out of range");
2478 assert(_bitMap->isMarked(addr), "tautology");
2479 oop obj = oop(addr);
2480 if (!obj->is_forwarded()) {
2481 if (!_oop_cl.push(obj)) return false;
2482 if (UseCompressedOops) {
2483 if (!_oop_cl.drain<narrowOop>()) return false;
2484 } else {
2485 if (!_oop_cl.drain<oop>()) return false;
2486 }
2487 }
2488 // Otherwise...
2489 return true;
2490 }
2491 };
2494 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2495 CMBitMap* _bm;
2496 CSMarkBitMapClosure _bit_cl;
2497 enum SomePrivateConstants {
2498 MSSize = 1000
2499 };
2500 bool _completed;
2501 public:
2502 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2503 _bm(cm->nextMarkBitMap()),
2504 _bit_cl(cm, MSSize),
2505 _completed(true)
2506 {}
2508 ~CompleteMarkingInCSHRClosure() {}
2510 bool doHeapRegion(HeapRegion* r) {
2511 if (!r->evacuation_failed()) {
2512 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2513 if (!mr.is_empty()) {
2514 if (!_bm->iterate(&_bit_cl, mr)) {
2515 _completed = false;
2516 return true;
2517 }
2518 }
2519 }
2520 return false;
2521 }
2523 bool completed() { return _completed; }
2524 };
2526 class ClearMarksInHRClosure: public HeapRegionClosure {
2527 CMBitMap* _bm;
2528 public:
2529 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2531 bool doHeapRegion(HeapRegion* r) {
2532 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2533 MemRegion usedMR = r->used_region();
2534 _bm->clearRange(usedMR);
2535 }
2536 return false;
2537 }
2538 };
2540 void ConcurrentMark::complete_marking_in_collection_set() {
2541 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2543 if (!g1h->mark_in_progress()) {
2544 g1h->g1_policy()->record_mark_closure_time(0.0);
2545 return;
2546 }
2548 int i = 1;
2549 double start = os::elapsedTime();
2550 while (true) {
2551 i++;
2552 CompleteMarkingInCSHRClosure cmplt(this);
2553 g1h->collection_set_iterate(&cmplt);
2554 if (cmplt.completed()) break;
2555 }
2556 double end_time = os::elapsedTime();
2557 double elapsed_time_ms = (end_time - start) * 1000.0;
2558 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2559 if (PrintGCDetails) {
2560 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2561 }
2563 ClearMarksInHRClosure clr(nextMarkBitMap());
2564 g1h->collection_set_iterate(&clr);
2565 }
2567 // The next two methods deal with the following optimisation. Some
2568 // objects are gray by being marked and located above the finger. If
2569 // they are copied below the finger during an evacuation pause, then
2570 // they need to be pushed on the stack. The observation is that, if
2571 // there are no regions in the collection set located above the
2572 // finger, then the above cannot happen, hence we do not need to
2573 // explicitly gray any objects when copying them to below the
2574 // finger. The global stack will be scanned to ensure that, if it
2575 // points to objects being copied, it will update their
2576 // location. There is a tricky situation with the gray objects in
2577 // region stack that are being copied, however. See the comment in
2578 // newCSet().
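// Concretely (sketch): with global finger F, an object marked at address
// A > F is implicitly gray; if a pause evacuates it to A' < F, no task
// will ever scan A' again, so it must be explicitly re-grayed. The two
// methods below conservatively detect when this can happen at all.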
2580 void ConcurrentMark::newCSet() {
2581 if (!concurrent_marking_in_progress())
2582 // nothing to do if marking is not in progress
2583 return;
2585 // find what the lowest finger is among the global and local fingers
2586 _min_finger = _finger;
2587 for (int i = 0; i < (int)_max_task_num; ++i) {
2588 CMTask* task = _tasks[i];
2589 HeapWord* task_finger = task->finger();
2590 if (task_finger != NULL && task_finger < _min_finger)
2591 _min_finger = task_finger;
2592 }
2594 _should_gray_objects = false;
2596 // This fixes a very subtle and frustrating bug. It might be the case
2597 // that, during an evacuation pause, heap regions that contain
2598 // objects that are gray (by being in regions contained in the
2599 // region stack) are included in the collection set. Since such gray
2600 // objects will be moved, and because it's not easy to redirect
2601 // region stack entries to point to a new location (because objects
2602 // in one region might be scattered to multiple regions after they
2603 // are copied), one option is to ensure that all marked objects
2604 // copied during a pause are pushed on the stack. Notice, however,
2605 // that this problem can only happen when the region stack is not
2606 // empty during an evacuation pause. So, we make the fix a bit less
2607 // conservative and ensure that regions are pushed on the stack,
2608 // irrespective of whether all collection set regions are below the
2609 // finger, if the region stack is not empty. This is expected to be
2610 // a rare case, so I don't think it's necessary to be smarter about it.
2611 if (!region_stack_empty())
2612 _should_gray_objects = true;
2613 }
2615 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2616 if (!concurrent_marking_in_progress())
2617 return;
2619 HeapWord* region_end = hr->end();
2620 if (region_end > _min_finger)
2621 _should_gray_objects = true;
2622 }
2624 // abandon current marking iteration due to a Full GC
2625 void ConcurrentMark::abort() {
2626 // Clear all marks to force marking thread to do nothing
2627 _nextMarkBitMap->clearAll();
2628 // Empty mark stack
2629 clear_marking_state();
2630 for (int i = 0; i < (int)_max_task_num; ++i)
2631 _tasks[i]->clear_region_fields();
2632 _has_aborted = true;
2634 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2635 satb_mq_set.abandon_partial_marking();
2636 // This can be called either during or outside marking, we'll read
2637 // the expected_active value from the SATB queue set.
2638 satb_mq_set.set_active_all_threads(
2639 false, /* new active value */
2640 satb_mq_set.is_active() /* expected_active */);
2641 }
2643 static void print_ms_time_info(const char* prefix, const char* name,
2644 NumberSeq& ns) {
2645 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2646 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2647 if (ns.num() > 0) {
2648 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2649 prefix, ns.sd(), ns.maximum());
2650 }
2651 }
2653 void ConcurrentMark::print_summary_info() {
2654 gclog_or_tty->print_cr(" Concurrent marking:");
2655 print_ms_time_info(" ", "init marks", _init_times);
2656 print_ms_time_info(" ", "remarks", _remark_times);
2657 {
2658 print_ms_time_info(" ", "final marks", _remark_mark_times);
2659 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2661 }
2662 print_ms_time_info(" ", "cleanups", _cleanup_times);
2663 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2664 _total_counting_time,
2665 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2666 (double)_cleanup_times.num()
2667 : 0.0));
2668 if (G1ScrubRemSets) {
2669 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2670 _total_rs_scrub_time,
2671 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2672 (double)_cleanup_times.num()
2673 : 0.0));
2674 }
2675 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2676 (_init_times.sum() + _remark_times.sum() +
2677 _cleanup_times.sum())/1000.0);
2678 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2679 "(%8.2f s marking, %8.2f s counting).",
2680 cmThread()->vtime_accum(),
2681 cmThread()->vtime_mark_accum(),
2682 cmThread()->vtime_count_accum());
2683 }
2685 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2686 _parallel_workers->print_worker_threads_on(st);
2687 }
2689 // Closures
2690 // XXX: there seems to be a lot of code duplication here;
2691 // should refactor and consolidate the shared code.
2696 // We take a break if someone is trying to stop the world.
2697 bool ConcurrentMark::do_yield_check(int worker_i) {
2698 if (should_yield()) {
2699 if (worker_i == 0)
2700 _g1h->g1_policy()->record_concurrent_pause();
2701 cmThread()->yield();
2702 if (worker_i == 0)
2703 _g1h->g1_policy()->record_concurrent_pause_end();
2704 return true;
2705 } else {
2706 return false;
2707 }
2708 }
2710 bool ConcurrentMark::should_yield() {
2711 return cmThread()->should_yield();
2712 }
2714 bool ConcurrentMark::containing_card_is_marked(void* p) {
2715 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2716 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2717 }
2719 bool ConcurrentMark::containing_cards_are_marked(void* start,
2720 void* last) {
2721 return
2722 containing_card_is_marked(start) &&
2723 containing_card_is_marked(last);
2724 }
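// Worked example for the card arithmetic above, assuming the usual
// 512-byte cards (card_shift == 9): a pointer p that is 1536 bytes past
// the start of the reserved region maps to card index 1536 >> 9 == 3.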
2726 #ifndef PRODUCT
2727 // for debugging purposes
2728 void ConcurrentMark::print_finger() {
2729 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2730 _heap_start, _heap_end, _finger);
2731 for (int i = 0; i < (int) _max_task_num; ++i) {
2732 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2733 }
2734 gclog_or_tty->print_cr("");
2735 }
2736 #endif
2738 // Closure for iteration over bitmaps
2739 class CMBitMapClosure : public BitMapClosure {
2740 private:
2741 // the bitmap that is being iterated over
2742 CMBitMap* _nextMarkBitMap;
2743 ConcurrentMark* _cm;
2744 CMTask* _task;
2745 // true if we're scanning a heap region claimed by the task (so that
2746 // we move the finger along), false if we're not, i.e. currently when
2747 // scanning a heap region popped from the region stack (so that we
2748 // do not move the task finger along; it'd be a mistake if we did so).
2749 bool _scanning_heap_region;
2751 public:
2752 CMBitMapClosure(CMTask *task,
2753 ConcurrentMark* cm,
2754 CMBitMap* nextMarkBitMap)
2755 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2757 void set_scanning_heap_region(bool scanning_heap_region) {
2758 _scanning_heap_region = scanning_heap_region;
2759 }
2761 bool do_bit(size_t offset) {
2762 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2763 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2764 assert( addr < _cm->finger(), "invariant");
2766 if (_scanning_heap_region) {
2767 statsOnly( _task->increase_objs_found_on_bitmap() );
2768 assert(addr >= _task->finger(), "invariant");
2769 // We move that task's local finger along.
2770 _task->move_finger_to(addr);
2771 } else {
2772 // We move the task's region finger along.
2773 _task->move_region_finger_to(addr);
2774 }
2776 _task->scan_object(oop(addr));
2777 // we only partially drain the local queue and global stack
2778 _task->drain_local_queue(true);
2779 _task->drain_global_stack(true);
2781 // if the has_aborted flag has been raised, we need to bail out of
2782 // the iteration
2783 return !_task->has_aborted();
2784 }
2785 };
2787 // Closure for iterating over objects, currently only used for
2788 // processing SATB buffers.
2789 class CMObjectClosure : public ObjectClosure {
2790 private:
2791 CMTask* _task;
2793 public:
2794 void do_object(oop obj) {
2795 _task->deal_with_reference(obj);
2796 }
2798 CMObjectClosure(CMTask* task) : _task(task) { }
2799 };
2801 // Closure for iterating over object fields
2802 class CMOopClosure : public OopClosure {
2803 private:
2804 G1CollectedHeap* _g1h;
2805 ConcurrentMark* _cm;
2806 CMTask* _task;
2808 public:
2809 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2810 virtual void do_oop( oop* p) { do_oop_work(p); }
2812 template <class T> void do_oop_work(T* p) {
2813 assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2814 assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2815 "invariant");
2817 oop obj = oopDesc::load_decode_heap_oop(p);
2818 if (_cm->verbose_high())
2819 gclog_or_tty->print_cr("[%d] we're looking at location "
2820 "*"PTR_FORMAT" = "PTR_FORMAT,
2821 _task->task_id(), p, (void*) obj);
2822 _task->deal_with_reference(obj);
2823 }
2825 CMOopClosure(G1CollectedHeap* g1h,
2826 ConcurrentMark* cm,
2827 CMTask* task)
2828 : _g1h(g1h), _cm(cm), _task(task) { }
2829 };
2831 void CMTask::setup_for_region(HeapRegion* hr) {
2832 // Separated the asserts so that we know which one fires.
2833 assert(hr != NULL,
2834 "claim_region() should have filtered out continues humongous regions");
2835 assert(!hr->continuesHumongous(),
2836 "claim_region() should have filtered out continues humongous regions");
2838 if (_cm->verbose_low())
2839 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2840 _task_id, hr);
2842 _curr_region = hr;
2843 _finger = hr->bottom();
2844 update_region_limit();
2845 }
2847 void CMTask::update_region_limit() {
2848 HeapRegion* hr = _curr_region;
2849 HeapWord* bottom = hr->bottom();
2850 HeapWord* limit = hr->next_top_at_mark_start();
2852 if (limit == bottom) {
2853 if (_cm->verbose_low())
2854 gclog_or_tty->print_cr("[%d] found an empty region "
2855 "["PTR_FORMAT", "PTR_FORMAT")",
2856 _task_id, bottom, limit);
2857 // The region was collected underneath our feet.
2858 // We set the finger to bottom to ensure that the bitmap
2859 // iteration that will follow this will not do anything.
2860 // (this is not a condition that holds when we set the region up,
2861 // as the region is not supposed to be empty in the first place)
2862 _finger = bottom;
2863 } else if (limit >= _region_limit) {
2864 assert(limit >= _finger, "peace of mind");
2865 } else {
2866 assert(limit < _region_limit, "only way to get here");
2867 // This can happen under some pretty unusual circumstances. An
2868 // evacuation pause empties the region underneath our feet (NTAMS
2869 // at bottom). We then do some allocation in the region (NTAMS
2870 // stays at bottom), followed by the region being used as a GC
2871 // alloc region (NTAMS will move to top() and the objects
2872 // originally below it will be grayed). All objects now marked in
2873 // the region are explicitly grayed, if below the global finger,
2874 // and we do not need in fact to scan anything else. So, we simply
2875 // set _finger to be limit to ensure that the bitmap iteration
2876 // doesn't do anything.
2877 _finger = limit;
2878 }
2880 _region_limit = limit;
2881 }
2883 void CMTask::giveup_current_region() {
2884 assert(_curr_region != NULL, "invariant");
2885 if (_cm->verbose_low())
2886 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2887 _task_id, _curr_region);
2888 clear_region_fields();
2889 }
2891 void CMTask::clear_region_fields() {
2892 // Values for these three fields that indicate that we're not
2893 // holding on to a region.
2894 _curr_region = NULL;
2895 _finger = NULL;
2896 _region_limit = NULL;
2898 _region_finger = NULL;
2899 }
2901 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2902 guarantee(nextMarkBitMap != NULL, "invariant");
2904 if (_cm->verbose_low())
2905 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2907 _nextMarkBitMap = nextMarkBitMap;
2908 clear_region_fields();
2910 _calls = 0;
2911 _elapsed_time_ms = 0.0;
2912 _termination_time_ms = 0.0;
2913 _termination_start_time_ms = 0.0;
2915 #if _MARKING_STATS_
2916 _local_pushes = 0;
2917 _local_pops = 0;
2918 _local_max_size = 0;
2919 _objs_scanned = 0;
2920 _global_pushes = 0;
2921 _global_pops = 0;
2922 _global_max_size = 0;
2923 _global_transfers_to = 0;
2924 _global_transfers_from = 0;
2925 _region_stack_pops = 0;
2926 _regions_claimed = 0;
2927 _objs_found_on_bitmap = 0;
2928 _satb_buffers_processed = 0;
2929 _steal_attempts = 0;
2930 _steals = 0;
2931 _aborted = 0;
2932 _aborted_overflow = 0;
2933 _aborted_cm_aborted = 0;
2934 _aborted_yield = 0;
2935 _aborted_timed_out = 0;
2936 _aborted_satb = 0;
2937 _aborted_termination = 0;
2938 #endif // _MARKING_STATS_
2939 }
2941 bool CMTask::should_exit_termination() {
2942 regular_clock_call();
2943 // This is called when we are in the termination protocol. We should
2944 // quit if, for some reason, this task wants to abort or the global
2945 // stack is not empty (this means that we can get work from it).
2946 return !_cm->mark_stack_empty() || has_aborted();
2947 }
2949 // This determines whether the method below will check both the local
2950 // and global fingers when determining whether to push on the stack a
2951 // gray object (value 1) or whether it will only check the global one
2952 // (value 0). The tradeoffs are that the former will be a bit more
2953 // accurate and possibly push less on the stack, but it might also be
2954 // a little bit slower.
2956 #define _CHECK_BOTH_FINGERS_ 1
2958 void CMTask::deal_with_reference(oop obj) {
2959 if (_cm->verbose_high())
2960 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2961 _task_id, (void*) obj);
2963 ++_refs_reached;
2965 HeapWord* objAddr = (HeapWord*) obj;
2966 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2967 if (_g1h->is_in_g1_reserved(objAddr)) {
2968 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2969 HeapRegion* hr = _g1h->heap_region_containing(obj);
2970 if (_g1h->is_obj_ill(obj, hr)) {
2971 if (_cm->verbose_high())
2972 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2973 _task_id, (void*) obj);
2975 // we need to mark it first
2976 if (_nextMarkBitMap->parMark(objAddr)) {
2977 // No OrderAccess::store_load() is needed. It is implicit in the
2978 // CAS done in parMark(objAddr) above
2979 HeapWord* global_finger = _cm->finger();
2981 #if _CHECK_BOTH_FINGERS_
2982 // we will check both the local and global fingers
2984 if (_finger != NULL && objAddr < _finger) {
2985 if (_cm->verbose_high())
2986 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2987 "pushing it", _task_id, _finger);
2988 push(obj);
2989 } else if (_curr_region != NULL && objAddr < _region_limit) {
2990 // do nothing
2991 } else if (objAddr < global_finger) {
2992 // Notice that the global finger might be moving forward
2993 // concurrently. This is not a problem. In the worst case, we
2994 // mark the object while it is above the global finger and, by
2995 // the time we read the global finger, it has moved forward
2996 // past this object. In this case, the object will probably
2997 // be visited when a task is scanning the region and will also
2998 // be pushed on the stack. So, some duplicate work, but no
2999 // correctness problems.
3001 if (_cm->verbose_high())
3002 gclog_or_tty->print_cr("[%d] below the global finger "
3003 "("PTR_FORMAT"), pushing it",
3004 _task_id, global_finger);
3005 push(obj);
3006 } else {
3007 // do nothing
3008 }
3009 #else // _CHECK_BOTH_FINGERS_
3010 // we will only check the global finger
3012 if (objAddr < global_finger) {
3013 // see long comment above
3015 if (_cm->verbose_high())
3016 gclog_or_tty->print_cr("[%d] below the global finger "
3017 "("PTR_FORMAT"), pushing it",
3018 _task_id, global_finger);
3019 push(obj);
3020 }
3021 #endif // _CHECK_BOTH_FINGERS_
3022 }
3023 }
3024 }
3025 }
3027 void CMTask::push(oop obj) {
3028 HeapWord* objAddr = (HeapWord*) obj;
3029 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3030 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3031 "invariant");
3032 assert(!_g1h->is_obj_ill(obj), "invariant");
3033 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3035 if (_cm->verbose_high())
3036 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3038 if (!_task_queue->push(obj)) {
3039 // The local task queue looks full. We need to push some entries
3040 // to the global stack.
3042 if (_cm->verbose_medium())
3043 gclog_or_tty->print_cr("[%d] task queue overflow, "
3044 "moving entries to the global stack",
3045 _task_id);
3046 move_entries_to_global_stack();
3048 // this should succeed since, even if we overflow the global
3049 // stack, we should have definitely removed some entries from the
3050 // local queue. So, there must be space on it.
3051 bool success = _task_queue->push(obj);
3052 assert(success, "invariant");
3053 }
3055 statsOnly( int tmp_size = _task_queue->size();
3056 if (tmp_size > _local_max_size)
3057 _local_max_size = tmp_size;
3058 ++_local_pushes );
3059 }
3061 void CMTask::reached_limit() {
3062 assert(_words_scanned >= _words_scanned_limit ||
3063 _refs_reached >= _refs_reached_limit,
3064 "shouldn't have been called otherwise");
3065 regular_clock_call();
3066 }
3068 void CMTask::regular_clock_call() {
3069 if (has_aborted())
3070 return;
3072 // First, we need to recalculate the words scanned and refs reached
3073 // limits for the next clock call.
3074 recalculate_limits();
3076 // During the regular clock call we do the following
3078 // (1) If an overflow has been flagged, then we abort.
3079 if (_cm->has_overflown()) {
3080 set_has_aborted();
3081 return;
3082 }
3084 // If we are not concurrent (i.e. we're doing remark) we don't need
3085 // to check anything else. The other steps are only needed during
3086 // the concurrent marking phase.
3087 if (!concurrent())
3088 return;
3090 // (2) If marking has been aborted for Full GC, then we also abort.
3091 if (_cm->has_aborted()) {
3092 set_has_aborted();
3093 statsOnly( ++_aborted_cm_aborted );
3094 return;
3095 }
3097 double curr_time_ms = os::elapsedVTime() * 1000.0;
3099 // (3) If marking stats are enabled, then we update the step history.
3100 #if _MARKING_STATS_
3101 if (_words_scanned >= _words_scanned_limit)
3102 ++_clock_due_to_scanning;
3103 if (_refs_reached >= _refs_reached_limit)
3104 ++_clock_due_to_marking;
3106 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3107 _interval_start_time_ms = curr_time_ms;
3108 _all_clock_intervals_ms.add(last_interval_ms);
3110 if (_cm->verbose_medium()) {
3111 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3112 "scanned = %d%s, refs reached = %d%s",
3113 _task_id, last_interval_ms,
3114 _words_scanned,
3115 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3116 _refs_reached,
3117 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3118 }
3119 #endif // _MARKING_STATS_
3121 // (4) We check whether we should yield. If we have to, then we abort.
3122 if (_cm->should_yield()) {
3123 // We should yield. To do this we abort the task. The caller is
3124 // responsible for yielding.
3125 set_has_aborted();
3126 statsOnly( ++_aborted_yield );
3127 return;
3128 }
3130 // (5) We check whether we've reached our time quota. If we have,
3131 // then we abort.
3132 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3133 if (elapsed_time_ms > _time_target_ms) {
3134 set_has_aborted();
3135 _has_aborted_timed_out = true;
3136 statsOnly( ++_aborted_timed_out );
3137 return;
3138 }
3140 // (6) Finally, we check whether there are enough completed SATB
3141 // buffers available for processing. If there are, we abort.
3142 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3143 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3144 if (_cm->verbose_low())
3145 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3146 _task_id);
3147 // we do need to process SATB buffers, we'll abort and restart
3148 // the marking task to do so
3149 set_has_aborted();
3150 statsOnly( ++_aborted_satb );
3151 return;
3152 }
3153 }
3155 void CMTask::recalculate_limits() {
3156 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3157 _words_scanned_limit = _real_words_scanned_limit;
3159 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3160 _refs_reached_limit = _real_refs_reached_limit;
3161 }
3163 void CMTask::decrease_limits() {
3164 // This is called when we believe that we're going to do an infrequent
3165 // operation which will increase the per byte scanned cost (i.e. move
3166 // entries to/from the global stack). It basically tries to decrease the
3167 // scanning limit so that the clock is called earlier.
3169 if (_cm->verbose_medium())
3170 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3172 _words_scanned_limit = _real_words_scanned_limit -
3173 3 * words_scanned_period / 4;
3174 _refs_reached_limit = _real_refs_reached_limit -
3175 3 * refs_reached_period / 4;
3176 }
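// For example: if words_scanned_period is P and the limits have just been
// recalculated (so _real_words_scanned_limit == _words_scanned + P), the
// adjustment above leaves only P/4 words until the next clock call; the
// refs-reached limit shrinks analogously.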
3178 void CMTask::move_entries_to_global_stack() {
3179 // local array where we'll store the entries that will be popped
3180 // from the local queue
3181 oop buffer[global_stack_transfer_size];
3183 int n = 0;
3184 oop obj;
3185 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3186 buffer[n] = obj;
3187 ++n;
3188 }
3190 if (n > 0) {
3191 // we popped at least one entry from the local queue
3193 statsOnly( ++_global_transfers_to; _local_pops += n );
3195 if (!_cm->mark_stack_push(buffer, n)) {
3196 if (_cm->verbose_low())
3197 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3198 set_has_aborted();
3199 } else {
3200 // the transfer was successful
3202 if (_cm->verbose_medium())
3203 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3204 _task_id, n);
3205 statsOnly( int tmp_size = _cm->mark_stack_size();
3206 if (tmp_size > _global_max_size)
3207 _global_max_size = tmp_size;
3208 _global_pushes += n );
3209 }
3210 }
3212 // this operation was quite expensive, so decrease the limits
3213 decrease_limits();
3214 }
3216 void CMTask::get_entries_from_global_stack() {
3217 // local array where we'll store the entries that will be popped
3218 // from the global stack.
3219 oop buffer[global_stack_transfer_size];
3220 int n;
3221 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3222 assert(n <= global_stack_transfer_size,
3223 "we should not pop more than the given limit");
3224 if (n > 0) {
3225 // yes, we did actually pop at least one entry
3227 statsOnly( ++_global_transfers_from; _global_pops += n );
3228 if (_cm->verbose_medium())
3229 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3230 _task_id, n);
3231 for (int i = 0; i < n; ++i) {
3232 bool success = _task_queue->push(buffer[i]);
3233 // We only call this when the local queue is empty or under a
3234 // given target limit. So, we do not expect this push to fail.
3235 assert(success, "invariant");
3236 }
3238 statsOnly( int tmp_size = _task_queue->size();
3239 if (tmp_size > _local_max_size)
3240 _local_max_size = tmp_size;
3241 _local_pushes += n );
3242 }
3244 // this operation was quite expensive, so decrease the limits
3245 decrease_limits();
3246 }
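// Taken together, move_entries_to_global_stack() and
// get_entries_from_global_stack() transfer work in chunks of up to
// global_stack_transfer_size oops, amortising the synchronisation cost of
// the global stack over many entries; both therefore also call
// decrease_limits() so that the regular clock fires sooner afterwards.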
3248 void CMTask::drain_local_queue(bool partially) {
3249 if (has_aborted())
3250 return;
3252 // Decide what the target size is, depending whether we're going to
3253 // drain it partially (so that other tasks can steal if they run out
3254 // of things to do) or totally (at the very end).
3255 size_t target_size;
3256 if (partially)
3257 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3258 else
3259 target_size = 0;
3261 if (_task_queue->size() > target_size) {
3262 if (_cm->verbose_high())
3263 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3264 _task_id, target_size);
3266 oop obj;
3267 bool ret = _task_queue->pop_local(obj);
3268 while (ret) {
3269 statsOnly( ++_local_pops );
3271 if (_cm->verbose_high())
3272 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3273 (void*) obj);
3275 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3276 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3277 "invariant");
3279 scan_object(obj);
3281 if (_task_queue->size() <= target_size || has_aborted())
3282 ret = false;
3283 else
3284 ret = _task_queue->pop_local(obj);
3285 }
3287 if (_cm->verbose_high())
3288 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3289 _task_id, _task_queue->size());
3290 }
3291 }
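// For instance (sketch, assuming GCDrainStackTargetSize == 64 and a queue
// with max_elems well above 3 * 64): a partial drain stops once 64 entries
// remain, deliberately leaving work for other tasks to steal, while a
// total drain (partially == false) empties the queue completely.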
3293 void CMTask::drain_global_stack(bool partially) {
3294 if (has_aborted())
3295 return;
3297 // We have a policy to drain the local queue before we attempt to
3298 // drain the global stack.
3299 assert(partially || _task_queue->size() == 0, "invariant");
3301 // Decide what the target size is, depending whether we're going to
3302 // drain it partially (so that other tasks can steal if they run out
3303 // of things to do) or totally (at the very end). Notice that,
3304 // because we move entries from the global stack in chunks or
3305 // because another task might be doing the same, we might in fact
3306 // drop below the target. But, this is not a problem.
3307 size_t target_size;
3308 if (partially)
3309 target_size = _cm->partial_mark_stack_size_target();
3310 else
3311 target_size = 0;
3313 if (_cm->mark_stack_size() > target_size) {
3314 if (_cm->verbose_low())
3315 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3316 _task_id, target_size);
3318 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3319 get_entries_from_global_stack();
3320 drain_local_queue(partially);
3321 }
3323 if (_cm->verbose_low())
3324 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3325 _task_id, _cm->mark_stack_size());
3326 }
3327 }
3329 // SATB Queue has several assumptions on whether to call the par or
3330 // non-par versions of the methods. This is why some of the code is
3331 // replicated. We should really get rid of the single-threaded version
3332 // of the code to simplify things.
3333 void CMTask::drain_satb_buffers() {
3334 if (has_aborted())
3335 return;
3337 // We set this so that the regular clock knows that we're in the
3338 // middle of draining buffers and doesn't set the abort flag when it
3339 // notices that SATB buffers are available for draining. It'd be
3340 // very counterproductive if it did that. :-)
3341 _draining_satb_buffers = true;
3343 CMObjectClosure oc(this);
3344 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3345 if (ParallelGCThreads > 0)
3346 satb_mq_set.set_par_closure(_task_id, &oc);
3347 else
3348 satb_mq_set.set_closure(&oc);
3350 // This keeps claiming and applying the closure to completed buffers
3351 // until we run out of buffers or we need to abort.
3352 if (ParallelGCThreads > 0) {
3353 while (!has_aborted() &&
3354 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3355 if (_cm->verbose_medium())
3356 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3357 statsOnly( ++_satb_buffers_processed );
3358 regular_clock_call();
3359 }
3360 } else {
3361 while (!has_aborted() &&
3362 satb_mq_set.apply_closure_to_completed_buffer()) {
3363 if (_cm->verbose_medium())
3364 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3365 statsOnly( ++_satb_buffers_processed );
3366 regular_clock_call();
3367 }
3368 }
3370 if (!concurrent() && !has_aborted()) {
3371 // We should only do this during remark.
3372 if (ParallelGCThreads > 0)
3373 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3374 else
3375 satb_mq_set.iterate_closure_all_threads();
3376 }
3378 _draining_satb_buffers = false;
3380 assert(has_aborted() ||
3381 concurrent() ||
3382 satb_mq_set.completed_buffers_num() == 0, "invariant");
3384 if (ParallelGCThreads > 0)
3385 satb_mq_set.set_par_closure(_task_id, NULL);
3386 else
3387 satb_mq_set.set_closure(NULL);
3389 // again, this was a potentially expensive operation, decrease the
3390 // limits to get the regular clock call early
3391 decrease_limits();
3392 }
3394 void CMTask::drain_region_stack(BitMapClosure* bc) {
3395 if (has_aborted())
3396 return;
3398 assert(_region_finger == NULL,
3399 "it should be NULL when we're not scanning a region");
3401 if (!_cm->region_stack_empty()) {
3402 if (_cm->verbose_low())
3403 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3404 _task_id, _cm->region_stack_size());
3406 MemRegion mr = _cm->region_stack_pop_with_lock();
3407 // it returns MemRegion() if the pop fails
3408 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3410 while (mr.start() != NULL) {
3411 if (_cm->verbose_medium())
3412 gclog_or_tty->print_cr("[%d] we are scanning region "
3413 "["PTR_FORMAT", "PTR_FORMAT")",
3414 _task_id, mr.start(), mr.end());
3415 assert(mr.end() <= _cm->finger(),
3416 "otherwise the region shouldn't be on the stack");
3417 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3418 if (_nextMarkBitMap->iterate(bc, mr)) {
3419 assert(!has_aborted(),
3420 "cannot abort the task without aborting the bitmap iteration");
3422 // We finished iterating over the region without aborting.
3423 regular_clock_call();
3424 if (has_aborted())
3425 mr = MemRegion();
3426 else {
3427 mr = _cm->region_stack_pop_with_lock();
3428 // it returns MemRegion() if the pop fails
3429 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3430 }
3431 } else {
3432 assert(has_aborted(), "currently the only way to do so");
3434 // The only way to abort the bitmap iteration is to return
3435 // false from the do_bit() method. However, inside the
3436 // do_bit() method we move the _region_finger to point to the
3437 // object currently being looked at. So, if we bail out, we
3438 // have definitely set _region_finger to something non-null.
3439 assert(_region_finger != NULL, "invariant");
3441 // The iteration was actually aborted. So now _region_finger
3442 // points to the address of the object we last scanned. If we
3443 // leave it there, when we restart this task, we will rescan
3444 // the object. It is easy to avoid this. We move the finger by
3445 // enough to point to the next possible object header (the
3446 // bitmap knows by how much we need to move it as it knows its
3447 // granularity).
3448 MemRegion newRegion =
3449 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3451 if (!newRegion.is_empty()) {
3452 if (_cm->verbose_low()) {
3453 gclog_or_tty->print_cr("[%d] pushing unscanned region "
3454 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3455 _task_id,
3456 newRegion.start(), newRegion.end());
3457 }
3458 // Now push the part of the region we didn't scan on the
3459 // region stack to make sure a task scans it later.
3460 _cm->region_stack_push_with_lock(newRegion);
3461 }
3462 // break from while
3463 mr = MemRegion();
3464 }
3465 _region_finger = NULL;
3466 }
3468 if (_cm->verbose_low())
3469 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3470 _task_id, _cm->region_stack_size());
3471 }
3472 }
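// A minimal sketch of what the *_with_lock variants used above amount to:
// serialising marking-time access to the region stack behind a dedicated
// mutex. The lock and field names below are assumptions, not the actual
// declarations; the original lock-free push() / pop() remain usable inside
// a GC pause, where marking does not run concurrently.
//
//   MemRegion ConcurrentMark::region_stack_pop_with_lock() {
//     MutexLockerEx x(&_region_stack_lock, Mutex::_no_safepoint_check_flag);
//     return _regionStack.pop();
//   }
//
//   void ConcurrentMark::region_stack_push_with_lock(MemRegion mr) {
//     MutexLockerEx x(&_region_stack_lock, Mutex::_no_safepoint_check_flag);
//     _regionStack.push(mr);
//   }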
3474 void CMTask::print_stats() {
3475 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3476 _task_id, _calls);
3477 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3478 _elapsed_time_ms, _termination_time_ms);
3479 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3480 _step_times_ms.num(), _step_times_ms.avg(),
3481 _step_times_ms.sd());
3482 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3483 _step_times_ms.maximum(), _step_times_ms.sum());
3485 #if _MARKING_STATS_
3486 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3487 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3488 _all_clock_intervals_ms.sd());
3489 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3490 _all_clock_intervals_ms.maximum(),
3491 _all_clock_intervals_ms.sum());
3492 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3493 _clock_due_to_scanning, _clock_due_to_marking);
3494 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3495 _objs_scanned, _objs_found_on_bitmap);
3496 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3497 _local_pushes, _local_pops, _local_max_size);
3498 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3499 _global_pushes, _global_pops, _global_max_size);
3500 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3501 _global_transfers_to, _global_transfers_from);
3502 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3503 _regions_claimed, _region_stack_pops);
3504 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3505 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3506 _steal_attempts, _steals);
3507 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3508 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3509 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3510 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3511 _aborted_timed_out, _aborted_satb, _aborted_termination);
3512 #endif // _MARKING_STATS_
3513 }
3515 /*****************************************************************************
3517 The do_marking_step(time_target_ms) method is the building block
3518 of the parallel marking framework. It can be called in parallel
3519 with other invocations of do_marking_step() on different tasks
3520 (but only one per task, obviously) and concurrently with the
3521 mutator threads, or during remark, hence it eliminates the need
3522 for two versions of the code. When called during remark, it will
3523 pick up from where the task left off during the concurrent marking
3524 phase. Interestingly, tasks are also claimable during evacuation
3525 pauses, since do_marking_step() ensures that it aborts before
3526 it needs to yield.
3528 The data structures that it uses to do marking work are the
3529 following:
3531 (1) Marking Bitmap. If there are gray objects that appear only
3532 on the bitmap (this happens either when dealing with an overflow
3533 or when the initial marking phase has simply marked the roots
3534 and didn't push them on the stack), then tasks claim heap
3535 regions whose bitmap they then scan to find gray objects. A
3536 global finger indicates where the end of the last claimed region
3537 is. A local finger indicates how far into the region a task has
3538 scanned. The two fingers are used to determine how to gray an
3539 object (i.e. whether simply marking it is OK, as it will be
3540 visited by a task in the future, or whether it needs to be also
3541 pushed on a stack).
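    As a hedged sketch (the framing below is hypothetical; the real
    logic lives in CMTask::deal_with_reference()), the decision is
    roughly:

      if ((HeapWord*) obj >= _cm->finger()) {
        // above the global finger: some task will still claim the
        // object's region and scan its bitmap, so marking is enough
        _nextMarkBitMap->mark((HeapWord*) obj);
      } else {
        // below the global finger: no future bitmap scan will visit
        // it, so mark it and also push it for explicit draining
        _nextMarkBitMap->mark((HeapWord*) obj);
        push(obj);
      }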
3543 (2) Local Queue. The local queue of the task, which the task
3544 can access reasonably efficiently. Other tasks can steal from
3545 it when they run out of work. Throughout the marking phase, a
3546 task attempts to keep its local queue short but not totally
3547 empty, so that entries are available for stealing by other
3548 tasks. Only when there is no more work will a task totally
3549 drain its local queue.
3551 (3) Global Mark Stack. This handles local queue overflow. During
3552 marking only sets of entries are moved between it and the local
3553 queues, since accessing it requires a mutex, and finer-grained
3554 interaction with it might cause contention. If it
3555 overflows, then the marking phase should restart and iterate
3556 over the bitmap to identify gray objects. Throughout the marking
3557 phase, tasks attempt to keep the global mark stack at a small
3558 length but not totally empty, so that entries are available for
3559 popping by other tasks. Only when there is no more work will
3560 tasks totally drain the global mark stack.
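    The chunked movement looks roughly like this (a sketch; the real
    code is get_entries_from_global_stack() and its push counterpart,
    and the chunk-size constant is an assumption):

      oop obj;
      oop buffer[global_stack_transfer_size];
      int n = 0;
      while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
        buffer[n++] = obj;
      }
      _cm->mark_stack_push(buffer, n);  // one synchronised operation per chunk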
3562 (4) Global Region Stack. Entries on it correspond to areas of
3563 the bitmap that need to be scanned since they contain gray
3564 objects. Pushes on the region stack only happen during
3565 evacuation pauses and typically correspond to areas covered by
3566 GC LABs. If it overflows, then the marking phase should restart
3567 and iterate over the bitmap to identify gray objects. Tasks will
3568 try to totally drain the region stack as soon as possible.
3570 (5) SATB Buffer Queue. This is where completed SATB buffers are
3571 made available. Buffers are regularly removed from this queue
3572 and scanned for roots, so that the queue doesn't get too
3573 long. During remark, all completed buffers are processed, as
3574 well as the filled-in parts of any uncompleted buffers.
3576 The do_marking_step() method tries to abort when the time target
3577 has been reached. There are a few other cases when the
3578 do_marking_step() method also aborts:
3580 (1) When the marking phase has been aborted (after a Full GC).
3582 (2) When a global overflow (either on the global stack or the
3583 region stack) has been triggered. Before the task aborts, it
3584 will actually sync up with the other tasks to ensure that all
3585 the marking data structures (local queues, stacks, fingers etc.)
3586 are re-initialised so that when do_marking_step() completes,
3587 the marking phase can immediately restart.
3589 (3) When enough completed SATB buffers are available. The
3590 do_marking_step() method only tries to drain SATB buffers right
3591 at the beginning. So, if enough buffers are available, the
3592 marking step aborts and the SATB buffers are processed at
3593 the beginning of the next invocation.
3595 (4) To yield. When we have to yield, we abort and yield
3596 right at the end of do_marking_step(). This saves us from a lot
3597 of hassle as, by yielding, we might allow a Full GC. If this
3598 happens then objects will be compacted underneath our feet, the
3599 heap might shrink, etc. We avoid checking for all this by just
3600 aborting and doing the yield right at the end.
3602 From the above it follows that the do_marking_step() method should
3603 be called in a loop (or, otherwise, regularly) until it completes.
3605 If a marking step completes without its has_aborted() flag being
3606 true, it means it has completed the current marking phase (and
3607 also all other marking tasks have done so and have all synced up).
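    Sketched as a caller loop (simplified; the actual drivers are the
    concurrent marking threads and the parallel remark task):

      do {
        task->do_marking_step(target_ms);
        // yielding / overflow hand-shaking happens between steps
      } while (!_cm->has_aborted() && task->has_aborted());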
3609 A method called regular_clock_call() is invoked "regularly" (in
3610 sub ms intervals) throughout marking. It is this clock method that
3611 checks all the abort conditions which were mentioned above and
3612 decides when the task should abort. A work-based scheme is used to
3613 trigger this clock method: when the number of object words the
3614 marking phase has scanned or the number of references the marking
3615 phase has visited reach a given limit. Additional invocations of
3616 the clock method have been planted in a few other strategic places
3617 too. The initial reason for the clock method was to avoid calling
3618 vtime too regularly, as it is quite expensive. So, once it was in
3619 place, it was natural to piggy-back all the other conditions on it
3620 too and not constantly check them throughout the code.
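    The work-based trigger amounts to a check of this shape (a sketch;
    the limit fields are the ones recalculate_limits() maintains):

      if (_words_scanned >= _words_scanned_limit ||
          _refs_reached >= _refs_reached_limit) {
        regular_clock_call();  // re-checks time and all abort conditions
      }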
3622 *****************************************************************************/
3624 void CMTask::do_marking_step(double time_target_ms) {
3625 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3626 assert(concurrent() == _cm->concurrent(), "they should be the same");
3628 assert(concurrent() || _cm->region_stack_empty(),
3629 "the region stack should have been cleared before remark");
3630 assert(_region_finger == NULL,
3631 "this should be non-null only when a region is being scanned");
3633 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3634 assert(_task_queues != NULL, "invariant");
3635 assert(_task_queue != NULL, "invariant");
3636 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3638 assert(!_claimed,
3639 "only one thread should claim this task at any one time");
3641 // OK, this doesn't safeguard against all possible scenarios, as it is
3642 // possible for two threads to set the _claimed flag at the same
3643 // time. But it is only for debugging purposes anyway and it will
3644 // catch most problems.
3645 _claimed = true;
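  // (Illustrative aside: a fully race-proof claim would need an atomic
  // exchange on a word-sized flag, e.g. something like
  //     if (Atomic::cmpxchg(1, &_claimed_word, 0) != 0) return;
  // with _claimed_word a hypothetical jint field; since _claimed is
  // debug-only, the plain store above is an accepted trade-off.)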
3647 _start_time_ms = os::elapsedVTime() * 1000.0;
3648 statsOnly( _interval_start_time_ms = _start_time_ms );
3650 double diff_prediction_ms =
3651 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3652 _time_target_ms = time_target_ms - diff_prediction_ms;
3654 // set up the variables that are used in the work-based scheme to
3655 // call the regular clock method
3656 _words_scanned = 0;
3657 _refs_reached = 0;
3658 recalculate_limits();
3660 // clear all flags
3661 clear_has_aborted();
3662 _has_aborted_timed_out = false;
3663 _draining_satb_buffers = false;
3665 ++_calls;
3667 if (_cm->verbose_low())
3668 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3669 "target = %1.2lfms >>>>>>>>>>",
3670 _task_id, _calls, _time_target_ms);
3672 // Set up the bitmap and oop closures. Anything that uses them is
3673 // eventually called from this method, so it is OK to allocate them
3674 // locally, on this method's stack.
3675 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3676 CMOopClosure oop_closure(_g1h, _cm, this);
3677 set_oop_closure(&oop_closure);
3679 if (_cm->has_overflown()) {
3680 // This can happen if the region stack or the mark stack overflows
3681 // during a GC pause and this task, after a yield point,
3682 // restarts. We have to abort as we need to get into the overflow
3683 // protocol which happens right at the end of this task.
3684 set_has_aborted();
3685 }
3687 // First drain any available SATB buffers. After this, we will not
3688 // look at SATB buffers before the next invocation of this method.
3689 // If enough completed SATB buffers are queued up, the regular clock
3690 // will abort this task so that it restarts.
3691 drain_satb_buffers();
3692 // ...then partially drain the local queue and the global stack
3693 drain_local_queue(true);
3694 drain_global_stack(true);
3696 // Then totally drain the region stack. We will not look at
3697 // it again before the next invocation of this method. Entries on
3698 // the region stack are only added during evacuation pauses, for
3699 // which we have to yield. When we do, we abort the task anyway so
3700 // it will look at the region stack again when it restarts.
3701 bitmap_closure.set_scanning_heap_region(false);
3702 drain_region_stack(&bitmap_closure);
3703 // ...then partially drain the local queue and the global stack
3704 drain_local_queue(true);
3705 drain_global_stack(true);
3707 do {
3708 if (!has_aborted() && _curr_region != NULL) {
3709 // This means that we're already holding on to a region.
3710 assert(_finger != NULL, "if region is not NULL, then the finger "
3711 "should not be NULL either");
3713 // We might have restarted this task after an evacuation pause
3714 // which might have evacuated the region we're holding on to
3715 // underneath our feet. Let's read its limit again to make sure
3716 // that we do not iterate over a region of the heap that
3717 // contains garbage (update_region_limit() will also move
3718 // _finger to the start of the region if it is found empty).
3719 update_region_limit();
3720 // We will start from _finger not from the start of the region,
3721 // as we might be restarting this task after aborting half-way
3722 // through scanning this region. In this case, _finger points to
3723 // the address where we last found a marked object. If this is a
3724 // fresh region, _finger points to start().
3725 MemRegion mr = MemRegion(_finger, _region_limit);
3727 if (_cm->verbose_low())
3728 gclog_or_tty->print_cr("[%d] we're scanning part "
3729 "["PTR_FORMAT", "PTR_FORMAT") "
3730 "of region "PTR_FORMAT,
3731 _task_id, _finger, _region_limit, _curr_region);
3733 // Let's iterate over the bitmap of the part of the
3734 // region that is left.
3735 bitmap_closure.set_scanning_heap_region(true);
3736 if (mr.is_empty() ||
3737 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3738 // We successfully completed iterating over the region. Now,
3739 // let's give up the region.
3740 giveup_current_region();
3741 regular_clock_call();
3742 } else {
3743 assert(has_aborted(), "currently the only way to do so");
3744 // The only way to abort the bitmap iteration is to return
3745 // false from the do_bit() method. However, inside the
3746 // do_bit() method we move the _finger to point to the
3747 // object currently being looked at. So, if we bail out, we
3748 // have definitely set _finger to something non-null.
3749 assert(_finger != NULL, "invariant");
3751 // Region iteration was actually aborted. So now _finger
3752 // points to the address of the object we last scanned. If we
3753 // leave it there, when we restart this task, we will rescan
3754 // the object. It is easy to avoid this. We move the finger by
3755 // enough to point to the next possible object header (the
3756 // bitmap knows by how much we need to move it as it knows its
3757 // granularity).
3758 assert(_finger < _region_limit, "invariant");
3759 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3760 // Check if bitmap iteration was aborted while scanning the last object
3761 if (new_finger >= _region_limit) {
3762 giveup_current_region();
3763 } else {
3764 move_finger_to(new_finger);
3765 }
3766 }
3767 }
3768 // At this point we have either completed iterating over the
3769 // region we were holding on to, or we have aborted.
3771 // We then partially drain the local queue and the global stack.
3772 // (Do we really need this?)
3773 drain_local_queue(true);
3774 drain_global_stack(true);
3776 // Read the note on the claim_region() method on why it might
3777 // return NULL with potentially more regions available for
3778 // claiming and why we have to check out_of_regions() to determine
3779 // whether we're done or not.
3780 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3781 // We are going to try to claim a new region. We should have
3782 // given up on the previous one.
3783 // Separated the asserts so that we know which one fires.
3784 assert(_curr_region == NULL, "invariant");
3785 assert(_finger == NULL, "invariant");
3786 assert(_region_limit == NULL, "invariant");
3787 if (_cm->verbose_low())
3788 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3789 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3790 if (claimed_region != NULL) {
3791 // Yes, we managed to claim one
3792 statsOnly( ++_regions_claimed );
3794 if (_cm->verbose_low())
3795 gclog_or_tty->print_cr("[%d] we successfully claimed "
3796 "region "PTR_FORMAT,
3797 _task_id, claimed_region);
3799 setup_for_region(claimed_region);
3800 assert(_curr_region == claimed_region, "invariant");
3801 }
3802 // It is important to call the regular clock here. It might take
3803 // a while to claim a region if, for example, we hit a large
3804 // block of empty regions. So we need to call the regular clock
3805 // method once round the loop to make sure it's called
3806 // frequently enough.
3807 regular_clock_call();
3808 }
3810 if (!has_aborted() && _curr_region == NULL) {
3811 assert(_cm->out_of_regions(),
3812 "at this point we should be out of regions");
3813 }
3814 } while ( _curr_region != NULL && !has_aborted());
3816 if (!has_aborted()) {
3817 // We cannot check whether the global stack is empty, since other
3818 // tasks might be pushing objects to it concurrently. We also cannot
3819 // check if the region stack is empty because if a thread is aborting
3820 // it can push a partially done region back.
3821 assert(_cm->out_of_regions(),
3822 "at this point we should be out of regions");
3824 if (_cm->verbose_low())
3825 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3827 // Try to reduce the number of available SATB buffers so that
3828 // remark has less work to do.
3829 drain_satb_buffers();
3830 }
3832 // Since we've done everything else, we can now totally drain the
3833 // local queue and global stack.
3834 drain_local_queue(false);
3835 drain_global_stack(false);
3837 // Attempt at work stealing from other tasks' queues.
3838 if (!has_aborted()) {
3839 // We have not aborted. This means that we have finished all that
3840 // we could. Let's try to do some stealing...
3842 // We cannot check whether the global stack is empty, since other
3843 // tasks might be pushing objects to it concurrently. We also cannot
3844 // check if the region stack is empty because if a thread is aborting
3845 // it can push a partially done region back.
3846 assert(_cm->out_of_regions() && _task_queue->size() == 0,
3847 "only way to reach here");
3849 if (_cm->verbose_low())
3850 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3852 while (!has_aborted()) {
3853 oop obj;
3854 statsOnly( ++_steal_attempts );
3856 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3857 if (_cm->verbose_medium())
3858 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3859 _task_id, (void*) obj);
3861 statsOnly( ++_steals );
3863 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3864 "any stolen object should be marked");
3865 scan_object(obj);
3867 // And since we're towards the end, let's totally drain the
3868 // local queue and global stack.
3869 drain_local_queue(false);
3870 drain_global_stack(false);
3871 } else {
3872 break;
3873 }
3874 }
3875 }
3877 // We still haven't aborted. Now, let's try to get into the
3878 // termination protocol.
3879 if (!has_aborted()) {
3880 // We cannot check whether the global stack is empty, since other
3881 // tasks might be concurrently pushing objects on it. We also cannot
3882 // check if the region stack is empty because if a thread is aborting
3883 // it can push a partially done region back.
3884 // Separated the asserts so that we know which one fires.
3885 assert(_cm->out_of_regions(), "only way to reach here");
3886 assert(_task_queue->size() == 0, "only way to reach here");
3888 if (_cm->verbose_low())
3889 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3891 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3892 // The CMTask class also extends the TerminatorTerminator class,
3893 // hence its should_exit_termination() method will also decide
3894 // whether to exit the termination protocol or not.
3895 bool finished = _cm->terminator()->offer_termination(this);
3896 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3897 _termination_time_ms +=
3898 termination_end_time_ms - _termination_start_time_ms;
3900 if (finished) {
3901 // We're all done.
3903 if (_task_id == 0) {
3904 // let's allow task 0 to do this
3905 if (concurrent()) {
3906 assert(_cm->concurrent_marking_in_progress(), "invariant");
3907 // we need to set this to false before the next
3908 // safepoint. This way we ensure that the marking phase
3909 // doesn't observe any more heap expansions.
3910 _cm->clear_concurrent_marking_in_progress();
3911 }
3912 }
3914 // We can now guarantee that the global stack is empty, since
3915 // all other tasks have finished. We separated the guarantees so
3916 // that, if a condition is false, we can immediately find out
3917 // which one.
3918 guarantee(_cm->out_of_regions(), "only way to reach here");
3919 guarantee(_cm->region_stack_empty(), "only way to reach here");
3920 guarantee(_cm->mark_stack_empty(), "only way to reach here");
3921 guarantee(_task_queue->size() == 0, "only way to reach here");
3922 guarantee(!_cm->has_overflown(), "only way to reach here");
3923 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3924 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
3926 if (_cm->verbose_low())
3927 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3928 } else {
3929 // Apparently there's more work to do. Let's abort this task. The
3930 // caller will restart it and we can hopefully find more things to do.
3932 if (_cm->verbose_low())
3933 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3935 set_has_aborted();
3936 statsOnly( ++_aborted_termination );
3937 }
3938 }
3940 // Mainly for debugging purposes to make sure that a pointer to the
3941 // closure which was allocated locally in this frame doesn't
3942 // escape it by accident.
3943 set_oop_closure(NULL);
3944 double end_time_ms = os::elapsedVTime() * 1000.0;
3945 double elapsed_time_ms = end_time_ms - _start_time_ms;
3946 // Update the step history.
3947 _step_times_ms.add(elapsed_time_ms);
3949 if (has_aborted()) {
3950 // The task was aborted for some reason.
3952 statsOnly( ++_aborted );
3954 if (_has_aborted_timed_out) {
3955 double diff_ms = elapsed_time_ms - _time_target_ms;
3956 // Keep statistics of how well we did with respect to hitting
3957 // our target only if we actually timed out (if we aborted for
3958 // other reasons, then the results might get skewed).
3959 _marking_step_diffs_ms.add(diff_ms);
3960 }
3962 if (_cm->has_overflown()) {
3963 // This is the interesting one. We aborted because a global
3964 // overflow was raised. This means we have to restart the
3965 // marking phase and start iterating over regions. However, in
3966 // order to do this we have to make sure that all tasks stop
3967 // what they are doing and re-initialise in a safe manner. We
3968 // will achieve this with the use of two barrier sync points.
3970 if (_cm->verbose_low())
3971 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3973 _cm->enter_first_sync_barrier(_task_id);
3974 // When we exit this sync barrier we know that all tasks have
3975 // stopped doing marking work. So, it's now safe to
3976 // re-initialise our data structures. At the end of this method,
3977 // task 0 will clear the global data structures.
3979 statsOnly( ++_aborted_overflow );
3981 // We clear the local state of this task...
3982 clear_region_fields();
3984 // ...and enter the second barrier.
3985 _cm->enter_second_sync_barrier(_task_id);
3986 // At this point everything has been re-initialised and we're
3987 // ready to restart.
3988 }
3990 if (_cm->verbose_low()) {
3991 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3992 "elapsed = %1.2lfms <<<<<<<<<<",
3993 _task_id, _time_target_ms, elapsed_time_ms);
3994 if (_cm->has_aborted())
3995 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3996 _task_id);
3997 }
3998 } else {
3999 if (_cm->verbose_low())
4000 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4001 "elapsed = %1.2lfms <<<<<<<<<<",
4002 _task_id, _time_target_ms, elapsed_time_ms);
4003 }
4005 _claimed = false;
4006 }
4008 CMTask::CMTask(int task_id,
4009 ConcurrentMark* cm,
4010 CMTaskQueue* task_queue,
4011 CMTaskQueueSet* task_queues)
4012 : _g1h(G1CollectedHeap::heap()),
4013 _task_id(task_id), _cm(cm),
4014 _claimed(false),
4015 _nextMarkBitMap(NULL), _hash_seed(17),
4016 _task_queue(task_queue),
4017 _task_queues(task_queues),
4018 _oop_closure(NULL) {
4019 guarantee(task_queue != NULL, "invariant");
4020 guarantee(task_queues != NULL, "invariant");
4022 statsOnly( _clock_due_to_scanning = 0;
4023 _clock_due_to_marking = 0 );
4025 _marking_step_diffs_ms.add(0.5);
4026 }