Mon, 31 Aug 2009 05:27:29 -0700
6841313: G1: dirty cards of survivor regions in parallel
Reviewed-by: tonyp, iveresov
/*
 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// Concurrent Mark (CM) Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CM bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CM bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
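
// Each bit in the map covers (1 << _shifter) HeapWords, so conversions
// between heap addresses and bit offsets go through heapWordToOffset()
// and offsetToHeapWord().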
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}
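
// Lock-free push: claim a slot by CAS-ing _index forward, then fill the
// claimed slot. A failed CAS means another thread got the slot, so we
// retry; a full stack just sets the overflow flag for the caller to
// notice.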
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
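
// Unlike the CAS-based operations above, the two variants below serialize
// on ParGCRareEvent_lock; as the lock's name suggests, they are only used
// on rare paths where contention is not a concern.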
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    guarantee(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
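
// The region stack records memory regions (below the global finger) whose
// objects need re-examination. Entries with a NULL start have been
// invalidated (see invalidate_entries_into_cset() below) and are skipped
// by pop().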
CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

MemRegion CMRegionStack::pop() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        tmp_guarantee_CM( mr.end() != NULL, "invariant" );
        tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        tmp_guarantee_CM( mr.end() == NULL, "invariant" );
      }
    }
    // Otherwise, we need to try again.
  }
}

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      tmp_guarantee_CM( mr.end() != NULL, "invariant");
      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      tmp_guarantee_CM( hr != NULL, "invariant" );
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
    }
  }
  return result;
}
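
// Drain the stack, applying cl to each object popped and thus iterating
// over the objects it references. If yield_after is true we offer to
// yield after every object; a false return value means the drain was cut
// short by such a yield.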
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in the G1 heap.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
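
// Apply f to all entries below _oops_do_bound (which must have been set
// beforehand, e.g. at the start of a pause) and then clear the bound.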
void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(G1MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBLogBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ParallelMarkingThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ParallelMarkingThreads > 0) {
      // notice that ParallelMarkingThreads overrides G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = ParallelMarkingThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }
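
    // An illustrative calculation with assumed flag values: with
    // G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
    // GCPauseIntervalMillis=1000 and 8 processors, we get
    // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 of total CPU time,
    // i.e. 0.16 of one CPU; so marking_thread_num = ceil(0.16) = 1,
    // marking_task_overhead = 0.02 / 1 * 8 = 0.16, and
    // sleep_factor = (1 - 0.16) / 0.16 = 5.25 (a marking thread sleeps
    // 5.25x as long as it works).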
    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee( parallel_marking_threads() > 0, "peace of mind" );
    _parallel_workers = new WorkGang("Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  tmp_guarantee_CM( committed.start() == _heap_start,
                    "start shouldn't change" );
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  guarantee( _heap_start != NULL &&
             _heap_end != NULL &&
             _heap_start < _heap_end, "heap bounds should look ok" );

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  guarantee( active_tasks <= _max_task_num, "we should not have more" );

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    guarantee( !concurrent_marking_in_progress(), "invariant" );
    guarantee( _finger == _heap_end, "only way to get here" );
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark, in the CM bit map, refs into the G1 heap
// from external roots.
// Called at the first checkpoint.
//

#define PRINT_REACHABLE_AT_INITIAL_MARK 0
#if PRINT_REACHABLE_AT_INITIAL_MARK
static FILE* reachable_file = NULL;

class PrintReachableClosure: public OopsInGenClosure {
  CMBitMap* _bm;
  int _level;
public:
  PrintReachableClosure(CMBitMap* bm) :
    _bm(bm), _level(0) {
    guarantee(reachable_file != NULL, "pre-condition");
  }
  void do_oop(oop* p) {
    oop obj = *p;
    HeapWord* obj_addr = (HeapWord*)obj;
    if (obj == NULL) return;
    fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
            _level, p, (void*) obj, _bm->isMarked(obj_addr));
    if (!_bm->isMarked(obj_addr)) {
      _bm->mark(obj_addr);
      _level++;
      obj->oop_iterate(this);
      _level--;
    }
  }
};
#endif // PRINT_REACHABLE_AT_INITIAL_MARK

#define SEND_HEAP_DUMP_TO_FILE 0
#if SEND_HEAP_DUMP_TO_FILE
static FILE* heap_dump_file = NULL;
#endif // SEND_HEAP_DUMP_TO_FILE

void ConcurrentMark::clearNextBitmap() {
  guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();
  }
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Find all the reachable objects...
#if PRINT_REACHABLE_AT_INITIAL_MARK
  guarantee(reachable_file == NULL, "Protocol");
  char fn_buf[100];
  sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
  reachable_file = fopen(fn_buf, "w");
  // clear the mark bitmap (no grey objects to start with)
  _nextMarkBitMap->clearAll();
  PrintReachableClosure prcl(_nextMarkBitMap);
  g1h->process_strong_roots(false, // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &prcl, // Regular roots
                            &prcl  // Perm Gen Roots
                            );
  // The root iteration above "consumed" dirty cards in the perm gen.
  // Therefore, as a shortcut, we dirty all such cards.
  g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
  fclose(reachable_file);
  reachable_file = NULL;
  // clear the mark bitmap again.
  _nextMarkBitMap->clearAll();
  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
  COMPILER2_PRESENT(DerivedPointerTable::clear());
#endif // PRINT_REACHABLE_AT_INITIAL_MARK

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
  satb_mq_set.set_active_all_threads(true);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(false, // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            &older     // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one thread
   could be in the barrier sync code, waiting for the other thread to
   also sync up, whereas another one could be trying to yield, while
   also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended because a Full GC or an
   evacuation pause could occur. This is actually safe, since entering
   the sync barrier is one of the last things do_marking_step() does,
   and it doesn't manipulate any data structures afterwards.
*/
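
// A concrete instance of the deadlock avoided above: task A reaches the
// barrier and waits for task B, while a safepoint is requested and task B
// offers to yield for it. Had A stayed in the STS, the safepoint would
// wait for A (stuck in the barrier) while A waits for B (stuck yielding),
// and neither could make progress.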

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
                    "address should be within the heap bounds" );

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    tmp_guarantee_CM( mr.end() <= finger, "invariant" );

    tmp_guarantee_CM( mr.start() <= mr.end() &&
                      _heap_start <= mr.start() &&
                      mr.end() <= _heap_end,
                      "region boundaries should fall within the committed space" );
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    guarantee( Thread::current()->is_ConcurrentGC_thread(),
               "this should only be done by a conc GC thread" );

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    JavaThread::satb_mark_queue_set().set_active_all_threads(false);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0
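
// Closure that walks the marked objects in each heap region, accumulating
// live/used byte counts and recording liveness in the region and card
// bitmaps. It serves both the concurrent "desired regions" calculation
// (final == false, with yielding) and the stop-the-world final counting
// pass (final == true).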
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num),
                "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm), _final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};
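
// The concurrent counting pass below runs while the mutators are active
// (and it yields), so a single sweep may miss objects that become marked
// as it runs; we therefore re-run the closure until a sweep reports that
// nothing changed.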
void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    guarantee( (size_t)i < _n_workers, "invariant" );
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  size_t _cleared_h_regions;
  size_t _freed_regions;
  UncleanRegionList* _unclean_region_list;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("  Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (ParallelGCThreads > 0) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           UncleanRegionList* list,
                           int worker_num)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _unclean_region_list(list)
{}

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  r->reset_gc_time_stamp();
  if (!r->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    r->note_end_of_marking();
    _max_live_bytes += r->max_live_bytes();
    _g1->free_region_if_totally_empty_work(r,
                                           _freed_bytes,
                                           _cleared_h_regions,
                                           _freed_regions,
                                           _unclean_region_list,
                                           true /*par*/);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}
1601 void ConcurrentMark::cleanup() {
1602 // world is stopped at this checkpoint
1603 assert(SafepointSynchronize::is_at_safepoint(),
1604 "world should be stopped");
1605 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1607 // If a full collection has happened, we shouldn't do this.
1608 if (has_aborted()) {
1609 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1610 return;
1611 }
1613 if (VerifyDuringGC) {
1614 HandleMark hm; // handle scope
1615 gclog_or_tty->print(" VerifyDuringGC:(before)");
1616 Universe::heap()->prepare_for_verify();
1617 Universe::verify(/* allow dirty */ true,
1618 /* silent */ false,
1619 /* prev marking */ true);
1620 }
1622 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1623 g1p->record_concurrent_mark_cleanup_start();
1625 double start = os::elapsedTime();
1627 // Do counting once more with the world stopped for good measure.
1628 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1629 &_region_bm, &_card_bm);
1630 if (ParallelGCThreads > 0) {
1631 assert(g1h->check_heap_region_claim_values(
1632 HeapRegion::InitialClaimValue),
1633 "sanity check");
1635 int n_workers = g1h->workers()->total_workers();
1636 g1h->set_par_threads(n_workers);
1637 g1h->workers()->run_task(&g1_par_count_task);
1638 g1h->set_par_threads(0);
1640 assert(g1h->check_heap_region_claim_values(
1641 HeapRegion::FinalCountClaimValue),
1642 "sanity check");
1643 } else {
1644 g1_par_count_task.work(0);
1645 }
1647 size_t known_garbage_bytes =
1648 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1649 #if 0
1650 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1651 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1652 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1653 (double) known_garbage_bytes / (double) (1024 * 1024));
1654 #endif // 0
1655 g1p->set_known_garbage_bytes(known_garbage_bytes);
1657 size_t start_used_bytes = g1h->used();
1658 _at_least_one_mark_complete = true;
1659 g1h->set_marking_complete();
1661 double count_end = os::elapsedTime();
1662 double this_final_counting_time = (count_end - start);
1663 if (G1PrintParCleanupStats) {
1664 gclog_or_tty->print_cr("Cleanup:");
1665 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1666 this_final_counting_time*1000.0);
1667 }
1668 _total_counting_time += this_final_counting_time;
1670 // Install newly created mark bitMap as "prev".
1671 swapMarkBitMaps();
1673 g1h->reset_gc_time_stamp();
1675 // Note end of marking in all heap regions.
1676 double note_end_start = os::elapsedTime();
1677 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1678 if (ParallelGCThreads > 0) {
1679 int n_workers = g1h->workers()->total_workers();
1680 g1h->set_par_threads(n_workers);
1681 g1h->workers()->run_task(&g1_par_note_end_task);
1682 g1h->set_par_threads(0);
1684 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1685 "sanity check");
1686 } else {
1687 g1_par_note_end_task.work(0);
1688 }
1689 g1h->set_unclean_regions_coming(true);
1690 double note_end_end = os::elapsedTime();
1691 // Tell the mutators that there might be unclean regions coming...
1692 if (G1PrintParCleanupStats) {
1693 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1694 (note_end_end - note_end_start)*1000.0);
1695 }
1697 // Now we "scrub" remembered sets. Note that we must do this before the
1698 // call below, since it affects the metric by which we sort the heap
1699 // regions.
1700 if (G1ScrubRemSets) {
1701 double rs_scrub_start = os::elapsedTime();
1702 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1703 if (ParallelGCThreads > 0) {
1704 int n_workers = g1h->workers()->total_workers();
1705 g1h->set_par_threads(n_workers);
1706 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1707 g1h->set_par_threads(0);
1709 assert(g1h->check_heap_region_claim_values(
1710 HeapRegion::ScrubRemSetClaimValue),
1711 "sanity check");
1712 } else {
1713 g1_par_scrub_rs_task.work(0);
1714 }
1716 double rs_scrub_end = os::elapsedTime();
1717 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1718 _total_rs_scrub_time += this_rs_scrub_time;
1719 }
1721 // this will also free any regions totally full of garbage objects,
1722 // and sort the regions.
1723 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1724 g1_par_note_end_task.freed_bytes(),
1725 g1_par_note_end_task.max_live_bytes());
1727 // Statistics.
1728 double end = os::elapsedTime();
1729 _cleanup_times.add((end - start) * 1000.0);
1731 // G1CollectedHeap::heap()->print();
1732 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1733 // G1CollectedHeap::heap()->get_gc_time_stamp());
1735 if (PrintGC || PrintGCDetails) {
1736 g1h->print_size_transition(gclog_or_tty,
1737 start_used_bytes,
1738 g1h->used(),
1739 g1h->capacity());
1740 }
1742 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1743 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1745 // We need to make this a "collection" so any collection pause that
1746 // races with it goes around and waits for completeCleanup to finish.
1747 g1h->increment_total_collections();
1749 if (VerifyDuringGC) {
1750 HandleMark hm; // handle scope
1751 gclog_or_tty->print(" VerifyDuringGC:(after)");
1752 Universe::heap()->prepare_for_verify();
1753 Universe::verify(/* allow dirty */ true,
1754 /* silent */ false,
1755 /* prev marking */ true);
1756 }
1757 }
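// Aside: the claim-value asserts in cleanup() above rely on G1's
// heap-region claiming protocol. Each parallel phase stamps every region
// it processes with a phase-specific claim value, so the next phase can
// assert that the previous one visited all regions. A minimal sketch of
// the idea, using hypothetical names rather than the real HeapRegion
// interface:
#if 0 // illustrative sketch only
struct Region {
  volatile jint _claim; // claim value of the last phase to claim this region
};

// Returns true iff the calling worker claimed 'r' for the phase tagged
// 'new_val'. Workers race on the CAS; exactly one wins per region.
static bool claim_for_phase(Region* r, jint old_val, jint new_val) {
  return Atomic::cmpxchg(new_val, &r->_claim, old_val) == old_val;
}
#endif // 0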
1759 void ConcurrentMark::completeCleanup() {
1760 // A full collection intervened.
1761 if (has_aborted()) return;
1763 int first = 0;
1764 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1765 for (int t = 0; t < last; t++) {
1766 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1767 assert(list->well_formed(), "Inv");
1768 HeapRegion* hd = list->hd();
1769 while (hd != NULL) {
1770 // Now finish up the other stuff.
1771 hd->rem_set()->clear();
1772 HeapRegion* next_hd = hd->next_from_unclean_list();
1773 (void)list->pop();
1774 guarantee(list->hd() == next_hd, "how not?");
1775 _g1h->put_region_on_unclean_list(hd);
1776 if (!hd->isHumongous()) {
1777 // Bump the _free_regions count by one.
1778 _g1h->finish_free_region_work(0, 0, 1, NULL);
1779 }
1780 hd = list->hd();
1781 guarantee(hd == next_hd, "how not?");
1782 }
1783 }
1784 }
1787 class G1CMIsAliveClosure: public BoolObjectClosure {
1788 G1CollectedHeap* _g1;
1789 public:
1790 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1791 _g1(g1)
1792 {}
1794 void do_object(oop obj) {
1795 assert(false, "not to be invoked");
1796 }
1797 bool do_object_b(oop obj) {
1798 HeapWord* addr = (HeapWord*)obj;
1799 return addr != NULL &&
1800 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1801 }
1802 };
1804 class G1CMKeepAliveClosure: public OopClosure {
1805 G1CollectedHeap* _g1;
1806 ConcurrentMark* _cm;
1807 CMBitMap* _bitMap;
1808 public:
1809 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1810 CMBitMap* bitMap) :
1811 _g1(g1), _cm(cm),
1812 _bitMap(bitMap) {}
1814 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1815 virtual void do_oop( oop* p) { do_oop_work(p); }
1817 template <class T> void do_oop_work(T* p) {
1818 oop thisOop = oopDesc::load_decode_heap_oop(p);
1819 HeapWord* addr = (HeapWord*)thisOop;
1820 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1821 _bitMap->mark(addr);
1822 _cm->mark_stack_push(thisOop);
1823 }
1824 }
1825 };
1827 class G1CMDrainMarkingStackClosure: public VoidClosure {
1828 CMMarkStack* _markStack;
1829 CMBitMap* _bitMap;
1830 G1CMKeepAliveClosure* _oopClosure;
1831 public:
1832 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1833 G1CMKeepAliveClosure* oopClosure) :
1834 _bitMap(bitMap),
1835 _markStack(markStack),
1836 _oopClosure(oopClosure)
1837 {}
1839 void do_void() {
1840 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1841 }
1842 };
1844 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1845 ResourceMark rm;
1846 HandleMark hm;
1847 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1848 ReferenceProcessor* rp = g1h->ref_processor();
1850 // Process weak references.
1851 rp->setup_policy(clear_all_soft_refs);
1852 assert(_markStack.isEmpty(), "mark stack should be empty");
1854 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1855 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1856 G1CMDrainMarkingStackClosure
1857 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1858 &g1KeepAliveClosure);
1860 // XXXYYY Also: copy the parallel ref processing code from CMS.
1861 rp->process_discovered_references(&g1IsAliveClosure,
1862 &g1KeepAliveClosure,
1863 &g1DrainMarkingStackClosure,
1864 NULL);
1865 assert(_markStack.overflow() || _markStack.isEmpty(),
1866 "mark stack should be empty (unless it overflowed)");
1867 if (_markStack.overflow()) {
1868 set_has_overflown();
1869 }
1871 rp->enqueue_discovered_references();
1872 rp->verify_no_references_recorded();
1873 assert(!rp->discovery_enabled(), "should have been disabled");
1875 // Now clean up stale oops in SymbolTable and StringTable
1876 SymbolTable::unlink(&g1IsAliveClosure);
1877 StringTable::unlink(&g1IsAliveClosure);
1878 }
1880 void ConcurrentMark::swapMarkBitMaps() {
1881 CMBitMapRO* temp = _prevMarkBitMap;
1882 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1883 _nextMarkBitMap = (CMBitMap*) temp;
1884 }
1886 class CMRemarkTask: public AbstractGangTask {
1887 private:
1888 ConcurrentMark *_cm;
1890 public:
1891 void work(int worker_i) {
1892 // Since all available tasks are actually started, we should
1893 // only proceed if we're supposed to be active.
1894 if ((size_t)worker_i < _cm->active_tasks()) {
1895 CMTask* task = _cm->task(worker_i);
1896 task->record_start_time();
1897 do {
1898 task->do_marking_step(1000000000.0 /* something very large */);
1899 } while (task->has_aborted() && !_cm->has_overflown());
1900 // If we overflow, then we do not want to restart. We instead
1901 // want to abort remark and do concurrent marking again.
1902 task->record_end_time();
1903 }
1904 }
1906 CMRemarkTask(ConcurrentMark* cm) :
1907 AbstractGangTask("Par Remark"), _cm(cm) { }
1908 };
1910 void ConcurrentMark::checkpointRootsFinalWork() {
1911 ResourceMark rm;
1912 HandleMark hm;
1913 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1915 g1h->ensure_parsability(false);
1917 if (ParallelGCThreads > 0) {
1918 g1h->change_strong_roots_parity();
1919 // this is remark, so we'll use up all available threads
1920 int active_workers = ParallelGCThreads;
1921 set_phase(active_workers, false);
1923 CMRemarkTask remarkTask(this);
1924 // We will start all available threads, even if we decide that the
1925 // active_workers will be fewer. The extra ones will just bail out
1926 // immediately.
1927 int n_workers = g1h->workers()->total_workers();
1928 g1h->set_par_threads(n_workers);
1929 g1h->workers()->run_task(&remarkTask);
1930 g1h->set_par_threads(0);
1932 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1933 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1934 } else {
1935 g1h->change_strong_roots_parity();
1936 // this is remark, so we'll use up all available threads
1937 int active_workers = 1;
1938 set_phase(active_workers, false);
1940 CMRemarkTask remarkTask(this);
1941 // We will start all available threads, even if we decide that the
1942 // active_workers will be fewer. The extra ones will just bail out
1943 // immediately.
1944 remarkTask.work(0);
1946 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1947 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1948 }
1950 print_stats();
1952 if (!restart_for_overflow())
1953 set_non_marking_state();
1955 #if VERIFY_OBJS_PROCESSED
1956 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1957 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1958 _scan_obj_cl.objs_processed,
1959 ThreadLocalObjQueue::objs_enqueued);
1960 guarantee(_scan_obj_cl.objs_processed ==
1961 ThreadLocalObjQueue::objs_enqueued,
1962 "Different number of objs processed and enqueued.");
1963 }
1964 #endif
1965 }
1967 class ReachablePrinterOopClosure: public OopClosure {
1968 private:
1969 G1CollectedHeap* _g1h;
1970 CMBitMapRO* _bitmap;
1971 outputStream* _out;
1973 public:
1974 ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
1975 _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
1977 void do_oop(narrowOop* p) { do_oop_work(p); }
1978 void do_oop( oop* p) { do_oop_work(p); }
1980 template <class T> void do_oop_work(T* p) {
1981 oop obj = oopDesc::load_decode_heap_oop(p);
1982 const char* str = NULL;
1983 const char* str2 = "";
1985 if (!_g1h->is_in_g1_reserved(obj))
1986 str = "outside G1 reserved";
1987 else {
1988 HeapRegion* hr = _g1h->heap_region_containing(obj);
1989 guarantee( hr != NULL, "invariant" );
1990 if (hr->obj_allocated_since_prev_marking(obj)) {
1991 str = "over TAMS";
1992 if (_bitmap->isMarked((HeapWord*) obj))
1993 str2 = " AND MARKED";
1994 } else if (_bitmap->isMarked((HeapWord*) obj))
1995 str = "marked";
1996 else
1997 str = "#### NOT MARKED ####";
1998 }
2000 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
2001 p, (void*) obj, str, str2);
2002 }
2003 };
2005 class ReachablePrinterClosure: public BitMapClosure {
2006 private:
2007 CMBitMapRO* _bitmap;
2008 outputStream* _out;
2010 public:
2011 ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2012 _bitmap(bitmap), _out(out) { }
2014 bool do_bit(size_t offset) {
2015 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2016 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2018 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2019 oop(addr)->oop_iterate(&oopCl);
2020 _out->print_cr("");
2022 return true;
2023 }
2024 };
2026 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2027 private:
2028 CMBitMapRO* _bitmap;
2029 outputStream* _out;
2031 public:
2032 void do_object(oop o) {
2033 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2035 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2036 o->oop_iterate(&oopCl);
2037 _out->print_cr("");
2038 }
2040 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2041 _bitmap(bitmap), _out(out) { }
2042 };
2044 class RegionReachablePrinterClosure : public HeapRegionClosure {
2045 private:
2046 CMBitMapRO* _bitmap;
2047 outputStream* _out;
2049 public:
2050 bool doHeapRegion(HeapRegion* hr) {
2051 HeapWord* b = hr->bottom();
2052 HeapWord* e = hr->end();
2053 HeapWord* t = hr->top();
2054 HeapWord* p = hr->prev_top_at_mark_start();
2055 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2056 "PTAMS: "PTR_FORMAT, b, e, t, p);
2057 _out->print_cr("");
2059 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2060 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2062 return false;
2063 }
2065 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2066 outputStream* out) :
2067 _bitmap(bitmap), _out(out) { }
2068 };
2070 void ConcurrentMark::print_prev_bitmap_reachable() {
2071 outputStream* out = gclog_or_tty;
2073 #if SEND_HEAP_DUMP_TO_FILE
2074 guarantee(heap_dump_file == NULL, "Protocol");
2075 char fn_buf[100];
2076 sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2077 heap_dump_file = fopen(fn_buf, "w");
2078 fileStream fstream(heap_dump_file);
2079 out = &fstream;
2080 #endif // SEND_HEAP_DUMP_TO_FILE
2082 RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2083 out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2084 _g1h->heap_region_iterate(&rcl);
2085 out->print_cr("");
2087 ReachablePrinterClosure cl(_prevMarkBitMap, out);
2088 out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2089 _prevMarkBitMap->iterate(&cl);
2090 out->print_cr("");
2092 #if SEND_HEAP_DUMP_TO_FILE
2093 fclose(heap_dump_file);
2094 heap_dump_file = NULL;
2095 #endif // SEND_HEAP_DUMP_TO_FILE
2096 }
2098 // This note is for drainAllSATBBuffers and the code in between.
2099 // In the future we could reuse a task to do this work during an
2100 // evacuation pause (since now tasks are not active and can be claimed
2101 // during an evacuation pause). This was a late change to the code and
2102 // is currently not being taken advantage of.
2104 class CMGlobalObjectClosure : public ObjectClosure {
2105 private:
2106 ConcurrentMark* _cm;
2108 public:
2109 void do_object(oop obj) {
2110 _cm->deal_with_reference(obj);
2111 }
2113 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2114 };
2116 void ConcurrentMark::deal_with_reference(oop obj) {
2117 if (verbose_high())
2118 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2119 (void*) obj);
2122 HeapWord* objAddr = (HeapWord*) obj;
2123 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2124 if (_g1h->is_in_g1_reserved(objAddr)) {
2125 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2126 HeapRegion* hr = _g1h->heap_region_containing(obj);
2127 if (_g1h->is_obj_ill(obj, hr)) {
2128 if (verbose_high())
2129 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2130 "marked", (void*) obj);
2132 // we need to mark it first
2133 if (_nextMarkBitMap->parMark(objAddr)) {
2134 // No OrderAccess:store_load() is needed. It is implicit in the
2135 // CAS done in parMark(objAddr) above
2136 HeapWord* finger = _finger;
2137 if (objAddr < finger) {
2138 if (verbose_high())
2139 gclog_or_tty->print_cr("[global] below the global finger "
2140 "("PTR_FORMAT"), pushing it", finger);
2141 if (!mark_stack_push(obj)) {
2142 if (verbose_low())
2143 gclog_or_tty->print_cr("[global] global stack overflow during "
2144 "deal_with_reference");
2145 }
2146 }
2147 }
2148 }
2149 }
2150 }
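// In short, the graying protocol above is: (1) atomically set the bit in
// the next mark bitmap; (2) push the object on the global mark stack only
// if it lies below the global finger, since anything at or above the
// finger will still be reached by the bitmap scan. A condensed sketch of
// the same logic (hypothetical free function, simplified from the code
// above):
#if 0 // illustrative sketch only
static void gray_object(oop obj, CMBitMap* bm, ConcurrentMark* cm) {
  HeapWord* addr = (HeapWord*) obj;
  if (bm->parMark(addr) && addr < cm->finger()) {
    // Below the finger: the scan has already passed this address, so
    // the object must be pushed explicitly (push may fail on overflow).
    cm->mark_stack_push(obj);
  }
}
#endif // 0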
2152 void ConcurrentMark::drainAllSATBBuffers() {
2153 CMGlobalObjectClosure oc(this);
2154 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2155 satb_mq_set.set_closure(&oc);
2157 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2158 if (verbose_medium())
2159 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2160 }
2162 // no need to check whether we should do this, as this is only
2163 // called during an evacuation pause
2164 satb_mq_set.iterate_closure_all_threads();
2166 satb_mq_set.set_closure(NULL);
2167 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2168 }
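// For context: the buffers drained above are filled by the mutator's SATB
// pre-write barrier. Before a reference field is overwritten while marking
// is active, the old value is enqueued so the snapshot-at-the-beginning
// invariant is preserved. A schematic sketch of that barrier (hypothetical
// names; the real barrier lives in the write-barrier code, not here):
#if 0 // illustrative sketch only
template <class T>
static void satb_pre_write_barrier(T* field) {
  if (marking_is_active()) {                       // hypothetical predicate
    oop old_val = oopDesc::load_decode_heap_oop(field);
    if (old_val != NULL) {
      satb_enqueue(old_val);                       // hypothetical enqueue
    }
  }
}
#endif // 0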
2170 void ConcurrentMark::markPrev(oop p) {
2171 // Note we are overriding the read-only view of the prev map here, via
2172 // the cast.
2173 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2174 }
2176 void ConcurrentMark::clear(oop p) {
2177 assert(p != NULL && p->is_oop(), "expected an oop");
2178 HeapWord* addr = (HeapWord*)p;
2179 assert(addr >= _nextMarkBitMap->startWord() &&
2180 addr < _nextMarkBitMap->endWord(), "in a region");
2182 _nextMarkBitMap->clear(addr);
2183 }
2185 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2186 // Note we are overriding the read-only view of the prev map here, via
2187 // the cast.
2188 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2189 _nextMarkBitMap->clearRange(mr);
2190 }
2192 HeapRegion*
2193 ConcurrentMark::claim_region(int task_num) {
2194 // "checkpoint" the finger
2195 HeapWord* finger = _finger;
2197 // _heap_end will not change underneath our feet; it only changes at
2198 // yield points.
2199 while (finger < _heap_end) {
2200 tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2202 // is the gap between reading the finger and doing the CAS too long?
2204 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2205 HeapWord* bottom = curr_region->bottom();
2206 HeapWord* end = curr_region->end();
2207 HeapWord* limit = curr_region->next_top_at_mark_start();
2209 if (verbose_low())
2210 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2211 "["PTR_FORMAT", "PTR_FORMAT"), "
2212 "limit = "PTR_FORMAT,
2213 task_num, curr_region, bottom, end, limit);
2215 HeapWord* res =
2216 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2217 if (res == finger) {
2218 // we succeeded
2220 // notice that _finger == end cannot be guaranteed here, since
2221 // someone else might have moved the finger even further
2222 guarantee( _finger >= end, "the finger should have moved forward" );
2224 if (verbose_low())
2225 gclog_or_tty->print_cr("[%d] we were successful with region = "
2226 PTR_FORMAT, task_num, curr_region);
2228 if (limit > bottom) {
2229 if (verbose_low())
2230 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2231 "returning it ", task_num, curr_region);
2232 return curr_region;
2233 } else {
2234 tmp_guarantee_CM( limit == bottom,
2235 "the region limit should be at bottom" );
2236 if (verbose_low())
2237 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2238 "returning NULL", task_num, curr_region);
2239 // we return NULL and the caller should try calling
2240 // claim_region() again.
2241 return NULL;
2242 }
2243 } else {
2244 guarantee( _finger > finger, "the finger should have moved forward" );
2245 if (verbose_low())
2246 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2247 "global finger = "PTR_FORMAT", "
2248 "our finger = "PTR_FORMAT,
2249 task_num, _finger, finger);
2251 // read it again
2252 finger = _finger;
2253 }
2254 }
2256 return NULL;
2257 }
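// Stripped of logging and the empty-region special case, claim_region()
// is a classic lock-free CAS loop over a shared cursor. A condensed
// sketch (hypothetical helper; 'finger_addr' stands for &_finger):
#if 0 // illustrative sketch only
static HeapRegion* claim_next(HeapWord* volatile* finger_addr,
                              HeapWord* heap_end, G1CollectedHeap* g1h) {
  HeapWord* finger = *finger_addr;                 // snapshot the cursor
  while (finger < heap_end) {
    HeapRegion* r = g1h->heap_region_containing(finger);
    HeapWord* end = r->end();
    if ((HeapWord*) Atomic::cmpxchg_ptr(end, finger_addr, finger) == finger) {
      return r;                                    // we advanced the cursor
    }
    finger = *finger_addr;                         // lost the race; retry
  }
  return NULL;
}
#endif // 0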
2259 void ConcurrentMark::oops_do(OopClosure* cl) {
2260 if (_markStack.size() > 0 && verbose_low())
2261 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2262 "size = %d", _markStack.size());
2263 // we first iterate over the contents of the mark stack...
2264 _markStack.oops_do(cl);
2266 for (int i = 0; i < (int)_max_task_num; ++i) {
2267 OopTaskQueue* queue = _task_queues->queue((int)i);
2269 if (queue->size() > 0 && verbose_low())
2270 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2271 "size = %d", i, queue->size());
2273 // ...then over the contents of all the task queues.
2274 queue->oops_do(cl);
2275 }
2277 // finally, invalidate any entries in the region stack that
2278 // point into the collection set
2279 if (_regionStack.invalidate_entries_into_cset()) {
2280 // otherwise, any gray objects copied during the evacuation pause
2281 // might not be visited.
2282 guarantee( _should_gray_objects, "invariant" );
2283 }
2284 }
2286 void ConcurrentMark::clear_marking_state() {
2287 _markStack.setEmpty();
2288 _markStack.clear_overflow();
2289 _regionStack.setEmpty();
2290 _regionStack.clear_overflow();
2291 clear_has_overflown();
2292 _finger = _heap_start;
2294 for (int i = 0; i < (int)_max_task_num; ++i) {
2295 OopTaskQueue* queue = _task_queues->queue(i);
2296 queue->set_empty();
2297 }
2298 }
2300 void ConcurrentMark::print_stats() {
2301 if (verbose_stats()) {
2302 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2303 for (size_t i = 0; i < _active_tasks; ++i) {
2304 _tasks[i]->print_stats();
2305 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2306 }
2307 }
2308 }
2310 class CSMarkOopClosure: public OopClosure {
2311 friend class CSMarkBitMapClosure;
2313 G1CollectedHeap* _g1h;
2314 CMBitMap* _bm;
2315 ConcurrentMark* _cm;
2316 oop* _ms;
2317 jint* _array_ind_stack;
2318 int _ms_size;
2319 int _ms_ind;
2320 int _array_increment;
2322 bool push(oop obj, int arr_ind = 0) {
2323 if (_ms_ind == _ms_size) {
2324 gclog_or_tty->print_cr("Mark stack is full.");
2325 return false;
2326 }
2327 _ms[_ms_ind] = obj;
2328 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2329 _ms_ind++;
2330 return true;
2331 }
2333 oop pop() {
2334 if (_ms_ind == 0) return NULL;
2335 else {
2336 _ms_ind--;
2337 return _ms[_ms_ind];
2338 }
2339 }
2341 template <class T> bool drain() {
2342 while (_ms_ind > 0) {
2343 oop obj = pop();
2344 assert(obj != NULL, "Since index was non-zero.");
2345 if (obj->is_objArray()) {
2346 jint arr_ind = _array_ind_stack[_ms_ind];
2347 objArrayOop aobj = objArrayOop(obj);
2348 jint len = aobj->length();
2349 jint next_arr_ind = arr_ind + _array_increment;
2350 if (next_arr_ind < len) {
2351 push(obj, next_arr_ind);
2352 }
2353 // Now process this portion of the array.
2354 int lim = MIN2(next_arr_ind, len);
2355 for (int j = arr_ind; j < lim; j++) {
2356 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2357 }
2359 } else {
2360 obj->oop_iterate(this);
2361 }
2362 if (abort()) return false;
2363 }
2364 return true;
2365 }
2367 public:
2368 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2369 _g1h(G1CollectedHeap::heap()),
2370 _cm(cm),
2371 _bm(cm->nextMarkBitMap()),
2372 _ms_size(ms_size), _ms_ind(0),
2373 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2374 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2375 _array_increment(MAX2(ms_size/8, 16))
2376 {}
2378 ~CSMarkOopClosure() {
2379 FREE_C_HEAP_ARRAY(oop, _ms);
2380 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2381 }
2383 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2384 virtual void do_oop( oop* p) { do_oop_work(p); }
2386 template <class T> void do_oop_work(T* p) {
2387 T heap_oop = oopDesc::load_heap_oop(p);
2388 if (oopDesc::is_null(heap_oop)) return;
2389 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2390 if (obj->is_forwarded()) {
2391 // If the object has already been forwarded, we have to make sure
2392 // that it's marked. So follow the forwarding pointer. Note that
2393 // this does the right thing for self-forwarding pointers in the
2394 // evacuation failure case.
2395 obj = obj->forwardee();
2396 }
2397 HeapRegion* hr = _g1h->heap_region_containing(obj);
2398 if (hr != NULL) {
2399 if (hr->in_collection_set()) {
2400 if (_g1h->is_obj_ill(obj)) {
2401 _bm->mark((HeapWord*)obj);
2402 if (!push(obj)) {
2403 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2404 set_abort();
2405 }
2406 }
2407 } else {
2408 // Outside the collection set; we need to gray it
2409 _cm->deal_with_reference(obj);
2410 }
2411 }
2412 }
2413 };
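// The forwarding logic above amounts to a small resolve step: during and
// after evacuation, marking data must be applied to an object's new
// location, and self-forwarded objects (from evacuation failure) resolve
// to themselves. A sketch of that step in isolation:
#if 0 // illustrative sketch only
static oop resolve_forwardee(oop obj) {
  // forwardee() == obj for self-forwarded objects, so this does the
  // right thing in both the normal and the evacuation-failure case
  return obj->is_forwarded() ? obj->forwardee() : obj;
}
#endif // 0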
2415 class CSMarkBitMapClosure: public BitMapClosure {
2416 G1CollectedHeap* _g1h;
2417 CMBitMap* _bitMap;
2418 ConcurrentMark* _cm;
2419 CSMarkOopClosure _oop_cl;
2420 public:
2421 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2422 _g1h(G1CollectedHeap::heap()),
2423 _bitMap(cm->nextMarkBitMap()),
2424 _oop_cl(cm, ms_size)
2425 {}
2427 ~CSMarkBitMapClosure() {}
2429 bool do_bit(size_t offset) {
2430 // convert offset into a HeapWord*
2431 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2432 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2433 "address out of range");
2434 assert(_bitMap->isMarked(addr), "tautology");
2435 oop obj = oop(addr);
2436 if (!obj->is_forwarded()) {
2437 if (!_oop_cl.push(obj)) return false;
2438 if (UseCompressedOops) {
2439 if (!_oop_cl.drain<narrowOop>()) return false;
2440 } else {
2441 if (!_oop_cl.drain<oop>()) return false;
2442 }
2443 }
2444 // Otherwise...
2445 return true;
2446 }
2447 };
2450 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2451 CMBitMap* _bm;
2452 CSMarkBitMapClosure _bit_cl;
2453 enum SomePrivateConstants {
2454 MSSize = 1000
2455 };
2456 bool _completed;
2457 public:
2458 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2459 _bm(cm->nextMarkBitMap()),
2460 _bit_cl(cm, MSSize),
2461 _completed(true)
2462 {}
2464 ~CompleteMarkingInCSHRClosure() {}
2466 bool doHeapRegion(HeapRegion* r) {
2467 if (!r->evacuation_failed()) {
2468 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2469 if (!mr.is_empty()) {
2470 if (!_bm->iterate(&_bit_cl, mr)) {
2471 _completed = false;
2472 return true;
2473 }
2474 }
2475 }
2476 return false;
2477 }
2479 bool completed() { return _completed; }
2480 };
2482 class ClearMarksInHRClosure: public HeapRegionClosure {
2483 CMBitMap* _bm;
2484 public:
2485 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2487 bool doHeapRegion(HeapRegion* r) {
2488 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2489 MemRegion usedMR = r->used_region();
2490 _bm->clearRange(usedMR);
2491 }
2492 return false;
2493 }
2494 };
2496 void ConcurrentMark::complete_marking_in_collection_set() {
2497 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2499 if (!g1h->mark_in_progress()) {
2500 g1h->g1_policy()->record_mark_closure_time(0.0);
2501 return;
2502 }
2504 int i = 1;
2505 double start = os::elapsedTime();
2506 while (true) {
2507 i++;
2508 CompleteMarkingInCSHRClosure cmplt(this);
2509 g1h->collection_set_iterate(&cmplt);
2510 if (cmplt.completed()) break;
2511 }
2512 double end_time = os::elapsedTime();
2513 double elapsed_time_ms = (end_time - start) * 1000.0;
2514 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2515 if (PrintGCDetails) {
2516 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2517 }
2519 ClearMarksInHRClosure clr(nextMarkBitMap());
2520 g1h->collection_set_iterate(&clr);
2521 }
2523 // The next two methods deal with the following optimisation. Some
2524 // objects are gray by being marked and located above the finger. If
2525 // they are copied, during an evacuation pause, below the finger then
2526 // they need to be pushed on the stack. The observation is that, if
2527 // there are no regions in the collection set located above the
2528 // finger, then the above cannot happen, hence we do not need to
2529 // explicitly gray any objects when copying them to below the
2530 // finger. The global stack will be scanned to ensure that, if it
2531 // points to objects being copied, it will update their
2532 // location. There is a tricky situation with the gray objects in the
2533 // region stack that are being copied, however. See the comment in
2534 // newCSet().
2536 void ConcurrentMark::newCSet() {
2537 if (!concurrent_marking_in_progress())
2538 // nothing to do if marking is not in progress
2539 return;
2541 // find what the lowest finger is among the global and local fingers
2542 _min_finger = _finger;
2543 for (int i = 0; i < (int)_max_task_num; ++i) {
2544 CMTask* task = _tasks[i];
2545 HeapWord* task_finger = task->finger();
2546 if (task_finger != NULL && task_finger < _min_finger)
2547 _min_finger = task_finger;
2548 }
2550 _should_gray_objects = false;
2552 // This fixes a very subtle and frustrating bug. It might be the case
2553 // that, during an evacuation pause, heap regions that contain
2554 // objects that are gray (by being in regions contained in the
2555 // region stack) are included in the collection set. Since such gray
2556 // objects will be moved, and because it's not easy to redirect
2557 // region stack entries to point to a new location (because objects
2558 // in one region might be scattered to multiple regions after they
2559 // are copied), one option is to ensure that all marked objects
2560 // copied during a pause are pushed on the stack. Notice, however,
2561 // that this problem can only happen when the region stack is not
2562 // empty during an evacuation pause. So, we make the fix a bit less
2563 // conservative and ensure that regions are pushed on the stack,
2564 // irrespective of whether all collection set regions are below the
2565 // finger, if the region stack is not empty. This is expected to be
2566 // a rare case, so I don't think it's necessary to be smarter about it.
2567 if (!region_stack_empty())
2568 _should_gray_objects = true;
2569 }
2571 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2572 if (!concurrent_marking_in_progress())
2573 return;
2575 HeapWord* region_end = hr->end();
2576 if (region_end > _min_finger)
2577 _should_gray_objects = true;
2578 }
2580 // abandon current marking iteration due to a Full GC
2581 void ConcurrentMark::abort() {
2582 // Clear all marks to force marking thread to do nothing
2583 _nextMarkBitMap->clearAll();
2584 // Empty mark stack
2585 clear_marking_state();
2586 for (int i = 0; i < (int)_max_task_num; ++i)
2587 _tasks[i]->clear_region_fields();
2588 _has_aborted = true;
2590 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2591 satb_mq_set.abandon_partial_marking();
2592 satb_mq_set.set_active_all_threads(false);
2593 }
2595 static void print_ms_time_info(const char* prefix, const char* name,
2596 NumberSeq& ns) {
2597 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2598 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2599 if (ns.num() > 0) {
2600 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2601 prefix, ns.sd(), ns.maximum());
2602 }
2603 }
2605 void ConcurrentMark::print_summary_info() {
2606 gclog_or_tty->print_cr(" Concurrent marking:");
2607 print_ms_time_info(" ", "init marks", _init_times);
2608 print_ms_time_info(" ", "remarks", _remark_times);
2609 {
2610 print_ms_time_info(" ", "final marks", _remark_mark_times);
2611 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2613 }
2614 print_ms_time_info(" ", "cleanups", _cleanup_times);
2615 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2616 _total_counting_time,
2617 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2618 (double)_cleanup_times.num()
2619 : 0.0));
2620 if (G1ScrubRemSets) {
2621 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2622 _total_rs_scrub_time,
2623 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2624 (double)_cleanup_times.num()
2625 : 0.0));
2626 }
2627 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2628 (_init_times.sum() + _remark_times.sum() +
2629 _cleanup_times.sum())/1000.0);
2630 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2631 "(%8.2f s marking, %8.2f s counting).",
2632 cmThread()->vtime_accum(),
2633 cmThread()->vtime_mark_accum(),
2634 cmThread()->vtime_count_accum());
2635 }
2637 // Closures
2638 // XXX: there seems to be a lot of code duplication here;
2639 // should refactor and consolidate the shared code.
2644 // We take a break if someone is trying to stop the world.
2645 bool ConcurrentMark::do_yield_check(int worker_i) {
2646 if (should_yield()) {
2647 if (worker_i == 0)
2648 _g1h->g1_policy()->record_concurrent_pause();
2649 cmThread()->yield();
2650 if (worker_i == 0)
2651 _g1h->g1_policy()->record_concurrent_pause_end();
2652 return true;
2653 } else {
2654 return false;
2655 }
2656 }
2658 bool ConcurrentMark::should_yield() {
2659 return cmThread()->should_yield();
2660 }
2662 bool ConcurrentMark::containing_card_is_marked(void* p) {
2663 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2664 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2665 }
2667 bool ConcurrentMark::containing_cards_are_marked(void* start,
2668 void* last) {
2669 return
2670 containing_card_is_marked(start) &&
2671 containing_card_is_marked(last);
2672 }
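// The card lookup above maps a heap address to a card index by taking its
// byte offset from the start of the reserved heap and shifting right by
// CardTableModRefBS::card_shift. With the usual 512-byte cards
// (card_shift == 9), an address 1234 bytes above the heap base lands on
// card 1234 >> 9 == 2. A sketch of the computation on its own:
#if 0 // illustrative sketch only
static size_t card_index_for(void* p, HeapWord* heap_base) {
  size_t byte_offset = pointer_delta(p, heap_base, 1 /* byte granularity */);
  return byte_offset >> CardTableModRefBS::card_shift;
}
#endif // 0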
2674 #ifndef PRODUCT
2675 // for debugging purposes
2676 void ConcurrentMark::print_finger() {
2677 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2678 _heap_start, _heap_end, _finger);
2679 for (int i = 0; i < (int) _max_task_num; ++i) {
2680 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2681 }
2682 gclog_or_tty->print_cr("");
2683 }
2684 #endif
2686 // Closure for iteration over bitmaps
2687 class CMBitMapClosure : public BitMapClosure {
2688 private:
2689 // the bitmap that is being iterated over
2690 CMBitMap* _nextMarkBitMap;
2691 ConcurrentMark* _cm;
2692 CMTask* _task;
2693 // true if we're scanning a heap region claimed by the task (so that
2694 // we move the finger along), false if we're not, i.e. currently when
2695 // scanning a heap region popped from the region stack (so that we
2696 // do not move the task finger along; it'd be a mistake if we did so).
2697 bool _scanning_heap_region;
2699 public:
2700 CMBitMapClosure(CMTask *task,
2701 ConcurrentMark* cm,
2702 CMBitMap* nextMarkBitMap)
2703 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2705 void set_scanning_heap_region(bool scanning_heap_region) {
2706 _scanning_heap_region = scanning_heap_region;
2707 }
2709 bool do_bit(size_t offset) {
2710 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2711 tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2712 tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2714 if (_scanning_heap_region) {
2715 statsOnly( _task->increase_objs_found_on_bitmap() );
2716 tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2717 // We move that task's local finger along.
2718 _task->move_finger_to(addr);
2719 } else {
2720 // We move the task's region finger along.
2721 _task->move_region_finger_to(addr);
2722 }
2724 _task->scan_object(oop(addr));
2725 // we only partially drain the local queue and global stack
2726 _task->drain_local_queue(true);
2727 _task->drain_global_stack(true);
2729 // if the has_aborted flag has been raised, we need to bail out of
2730 // the iteration
2731 return !_task->has_aborted();
2732 }
2733 };
2735 // Closure for iterating over objects, currently only used for
2736 // processing SATB buffers.
2737 class CMObjectClosure : public ObjectClosure {
2738 private:
2739 CMTask* _task;
2741 public:
2742 void do_object(oop obj) {
2743 _task->deal_with_reference(obj);
2744 }
2746 CMObjectClosure(CMTask* task) : _task(task) { }
2747 };
2749 // Closure for iterating over object fields
2750 class CMOopClosure : public OopClosure {
2751 private:
2752 G1CollectedHeap* _g1h;
2753 ConcurrentMark* _cm;
2754 CMTask* _task;
2756 public:
2757 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2758 virtual void do_oop( oop* p) { do_oop_work(p); }
2760 template <class T> void do_oop_work(T* p) {
2761 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2762 tmp_guarantee_CM( !_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), "invariant" );
2764 oop obj = oopDesc::load_decode_heap_oop(p);
2765 if (_cm->verbose_high())
2766 gclog_or_tty->print_cr("[%d] we're looking at location "
2767 "*"PTR_FORMAT" = "PTR_FORMAT,
2768 _task->task_id(), p, (void*) obj);
2769 _task->deal_with_reference(obj);
2770 }
2772 CMOopClosure(G1CollectedHeap* g1h,
2773 ConcurrentMark* cm,
2774 CMTask* task)
2775 : _g1h(g1h), _cm(cm), _task(task) { }
2776 };
2778 void CMTask::setup_for_region(HeapRegion* hr) {
2779 tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2780 "claim_region() should have filtered out continues humongous regions" );
2782 if (_cm->verbose_low())
2783 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2784 _task_id, hr);
2786 _curr_region = hr;
2787 _finger = hr->bottom();
2788 update_region_limit();
2789 }
2791 void CMTask::update_region_limit() {
2792 HeapRegion* hr = _curr_region;
2793 HeapWord* bottom = hr->bottom();
2794 HeapWord* limit = hr->next_top_at_mark_start();
2796 if (limit == bottom) {
2797 if (_cm->verbose_low())
2798 gclog_or_tty->print_cr("[%d] found an empty region "
2799 "["PTR_FORMAT", "PTR_FORMAT")",
2800 _task_id, bottom, limit);
2801 // The region was collected underneath our feet.
2802 // We set the finger to bottom to ensure that the bitmap
2803 // iteration that will follow this will not do anything.
2804 // (this is not a condition that holds when we set the region up,
2805 // as the region is not supposed to be empty in the first place)
2806 _finger = bottom;
2807 } else if (limit >= _region_limit) {
2808 tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2809 } else {
2810 tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2811 // This can happen under some pretty unusual circumstances. An
2812 // evacuation pause empties the region underneath our feet (NTAMS
2813 // at bottom). We then do some allocation in the region (NTAMS
2814 // stays at bottom), followed by the region being used as a GC
2815 // alloc region (NTAMS will move to top() and the objects
2816 // originally below it will be grayed). All objects now marked in
2817 // the region are explicitly grayed, if below the global finger,
2818 // and in fact we do not need to scan anything else. So, we simply
2819 // set _finger to be limit to ensure that the bitmap iteration
2820 // doesn't do anything.
2821 _finger = limit;
2822 }
2824 _region_limit = limit;
2825 }
2827 void CMTask::giveup_current_region() {
2828 tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2829 if (_cm->verbose_low())
2830 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2831 _task_id, _curr_region);
2832 clear_region_fields();
2833 }
2835 void CMTask::clear_region_fields() {
2836 // Values for these three fields that indicate that we're not
2837 // holding on to a region.
2838 _curr_region = NULL;
2839 _finger = NULL;
2840 _region_limit = NULL;
2842 _region_finger = NULL;
2843 }
2845 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2846 guarantee( nextMarkBitMap != NULL, "invariant" );
2848 if (_cm->verbose_low())
2849 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2851 _nextMarkBitMap = nextMarkBitMap;
2852 clear_region_fields();
2854 _calls = 0;
2855 _elapsed_time_ms = 0.0;
2856 _termination_time_ms = 0.0;
2857 _termination_start_time_ms = 0.0;
2859 #if _MARKING_STATS_
2860 _local_pushes = 0;
2861 _local_pops = 0;
2862 _local_max_size = 0;
2863 _objs_scanned = 0;
2864 _global_pushes = 0;
2865 _global_pops = 0;
2866 _global_max_size = 0;
2867 _global_transfers_to = 0;
2868 _global_transfers_from = 0;
2869 _region_stack_pops = 0;
2870 _regions_claimed = 0;
2871 _objs_found_on_bitmap = 0;
2872 _satb_buffers_processed = 0;
2873 _steal_attempts = 0;
2874 _steals = 0;
2875 _aborted = 0;
2876 _aborted_overflow = 0;
2877 _aborted_cm_aborted = 0;
2878 _aborted_yield = 0;
2879 _aborted_timed_out = 0;
2880 _aborted_satb = 0;
2881 _aborted_termination = 0;
2882 #endif // _MARKING_STATS_
2883 }
2885 bool CMTask::should_exit_termination() {
2886 regular_clock_call();
2887 // This is called when we are in the termination protocol. We should
2888 // quit if, for some reason, this task wants to abort or the global
2889 // stack is not empty (this means that we can get work from it).
2890 return !_cm->mark_stack_empty() || has_aborted();
2891 }
2893 // This determines whether the method below will check both the local
2894 // and global fingers when determining whether to push on the stack a
2895 // gray object (value 1) or whether it will only check the global one
2896 // (value 0). The tradeoffs are that the former will be a bit more
2897 // accurate and possibly push less on the stack, but it might also be
2898 // a little bit slower.
2900 #define _CHECK_BOTH_FINGERS_ 1
2902 void CMTask::deal_with_reference(oop obj) {
2903 if (_cm->verbose_high())
2904 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2905 _task_id, (void*) obj);
2907 ++_refs_reached;
2909 HeapWord* objAddr = (HeapWord*) obj;
2910 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2911 if (_g1h->is_in_g1_reserved(objAddr)) {
2912 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2913 HeapRegion* hr = _g1h->heap_region_containing(obj);
2914 if (_g1h->is_obj_ill(obj, hr)) {
2915 if (_cm->verbose_high())
2916 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2917 _task_id, (void*) obj);
2919 // we need to mark it first
2920 if (_nextMarkBitMap->parMark(objAddr)) {
2921 // No OrderAccess:store_load() is needed. It is implicit in the
2922 // CAS done in parMark(objAddr) above
2923 HeapWord* global_finger = _cm->finger();
2925 #if _CHECK_BOTH_FINGERS_
2926 // we will check both the local and global fingers
2928 if (_finger != NULL && objAddr < _finger) {
2929 if (_cm->verbose_high())
2930 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2931 "pushing it", _task_id, _finger);
2932 push(obj);
2933 } else if (_curr_region != NULL && objAddr < _region_limit) {
2934 // do nothing
2935 } else if (objAddr < global_finger) {
2936 // Notice that the global finger might be moving forward
2937 // concurrently. This is not a problem. In the worst case, we
2938 // mark the object while it is above the global finger and, by
2939 // the time we read the global finger, it has moved forward
2940 // past this object. In this case, the object will probably
2941 // be visited when a task is scanning the region and will also
2942 // be pushed on the stack. So, some duplicate work, but no
2943 // correctness problems.
2945 if (_cm->verbose_high())
2946 gclog_or_tty->print_cr("[%d] below the global finger "
2947 "("PTR_FORMAT"), pushing it",
2948 _task_id, global_finger);
2949 push(obj);
2950 } else {
2951 // do nothing
2952 }
2953 #else // _CHECK_BOTH_FINGERS_
2954 // we will only check the global finger
2956 if (objAddr < global_finger) {
2957 // see long comment above
2959 if (_cm->verbose_high())
2960 gclog_or_tty->print_cr("[%d] below the global finger "
2961 "("PTR_FORMAT"), pushing it",
2962 _task_id, global_finger);
2963 push(obj);
2964 }
2965 #endif // _CHECK_BOTH_FINGERS_
2966 }
2967 }
2968 }
2969 }
2971 void CMTask::push(oop obj) {
2972 HeapWord* objAddr = (HeapWord*) obj;
2973 tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
2974 tmp_guarantee_CM( !_g1h->heap_region_containing(objAddr)->is_on_free_list(), "invariant" );
2975 tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
2976 tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
2978 if (_cm->verbose_high())
2979 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2981 if (!_task_queue->push(obj)) {
2982 // The local task queue looks full. We need to push some entries
2983 // to the global stack.
2985 if (_cm->verbose_medium())
2986 gclog_or_tty->print_cr("[%d] task queue overflow, "
2987 "moving entries to the global stack",
2988 _task_id);
2989 move_entries_to_global_stack();
2991 // this should succeed since, even if we overflow the global
2992 // stack, we should have definitely removed some entries from the
2993 // local queue. So, there must be space on it.
2994 bool success = _task_queue->push(obj);
2995 tmp_guarantee_CM( success, "invariant" );
2996 }
2998 statsOnly( int tmp_size = _task_queue->size();
2999 if (tmp_size > _local_max_size)
3000 _local_max_size = tmp_size;
3001 ++_local_pushes );
3002 }
3004 void CMTask::reached_limit() {
3005 tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3006 _refs_reached >= _refs_reached_limit ,
3007 "shouldn't have been called otherwise" );
3008 regular_clock_call();
3009 }
3011 void CMTask::regular_clock_call() {
3012 if (has_aborted())
3013 return;
3015 // First, we need to recalculate the words scanned and refs reached
3016 // limits for the next clock call.
3017 recalculate_limits();
3019 // During the regular clock call we do the following
3021 // (1) If an overflow has been flagged, then we abort.
3022 if (_cm->has_overflown()) {
3023 set_has_aborted();
3024 return;
3025 }
3027 // If we are not concurrent (i.e. we're doing remark) we don't need
3028 // to check anything else. The other steps are only needed during
3029 // the concurrent marking phase.
3030 if (!concurrent())
3031 return;
3033 // (2) If marking has been aborted for Full GC, then we also abort.
3034 if (_cm->has_aborted()) {
3035 set_has_aborted();
3036 statsOnly( ++_aborted_cm_aborted );
3037 return;
3038 }
3040 double curr_time_ms = os::elapsedVTime() * 1000.0;
3042 // (3) If marking stats are enabled, then we update the step history.
3043 #if _MARKING_STATS_
3044 if (_words_scanned >= _words_scanned_limit)
3045 ++_clock_due_to_scanning;
3046 if (_refs_reached >= _refs_reached_limit)
3047 ++_clock_due_to_marking;
3049 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3050 _interval_start_time_ms = curr_time_ms;
3051 _all_clock_intervals_ms.add(last_interval_ms);
3053 if (_cm->verbose_medium()) {
3054 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3055 "scanned = %d%s, refs reached = %d%s",
3056 _task_id, last_interval_ms,
3057 _words_scanned,
3058 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3059 _refs_reached,
3060 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3061 }
3062 #endif // _MARKING_STATS_
3064 // (4) We check whether we should yield. If we have to, then we abort.
3065 if (_cm->should_yield()) {
3066 // We should yield. To do this we abort the task. The caller is
3067 // responsible for yielding.
3068 set_has_aborted();
3069 statsOnly( ++_aborted_yield );
3070 return;
3071 }
3073 // (5) We check whether we've reached our time quota. If we have,
3074 // then we abort.
3075 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3076 if (elapsed_time_ms > _time_target_ms) {
3077 set_has_aborted();
3078 _has_aborted_timed_out = true;
3079 statsOnly( ++_aborted_timed_out );
3080 return;
3081 }
3083 // (6) Finally, we check whether there are enough completed SATB
3084 // buffers available for processing. If there are, we abort.
3085 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3086 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3087 if (_cm->verbose_low())
3088 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3089 _task_id);
3090 // we do need to process SATB buffers, we'll abort and restart
3091 // the marking task to do so
3092 set_has_aborted();
3093 statsOnly( ++_aborted_satb );
3094 return;
3095 }
3096 }
3098 void CMTask::recalculate_limits() {
3099 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3100 _words_scanned_limit = _real_words_scanned_limit;
3102 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3103 _refs_reached_limit = _real_refs_reached_limit;
3104 }
3106 void CMTask::decrease_limits() {
3107 // This is called when we believe that we're going to do an infrequent
3108 // operation which will increase the per byte scanned cost (i.e. move
3109 // entries to/from the global stack). It basically tries to decrease the
3110 // scanning limit so that the clock is called earlier.
3112 if (_cm->verbose_medium())
3113 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3115 _words_scanned_limit = _real_words_scanned_limit -
3116 3 * words_scanned_period / 4;
3117 _refs_reached_limit = _real_refs_reached_limit -
3118 3 * refs_reached_period / 4;
3119 }
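// Concretely, the new soft limit sits a quarter of a period ahead of the
// current count instead of a full period. As a worked example (assuming,
// say, a words_scanned_period of 12288 words):
//   _real_words_scanned_limit = _words_scanned + 12288
//   _words_scanned_limit      = _real_words_scanned_limit - 3 * 12288 / 4
//                             = _words_scanned + 3072
// so the regular clock fires after 3072 more words instead of 12288.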
3121 void CMTask::move_entries_to_global_stack() {
3122 // local array where we'll store the entries that will be popped
3123 // from the local queue
3124 oop buffer[global_stack_transfer_size];
3126 int n = 0;
3127 oop obj;
3128 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3129 buffer[n] = obj;
3130 ++n;
3131 }
3133 if (n > 0) {
3134 // we popped at least one entry from the local queue
3136 statsOnly( ++_global_transfers_to; _local_pops += n );
3138 if (!_cm->mark_stack_push(buffer, n)) {
3139 if (_cm->verbose_low())
3140 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3141 set_has_aborted();
3142 } else {
3143 // the transfer was successful
3145 if (_cm->verbose_medium())
3146 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3147 _task_id, n);
3148 statsOnly( int tmp_size = _cm->mark_stack_size();
3149 if (tmp_size > _global_max_size)
3150 _global_max_size = tmp_size;
3151 _global_pushes += n );
3152 }
3153 }
3155 // this operation was quite expensive, so decrease the limits
3156 decrease_limits();
3157 }
3159 void CMTask::get_entries_from_global_stack() {
3160 // local array where we'll store the entries that will be popped
3161 // from the global stack.
3162 oop buffer[global_stack_transfer_size];
3163 int n;
3164 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3165 tmp_guarantee_CM( n <= global_stack_transfer_size,
3166 "we should not pop more than the given limit" );
3167 if (n > 0) {
3168 // yes, we did actually pop at least one entry
3170 statsOnly( ++_global_transfers_from; _global_pops += n );
3171 if (_cm->verbose_medium())
3172 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3173 _task_id, n);
3174 for (int i = 0; i < n; ++i) {
3175 bool success = _task_queue->push(buffer[i]);
3176 // We only call this when the local queue is empty or under a
3177 // given target limit. So, we do not expect this push to fail.
3178 tmp_guarantee_CM( success, "invariant" );
3179 }
3181 statsOnly( int tmp_size = _task_queue->size();
3182 if (tmp_size > _local_max_size)
3183 _local_max_size = tmp_size;
3184 _local_pushes += n );
3185 }
3187 // this operation was quite expensive, so decrease the limits
3188 decrease_limits();
3189 }
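// Both transfer routines above move work in chunks of
// global_stack_transfer_size entries, so each touch of the shared global
// stack amortizes its synchronization cost over many entries instead of
// paying it per oop. The underlying pattern, reduced to a sketch
// (hypothetical helpers):
#if 0 // illustrative sketch only
oop buffer[global_stack_transfer_size];
int n = 0;
while (n < global_stack_transfer_size && local_queue_pop(buffer[n])) {
  ++n;                                   // batch entries locally, lock-free
}
if (n > 0) {
  global_stack_push(buffer, n);          // one synchronized operation
}
#endif // 0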
3191 void CMTask::drain_local_queue(bool partially) {
3192 if (has_aborted())
3193 return;
3195 // Decide what the target size is, depending whether we're going to
3196 // drain it partially (so that other tasks can steal if they run out
3197 // of things to do) or totally (at the very end).
3198 size_t target_size;
3199 if (partially)
3200 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3201 else
3202 target_size = 0;
3204 if (_task_queue->size() > target_size) {
3205 if (_cm->verbose_high())
3206 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3207 _task_id, target_size);
3209 oop obj;
3210 bool ret = _task_queue->pop_local(obj);
3211 while (ret) {
3212 statsOnly( ++_local_pops );
3214 if (_cm->verbose_high())
3215 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3216 (void*) obj);
3218 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3219 "invariant" );
3220 tmp_guarantee_CM( !_g1h->heap_region_containing(obj)->is_on_free_list(),
3221 "invariant" );
3223 scan_object(obj);
3225 if (_task_queue->size() <= target_size || has_aborted())
3226 ret = false;
3227 else
3228 ret = _task_queue->pop_local(obj);
3229 }
3231 if (_cm->verbose_high())
3232 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3233 _task_id, _task_queue->size());
3234 }
3235 }
3237 void CMTask::drain_global_stack(bool partially) {
3238 if (has_aborted())
3239 return;
3241 // We have a policy to drain the local queue before we attempt to
3242 // drain the global stack.
3243 tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3245 // Decide what the target size is, depending whether we're going to
3246 // drain it partially (so that other tasks can steal if they run out
3247 // of things to do) or totally (at the very end). Notice that,
3248 // because we move entries from the global stack in chunks or
3249 // because another task might be doing the same, we might in fact
3250 // drop below the target. But, this is not a problem.
3251 size_t target_size;
3252 if (partially)
3253 target_size = _cm->partial_mark_stack_size_target();
3254 else
3255 target_size = 0;
3257 if (_cm->mark_stack_size() > target_size) {
3258 if (_cm->verbose_low())
3259 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3260 _task_id, target_size);
3262 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3263 get_entries_from_global_stack();
3264 drain_local_queue(partially);
3265 }
3267 if (_cm->verbose_low())
3268 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3269 _task_id, _cm->mark_stack_size());
3270 }
3271 }
3273 // The SATB queue set makes several assumptions about whether to call the
3274 // par or non-par versions of its methods. This is why some of the code is
3275 // replicated. We should really get rid of the single-threaded version
3276 // of the code to simplify things.
3277 void CMTask::drain_satb_buffers() {
3278 if (has_aborted())
3279 return;
3281 // We set this so that the regular clock knows that we're in the
3282 // middle of draining buffers and doesn't set the abort flag when it
3283 // notices that SATB buffers are available for draining. It'd be
3284 // very counterproductive if it did that. :-)
3285 _draining_satb_buffers = true;
3287 CMObjectClosure oc(this);
3288 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3289 if (ParallelGCThreads > 0)
3290 satb_mq_set.set_par_closure(_task_id, &oc);
3291 else
3292 satb_mq_set.set_closure(&oc);
3294 // This keeps claiming and applying the closure to completed buffers
3295 // until we run out of buffers or we need to abort.
3296 if (ParallelGCThreads > 0) {
3297 while (!has_aborted() &&
3298 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3299 if (_cm->verbose_medium())
3300 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3301 statsOnly( ++_satb_buffers_processed );
3302 regular_clock_call();
3303 }
3304 } else {
3305 while (!has_aborted() &&
3306 satb_mq_set.apply_closure_to_completed_buffer()) {
3307 if (_cm->verbose_medium())
3308 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3309 statsOnly( ++_satb_buffers_processed );
3310 regular_clock_call();
3311 }
3312 }
3314 if (!concurrent() && !has_aborted()) {
3315 // We should only do this during remark.
3316 if (ParallelGCThreads > 0)
3317 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3318 else
3319 satb_mq_set.iterate_closure_all_threads();
3320 }
3322 _draining_satb_buffers = false;
3324 tmp_guarantee_CM( has_aborted() ||
3325 concurrent() ||
3326 satb_mq_set.completed_buffers_num() == 0, "invariant" );
3328 if (ParallelGCThreads > 0)
3329 satb_mq_set.set_par_closure(_task_id, NULL);
3330 else
3331 satb_mq_set.set_closure(NULL);
3333 // again, this was a potentially expensive operation, decrease the
3334 // limits to get the regular clock call early
3335 decrease_limits();
3336 }
3338 void CMTask::drain_region_stack(BitMapClosure* bc) {
3339 if (has_aborted())
3340 return;
3342 tmp_guarantee_CM( _region_finger == NULL,
3343 "it should be NULL when we're not scanning a region" );
3345 if (!_cm->region_stack_empty()) {
3346 if (_cm->verbose_low())
3347 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3348 _task_id, _cm->region_stack_size());
3350 MemRegion mr = _cm->region_stack_pop();
3351 // it returns MemRegion() if the pop fails
3352 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3354 while (mr.start() != NULL) {
3355 if (_cm->verbose_medium())
3356 gclog_or_tty->print_cr("[%d] we are scanning region "
3357 "["PTR_FORMAT", "PTR_FORMAT")",
3358 _task_id, mr.start(), mr.end());
3359 tmp_guarantee_CM( mr.end() <= _cm->finger(),
3360 "otherwise the region shouldn't be on the stack" );
3361 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3362 if (_nextMarkBitMap->iterate(bc, mr)) {
3363 tmp_guarantee_CM( !has_aborted(),
3364 "cannot abort the task without aborting the bitmap iteration" );
3366 // We finished iterating over the region without aborting.
3367 regular_clock_call();
3368 if (has_aborted())
3369 mr = MemRegion();
3370 else {
3371 mr = _cm->region_stack_pop();
3372 // it returns MemRegion() if the pop fails
3373 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3374 }
3375 } else {
3376 guarantee( has_aborted(), "currently the only way to do so" );
3378 // The only way to abort the bitmap iteration is to return
3379 // false from the do_bit() method. However, inside the
3380 // do_bit() method we move the _region_finger to point to the
3381 // object currently being looked at. So, if we bail out, we
3382 // have definitely set _region_finger to something non-null.
3383 guarantee( _region_finger != NULL, "invariant" );
3385 // The iteration was actually aborted. So now _region_finger
3386 // points to the address of the object we last scanned. If we
3387 // leave it there, when we restart this task, we will rescan
3388 // the object. It is easy to avoid this. We move the finger by
3389 // enough to point to the next possible object header (the
3390 // bitmap knows by how much we need to move it as it knows its
3391 // granularity).
3392 MemRegion newRegion =
3393 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3395 if (!newRegion.is_empty()) {
3396 if (_cm->verbose_low()) {
3397 gclog_or_tty->print_cr("[%d] pushing unscanned region "
3398 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3399 _task_id,
3400 newRegion.start(), newRegion.end());
3401 }
3402 // Now push the part of the region we didn't scan on the
3403 // region stack to make sure a task scans it later.
3404 _cm->region_stack_push(newRegion);
3405 }
3406 // break from while
3407 mr = MemRegion();
3408 }
3409 _region_finger = NULL;
3410 }
3412 // We only push regions on the region stack during evacuation
3413 // pauses. So, if we come out of the above iteration because the
3414 // region stack is empty, it will remain empty until the next
3415 // yield point. So, the guarantee below is safe.
3416 guarantee( has_aborted() || _cm->region_stack_empty(),
3417 "only way to exit the loop" );
3419 if (_cm->verbose_low())
3420 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3421 _task_id, _cm->region_stack_size());
3422 }
3423 }
3425 void CMTask::print_stats() {
3426 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3427 _task_id, _calls);
3428 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3429 _elapsed_time_ms, _termination_time_ms);
3430 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3431 _step_times_ms.num(), _step_times_ms.avg(),
3432 _step_times_ms.sd());
3433 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3434 _step_times_ms.maximum(), _step_times_ms.sum());
3436 #if _MARKING_STATS_
3437 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3438 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3439 _all_clock_intervals_ms.sd());
3440 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3441 _all_clock_intervals_ms.maximum(),
3442 _all_clock_intervals_ms.sum());
3443 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3444 _clock_due_to_scanning, _clock_due_to_marking);
3445 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3446 _objs_scanned, _objs_found_on_bitmap);
3447 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3448 _local_pushes, _local_pops, _local_max_size);
3449 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3450 _global_pushes, _global_pops, _global_max_size);
3451 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3452 _global_transfers_to,_global_transfers_from);
3453 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3454 _regions_claimed, _region_stack_pops);
3455 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3456 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3457 _steal_attempts, _steals);
3458 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3459 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3460 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3461 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3462 _aborted_timed_out, _aborted_satb, _aborted_termination);
3463 #endif // _MARKING_STATS_
3464 }
3466 /*****************************************************************************
3468 The do_marking_step(time_target_ms) method is the building block
3469 of the parallel marking framework. It can be called in parallel
3470 with other invocations of do_marking_step() on different tasks
3471 (but only one per task, obviously) and concurrently with the
3472 mutator threads, or during remark, hence it eliminates the need
3473 for two versions of the code. When called during remark, it will
3474 pick up from where the task left off during the concurrent marking
3475 phase. Interestingly, tasks are also claimable during evacuation
3476 pauses, since do_marking_step() ensures that it aborts before
3477 it needs to yield.
3479 The data structures that it uses to do marking work are the
3480 following:
3482 (1) Marking Bitmap. If there are gray objects that appear only
3483 on the bitmap (this happens either when dealing with an overflow
3484 or when the initial marking phase has simply marked the roots
3485 and didn't push them on the stack), then tasks claim heap
3486 regions whose bitmap they then scan to find gray objects. A
3487 global finger indicates where the end of the last claimed region
3488 is. A local finger indicates how far into the region a task has
3489 scanned. The two fingers are used to determine how to gray an
3490 object (i.e. whether simply marking it is OK, as it will be
3491 visited by a task in the future, or whether it needs to be also
3492 pushed on a stack).
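
   As an illustration only, simplified to just the global finger (the
   real code also consults the region-local finger, and the helper
   names here are hypothetical, not the actual API):

     void gray_object(HeapWord* addr) {
       if (addr >= global_finger()) {
         // the address is ahead of all claimed regions; a task will
         // reach it when it claims and scans the covering region,
         // so marking it on the bitmap is enough
         mark(addr);
       } else {
         // the bitmap scan has already passed this address, so the
         // object must also be pushed for explicit processing
         mark(addr);
         push_on_local_queue(addr);
       }
     }
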
3494 (2) Local Queue. The local queue of the task which is accessed
3495 reasonably efficiently by the task. Other tasks can steal from
3496 it when they run out of work. Throughout the marking phase, a
3497 task attempts to keep its local queue short but not totally
3498 empty, so that entries are available for stealing by other
3499 tasks. Only when there is no more work will a task totally
3500 drain its local queue.
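
   A minimal sketch of that keep-it-short policy (the target size
   below is a made-up constant, not the value the code actually
   uses, and pop_local()/scan_object() are used as in this file):

     const int partial_drain_target = 3;  // hypothetical value
     while (_task_queue->size() > partial_drain_target) {
       oop obj;
       if (!_task_queue->pop_local(obj)) break;
       scan_object(obj);  // may push more entries as it scans
     }
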
3502 (3) Global Mark Stack. This handles local queue overflow. During
3503 marking, only sets of entries are moved between it and the local
3504 queues, as access to it requires a mutex, and more fine-grained
3505 interaction with it might cause contention. If it
3506 overflows, then the marking phase should restart and iterate
3507 over the bitmap to identify gray objects. Throughout the marking
3508 phase, tasks attempt to keep the global mark stack at a small
3509 length but not totally empty, so that entries are available for
3510 popping by other tasks. Only when there is no more work will
3511 tasks totally drain the global mark stack.
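
   For illustration, a bulk transfer to the global stack might look
   like the following sketch (the buffer size, lock name, and
   bulk-push helper are assumptions, not the actual interface):

     oop buffer[transfer_size];                // hypothetical size
     int n = 0;
     while (n < transfer_size && _task_queue->pop_local(buffer[n])) {
       n++;
     }
     if (n > 0) {
       // one lock acquisition moves a whole set of entries, keeping
       // contention on the shared mark stack low
       MutexLocker ml(MarkStackLock);          // hypothetical lock
       global_mark_stack_push(buffer, n);      // hypothetical helper
     }
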
3513 (4) Global Region Stack. Entries on it correspond to areas of
3514 the bitmap that need to be scanned since they contain gray
3515 objects. Pushes on the region stack only happen during
3516 evacuation pauses and typically correspond to areas covered by
3517 GC LABs. If it overflows, then the marking phase should restart
3518 and iterate over the bitmap to identify gray objects. Tasks will
3519 try to totally drain the region stack as soon as possible.
3521 (5) SATB Buffer Queue. This is where completed SATB buffers are
3522 made available. Buffers are regularly removed from this queue
3523 and scanned for roots, so that the queue doesn't get too
3524 long. During remark, all completed buffers are processed, as
3525 well as the filled-in parts of any uncompleted buffers.
3527 The do_marking_step() method tries to abort when the time target
3528 has been reached. There are a few other cases when the
3529 do_marking_step() method also aborts:
3531 (1) When the marking phase has been aborted (after a Full GC).
3533 (2) When a global overflow (either on the global stack or the
3534 region stack) has been triggered. Before the task aborts, it
3535 will actually sync up with the other tasks to ensure that all
3536 the marking data structures (local queues, stacks, fingers etc.)
3537 are re-initialised so that when do_marking_step() completes,
3538 the marking phase can immediately restart.
3540 (3) When enough completed SATB buffers are available. The
3541 do_marking_step() method only tries to drain SATB buffers right
3542 at the beginning. So, if enough buffers are available, the
3543 marking step aborts and the SATB buffers are processed at
3544 the beginning of the next invocation.
3546 (4) To yield. When we have to yield, we abort and do the yield
3547 right at the end of do_marking_step(). This saves us a lot of
3548 hassle as, by yielding, we might allow a Full GC. If that
3549 happens then objects will be compacted underneath our feet, the
3550 heap might shrink, etc. We avoid checking for all this by just
3551 aborting and doing the yield right at the end.
3553 From the above it follows that the do_marking_step() method should
3554 be called in a loop (or, otherwise, regularly) until it completes.
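
   For example, a caller could drive a task roughly like this (a
   sketch of the calling convention, not an actual invocation site;
   the 10.0ms target is just an example value):

     do {
       task->do_marking_step(10.0 /* time target in ms */);
       // between steps the caller may yield to a pending safepoint,
       // or take part in the overflow restart protocol, as needed
     } while (task->has_aborted());
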
3556 If a marking step completes without its has_aborted() flag being
3557 true, it means it has completed the current marking phase (and
3558 also all other marking tasks have done so and have all synced up).
3560 A method called regular_clock_call() is invoked "regularly" (in
3561 sub ms intervals) throughout marking. It is this clock method that
3562 checks all the abort conditions which were mentioned above and
3563 decides when the task should abort. A work-based scheme is used to
3564 trigger this clock method: when the number of object words the
3565 marking phase has scanned or the number of references the marking
3566 phase has visited reaches a given limit. Additional invocations of
3567 the clock method have been planted in a few other strategic places
3568 too. The initial reason for the clock method was to avoid calling
3569 vtime too regularly, as it is quite expensive. So, once it was in
3570 place, it was natural to piggy-back all the other conditions on it
3571 too and not constantly check them throughout the code.
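
   The work-based trigger amounts to something like the following
   sketch (the counters are the fields set up in do_marking_step()
   below; the limit names are assumptions based on the limits that
   recalculate_limits() maintains):

     _words_scanned += obj->size();   // bumped as objects are scanned
     _refs_reached++;                 // bumped as references are visited
     if (_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit) {
       regular_clock_call();          // runs all the abort checks
     }
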
3573 *****************************************************************************/
3575 void CMTask::do_marking_step(double time_target_ms) {
3576 guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3577 guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3579 guarantee( concurrent() || _cm->region_stack_empty(),
3580 "the region stack should have been cleared before remark" );
3581 guarantee( _region_finger == NULL,
3582 "this should be non-null only when a region is being scanned" );
3584 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3585 guarantee( _task_queues != NULL, "invariant" );
3586 guarantee( _task_queue != NULL, "invariant" );
3587 guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3589 guarantee( !_claimed,
3590 "only one thread should claim this task at any one time" );
3592 // OK, this doesn't safeguard against all possible scenarios, as it is
3593 // possible for two threads to set the _claimed flag at the same
3594 // time. But it is only for debugging purposes anyway and it will
3595 // catch most problems.
3596 _claimed = true;
3598 _start_time_ms = os::elapsedVTime() * 1000.0;
3599 statsOnly( _interval_start_time_ms = _start_time_ms );
3601 double diff_prediction_ms =
3602 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3603 _time_target_ms = time_target_ms - diff_prediction_ms;
3605 // set up the variables that are used in the work-based scheme to
3606 // call the regular clock method
3607 _words_scanned = 0;
3608 _refs_reached = 0;
3609 recalculate_limits();
3611 // clear all flags
3612 clear_has_aborted();
3613 _has_aborted_timed_out = false;
3614 _draining_satb_buffers = false;
3616 ++_calls;
3618 if (_cm->verbose_low())
3619 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3620 "target = %1.2lfms >>>>>>>>>>",
3621 _task_id, _calls, _time_target_ms);
3623 // Set up the bitmap and oop closures. Anything that uses them is
3624 // eventually called from this method, so it is OK to allocate these
3625 // statically.
3626 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3627 CMOopClosure oop_closure(_g1h, _cm, this);
3628 set_oop_closure(&oop_closure);
3630 if (_cm->has_overflown()) {
3631 // This can happen if the region stack or the mark stack overflows
3632 // during a GC pause and this task, after a yield point,
3633 // restarts. We have to abort as we need to get into the overflow
3634 // protocol which happens right at the end of this task.
3635 set_has_aborted();
3636 }
3638 // First drain any available SATB buffers. After this, we will not
3639 // look at SATB buffers before the next invocation of this method.
3640 // If enough completed SATB buffers are queued up, the regular clock
3641 // will abort this task so that it restarts.
3642 drain_satb_buffers();
3643 // ...then partially drain the local queue and the global stack
3644 drain_local_queue(true);
3645 drain_global_stack(true);
3647 // Then totally drain the region stack. We will not look at
3648 // it again before the next invocation of this method. Entries on
3649 // the region stack are only added during evacuation pauses, for
3650 // which we have to yield. When we do, we abort the task anyway so
3651 // it will look at the region stack again when it restarts.
3652 bitmap_closure.set_scanning_heap_region(false);
3653 drain_region_stack(&bitmap_closure);
3654 // ...then partially drain the local queue and the global stack
3655 drain_local_queue(true);
3656 drain_global_stack(true);
3658 do {
3659 if (!has_aborted() && _curr_region != NULL) {
3660 // This means that we're already holding on to a region.
3661 tmp_guarantee_CM( _finger != NULL,
3662 "if region is not NULL, then the finger "
3663 "should not be NULL either" );
3665 // We might have restarted this task after an evacuation pause
3666 // which might have evacuated the region we're holding on to
3667 // underneath our feet. Let's read its limit again to make sure
3668 // that we do not iterate over a region of the heap that
3669 // contains garbage (update_region_limit() will also move
3670 // _finger to the start of the region if it is found empty).
3671 update_region_limit();
3672 // We will start from _finger not from the start of the region,
3673 // as we might be restarting this task after aborting half-way
3674 // through scanning this region. In this case, _finger points to
3675 // the address where we last found a marked object. If this is a
3676 // fresh region, _finger points to start().
3677 MemRegion mr = MemRegion(_finger, _region_limit);
3679 if (_cm->verbose_low())
3680 gclog_or_tty->print_cr("[%d] we're scanning part "
3681 "["PTR_FORMAT", "PTR_FORMAT") "
3682 "of region "PTR_FORMAT,
3683 _task_id, _finger, _region_limit, _curr_region);
3685 // Let's iterate over the bitmap of the part of the
3686 // region that is left.
3687 bitmap_closure.set_scanning_heap_region(true);
3688 if (mr.is_empty() ||
3689 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3690 // We successfully completed iterating over the region. Now,
3691 // let's give up the region.
3692 giveup_current_region();
3693 regular_clock_call();
3694 } else {
3695 guarantee( has_aborted(), "currently the only way to do so" );
3696 // The only way to abort the bitmap iteration is to return
3697 // false from the do_bit() method. However, inside the
3698 // do_bit() method we move the _finger to point to the
3699 // object currently being looked at. So, if we bail out, we
3700 // have definitely set _finger to something non-null.
3701 guarantee( _finger != NULL, "invariant" );
3703 // Region iteration was actually aborted. So now _finger
3704 // points to the address of the object we last scanned. If we
3705 // leave it there, when we restart this task, we will rescan
3706 // the object. It is easy to avoid this. We move the finger by
3707 // enough to point to the next possible object header (the
3708 // bitmap knows by how much we need to move it as it knows its
3709 // granularity).
3710 move_finger_to(_nextMarkBitMap->nextWord(_finger));
3711 }
3712 }
3713 // At this point we have either completed iterating over the
3714 // region we were holding on to, or we have aborted.
3716 // We then partially drain the local queue and the global stack.
3717 // (Do we really need this?)
3718 drain_local_queue(true);
3719 drain_global_stack(true);
3721 // Read the note on the claim_region() method on why it might
3722 // return NULL with potentially more regions available for
3723 // claiming and why we have to check out_of_regions() to determine
3724 // whether we're done or not.
3725 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3726 // We are going to try to claim a new region. We should have
3727 // given up on the previous one.
3728 tmp_guarantee_CM( _curr_region == NULL &&
3729 _finger == NULL &&
3730 _region_limit == NULL, "invariant" );
3731 if (_cm->verbose_low())
3732 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3733 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3734 if (claimed_region != NULL) {
3735 // Yes, we managed to claim one
3736 statsOnly( ++_regions_claimed );
3738 if (_cm->verbose_low())
3739 gclog_or_tty->print_cr("[%d] we successfully claimed "
3740 "region "PTR_FORMAT,
3741 _task_id, claimed_region);
3743 setup_for_region(claimed_region);
3744 tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3745 }
3746 // It is important to call the regular clock here. It might take
3747 // a while to claim a region if, for example, we hit a large
3748 // block of empty regions. So we need to call the regular clock
3749 // method once round the loop to make sure it's called
3750 // frequently enough.
3751 regular_clock_call();
3752 }
3754 if (!has_aborted() && _curr_region == NULL) {
3755 tmp_guarantee_CM( _cm->out_of_regions(),
3756 "at this point we should be out of regions" );
3757 }
3758 } while ( _curr_region != NULL && !has_aborted());
3760 if (!has_aborted()) {
3761 // We cannot check whether the global stack is empty, since other
3762 // tasks might be pushing objects to it concurrently. We also cannot
3763 // check if the region stack is empty because if a thread is aborting
3764 // it can push a partially done region back.
3765 tmp_guarantee_CM( _cm->out_of_regions(),
3766 "at this point we should be out of regions" );
3768 if (_cm->verbose_low())
3769 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3771 // Try to reduce the number of available SATB buffers so that
3772 // remark has less work to do.
3773 drain_satb_buffers();
3774 }
3776 // Since we've done everything else, we can now totally drain the
3777 // local queue and global stack.
3778 drain_local_queue(false);
3779 drain_global_stack(false);
3781 // Attempt at work stealing from other tasks' queues.
3782 if (!has_aborted()) {
3783 // We have not aborted. This means that we have finished all that
3784 // we could. Let's try to do some stealing...
3786 // We cannot check whether the global stack is empty, since other
3787 // tasks might be pushing objects to it concurrently. We also cannot
3788 // check if the region stack is empty because if a thread is aborting
3789 // it can push a partially done region back.
3790 guarantee( _cm->out_of_regions() &&
3791 _task_queue->size() == 0, "only way to reach here" );
3793 if (_cm->verbose_low())
3794 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3796 while (!has_aborted()) {
3797 oop obj;
3798 statsOnly( ++_steal_attempts );
3800 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3801 if (_cm->verbose_medium())
3802 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3803 _task_id, (void*) obj);
3805 statsOnly( ++_steals );
3807 tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3808 "any stolen object should be marked" );
3809 scan_object(obj);
3811 // And since we're towards the end, let's totally drain the
3812 // local queue and global stack.
3813 drain_local_queue(false);
3814 drain_global_stack(false);
3815 } else {
3816 break;
3817 }
3818 }
3819 }
3821 // We still haven't aborted. Now, let's try to get into the
3822 // termination protocol.
3823 if (!has_aborted()) {
3824 // We cannot check whether the global stack is empty, since other
3825 // tasks might be concurrently pushing objects on it. We also cannot
3826 // check if the region stack is empty because if a thread is aborting
3827 // it can push a partially done region back.
3828 guarantee( _cm->out_of_regions() &&
3829 _task_queue->size() == 0, "only way to reach here" );
3831 if (_cm->verbose_low())
3832 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3834 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3835 // The CMTask class also extends the TerminatorTerminator class,
3836 // hence its should_exit_termination() method will also decide
3837 // whether to exit the termination protocol or not.
3838 bool finished = _cm->terminator()->offer_termination(this);
3839 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3840 _termination_time_ms +=
3841 termination_end_time_ms - _termination_start_time_ms;
3843 if (finished) {
3844 // We're all done.
3846 if (_task_id == 0) {
3847 // let's allow task 0 to do this
3848 if (concurrent()) {
3849 guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3850 // we need to set this to false before the next
3851 // safepoint. This way we ensure that the marking phase
3852 // doesn't observe any more heap expansions.
3853 _cm->clear_concurrent_marking_in_progress();
3854 }
3855 }
3857 // We can now guarantee that the global stack is empty, since
3858 // all other tasks have finished.
3859 guarantee( _cm->out_of_regions() &&
3860 _cm->region_stack_empty() &&
3861 _cm->mark_stack_empty() &&
3862 _task_queue->size() == 0 &&
3863 !_cm->has_overflown() &&
3864 !_cm->mark_stack_overflow() &&
3865 !_cm->region_stack_overflow(),
3866 "only way to reach here" );
3868 if (_cm->verbose_low())
3869 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3870 } else {
3871 // Apparently there's more work to do. Let's abort this task. It
3872 // will be restarted and we can hopefully find more things to do.
3874 if (_cm->verbose_low())
3875 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3877 set_has_aborted();
3878 statsOnly( ++_aborted_termination );
3879 }
3880 }
3882 // Mainly for debugging purposes to make sure that a pointer to the
3883 // closure which was statically allocated in this frame doesn't
3884 // escape it by accident.
3885 set_oop_closure(NULL);
3886 double end_time_ms = os::elapsedVTime() * 1000.0;
3887 double elapsed_time_ms = end_time_ms - _start_time_ms;
3888 // Update the step history.
3889 _step_times_ms.add(elapsed_time_ms);
3891 if (has_aborted()) {
3892 // The task was aborted for some reason.
3894 statsOnly( ++_aborted );
3896 if (_has_aborted_timed_out) {
3897 double diff_ms = elapsed_time_ms - _time_target_ms;
3898 // Keep statistics of how well we did with respect to hitting
3899 // our target only if we actually timed out (if we aborted for
3900 // other reasons, then the results might get skewed).
3901 _marking_step_diffs_ms.add(diff_ms);
3902 }
3904 if (_cm->has_overflown()) {
3905 // This is the interesting one. We aborted because a global
3906 // overflow was raised. This means we have to restart the
3907 // marking phase and start iterating over regions. However, in
3908 // order to do this we have to make sure that all tasks stop
3909 // what they are doing and re-initialise in a safe manner. We
3910 // will achieve this with the use of two barrier sync points.
3912 if (_cm->verbose_low())
3913 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3915 _cm->enter_first_sync_barrier(_task_id);
3916 // When we exit this sync barrier we know that all tasks have
3917 // stopped doing marking work. So, it's now safe to
3918 // re-initialise our data structures. At the end of this method,
3919 // task 0 will clear the global data structures.
3921 statsOnly( ++_aborted_overflow );
3923 // We clear the local state of this task...
3924 clear_region_fields();
3926 // ...and enter the second barrier.
3927 _cm->enter_second_sync_barrier(_task_id);
3928 // At this point everything has been re-initialised and we're
3929 // ready to restart.
3930 }
3932 if (_cm->verbose_low()) {
3933 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3934 "elapsed = %1.2lfms <<<<<<<<<<",
3935 _task_id, _time_target_ms, elapsed_time_ms);
3936 if (_cm->has_aborted())
3937 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3938 _task_id);
3939 }
3940 } else {
3941 if (_cm->verbose_low())
3942 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3943 "elapsed = %1.2lfms <<<<<<<<<<",
3944 _task_id, _time_target_ms, elapsed_time_ms);
3945 }
3947 _claimed = false;
3948 }
3950 CMTask::CMTask(int task_id,
3951 ConcurrentMark* cm,
3952 CMTaskQueue* task_queue,
3953 CMTaskQueueSet* task_queues)
3954 : _g1h(G1CollectedHeap::heap()),
3955 _task_id(task_id), _cm(cm),
3956 _claimed(false),
3957 _nextMarkBitMap(NULL), _hash_seed(17),
3958 _task_queue(task_queue),
3959 _task_queues(task_queues),
3960 _oop_closure(NULL) {
3961 guarantee( task_queue != NULL, "invariant" );
3962 guarantee( task_queues != NULL, "invariant" );
3964 statsOnly( _clock_due_to_scanning = 0;
3965 _clock_due_to_marking = 0 );
3967 _marking_step_diffs_ms.add(0.5);
3968 }