Tue, 26 Jan 2010 16:52:29 -0800
6920090: G1: Disable ReduceInitialCardMarks at least until 6920109 is fixed
Summary: G1 now answers "no" to the query can_elide_initializing_store_barrier() in the product build. A debug flag allows alternate behaviour in debug builds.
Reviewed-by: iveresov, tonyp

/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
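
// Sizing note: each bit in the map covers 2^_shifter heap words, so the
// backing store above needs (_bmWordSize >> _shifter) bits, i.e.
// (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes, plus one byte of
// rounding slack. As an illustration (numbers are examples only): a 1 GB
// reservation with 8-byte heap words and _shifter == 0 yields 128M bits,
// i.e. a 16 MB bit map.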

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
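
// Concurrency note (informal): par_push claims a slot by advancing
// _index with a CAS and only then stores the oop into _base[index];
// losers of the CAS race simply retry, and a full stack is reported by
// setting _overflow rather than by blocking, leaving the caller to
// restart marking once the overflow has been dealt with.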

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
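
// Locking note (informal): unlike the CAS-based par_push() and
// par_adjoin_arr() above, par_push_arr() and par_pop_arr() serialise
// under ParGCRareEvent_lock so that a whole n-element transfer appears
// atomic with respect to other array transfers.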

CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

MemRegion CMRegionStack::pop() {
  while (true) {
    // Otherwise...
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}
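
// Informal rationale: entries that point into the collection set are
// replaced with empty MemRegions (which pop() above then skips) because
// the regions they describe are about to be evacuated, so the stacked
// address ranges would otherwise go stale.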

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in the G1 heap.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
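
// Note: remark drains with yield_after == false while the world is
// stopped (see G1CMDrainMarkingStackClosure below), which is one of the
// ways the recursion assert at the top of drain() can be satisfied.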

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(G1MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBLogBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ParallelMarkingThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ParallelMarkingThreads > 0) {
      // notice that ParallelMarkingThreads overrides G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ParallelMarkingThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
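
    // Worked example (illustrative flag values only): with
    // G1MarkingOverheadPercent == 10, MaxGCPauseMillis == 200,
    // GCPauseIntervalMillis == 1000 and 8 processors:
    //   overall_cm_overhead   = 200 * 0.1 / 1000   = 0.02
    //   cpu_ratio             = 1 / 8              = 0.125
    //   marking_thread_num    = ceil(0.02 / 0.125) = 1
    //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
    //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
    // i.e. the single marking thread would sleep ~5.25 seconds for every
    // second of marking work.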

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();
  }
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  if (G1PrintReachableAtInitialMark) {
    print_reachable(true, "before");
  }

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_active_all_threads(true);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
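
// SATB note: set_active_all_threads(true) above is what makes mutator
// threads start enqueueing snapshot-at-the-beginning entries; the
// matching set_active_all_threads(false) happens at remark, in
// checkpointRootsFinal(), once marking has finished.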

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,     // do not visit active blobs
                            &older    // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one
   thread could be in the barrier sync code, waiting for the other
   thread to also sync up, whereas another one could be trying to
   yield, while also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for a Full GC or an evacuation pause to occur while it
   waits. This is actually safe, since entering the sync barrier is
   one of the last things do_marking_step() does, and it doesn't
   manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess:store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}
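
// Finger protocol (informal): the global finger (_finger) is the address
// up to which the concurrent bitmap scan has advanced. Objects marked at
// or above the finger will still be visited by the scan, so both here
// and in grayRegionIfNecessary() only addresses below the finger need to
// be pushed explicitly in order to be traced.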

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};
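
// Throttling sketch: do_marking_step(10.0) asks a task to mark for
// roughly 10 ms at a time; when the task aborts voluntarily the worker
// sleeps for elapsed_vtime * sleep_factor (computed in the ConcurrentMark
// constructor), so concurrent marking consumes approximately the
// requested fraction of CPU time.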

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    JavaThread::satb_mark_queue_set().set_active_all_threads(false);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded.  It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set.  We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset() { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};

void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}
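
// Note: each pass of the do/while above resumes from the per-region
// top_at_conc_mark_count saved by the previous pass, and
// calccl.changed() reports whether the pass counted anything new; the
// loop therefore repeats the yielding traversal until it reaches a
// fixed point.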

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  size_t _cleared_h_regions;
  size_t _freed_regions;
  UncleanRegionList* _unclean_region_list;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (ParallelGCThreads > 0) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           UncleanRegionList* list,
                           int worker_num)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _unclean_region_list(list)
{}

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  r->reset_gc_time_stamp();
  if (!r->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    r->note_end_of_marking();
    _max_live_bytes += r->max_live_bytes();
    _g1->free_region_if_totally_empty_work(r,
                                           _freed_bytes,
                                           _cleared_h_regions,
                                           _freed_regions,
                                           _unclean_region_list,
                                           true /*par*/);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty  */ true,
                     /* silent       */ false,
                     /* prev marking */ true);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
  if (ParallelGCThreads > 0) {
    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::InitialClaimValue),
           "sanity check");

    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_count_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    g1_par_count_task.work(0);
  }

  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
#if 0
  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
                         (double) known_garbage_bytes / (double) (1024 * 1024));
#endif // 0
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
                           this_final_counting_time*1000.0);
  }
  _total_counting_time += this_final_counting_time;

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  if (ParallelGCThreads > 0) {
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->set_unclean_regions_coming(true);
  double note_end_end = os::elapsedTime();
  // Tell the mutators that there might be unclean regions coming...
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start)*1000.0);
  }

  // If enabled, do the remembered set scrubbing before the
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (ParallelGCThreads > 0) {
      int n_workers = g1h->workers()->total_workers();
      g1h->set_par_threads(n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                                 HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
                        g1_par_note_end_task.freed_bytes(),
                        g1_par_note_end_task.max_live_bytes());

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  // G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty  */ true,
                     /* silent       */ false,
                     /* prev marking */ true);
  }
}
1707 void ConcurrentMark::completeCleanup() {
1708 // A full collection intervened.
1709 if (has_aborted()) return;
1711 int first = 0;
1712 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1713 for (int t = 0; t < last; t++) {
1714 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1715 assert(list->well_formed(), "Inv");
1716 HeapRegion* hd = list->hd();
1717 while (hd != NULL) {
1718 // Now finish up the other stuff.
1719 hd->rem_set()->clear();
1720 HeapRegion* next_hd = hd->next_from_unclean_list();
1721 (void)list->pop();
1722 assert(list->hd() == next_hd, "how not?");
1723 _g1h->put_region_on_unclean_list(hd);
1724 if (!hd->isHumongous()) {
2725 // Increment the _free_regions count by one.
1726 _g1h->finish_free_region_work(0, 0, 1, NULL);
1727 }
1728 hd = list->hd();
1729 assert(hd == next_hd, "how not?");
1730 }
1731 }
1732 }
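// Closures used by reference processing during remark. An object is
// considered alive if it lies outside the G1 reserved heap, or if the
// current marking information does not show it to be dead.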
1735 class G1CMIsAliveClosure: public BoolObjectClosure {
1736 G1CollectedHeap* _g1;
1737 public:
1738 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1739 _g1(g1)
1740 {}
1742 void do_object(oop obj) {
1743 assert(false, "not to be invoked");
1744 }
1745 bool do_object_b(oop obj) {
1746 HeapWord* addr = (HeapWord*)obj;
1747 return addr != NULL &&
1748 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1749 }
1750 };
1752 class G1CMKeepAliveClosure: public OopClosure {
1753 G1CollectedHeap* _g1;
1754 ConcurrentMark* _cm;
1755 CMBitMap* _bitMap;
1756 public:
1757 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1758 CMBitMap* bitMap) :
1759 _g1(g1), _cm(cm),
1760 _bitMap(bitMap) {}
1762 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1763 virtual void do_oop( oop* p) { do_oop_work(p); }
1765 template <class T> void do_oop_work(T* p) {
1766 oop thisOop = oopDesc::load_decode_heap_oop(p);
1767 HeapWord* addr = (HeapWord*)thisOop;
1768 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1769 _bitMap->mark(addr);
1770 _cm->mark_stack_push(thisOop);
1771 }
1772 }
1773 };
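// Drains the global mark stack, applying the keep-alive closure above to
// each entry; objects it discovers are marked on the "next" bitmap and
// pushed back onto the stack until the stack is empty.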
1775 class G1CMDrainMarkingStackClosure: public VoidClosure {
1776 CMMarkStack* _markStack;
1777 CMBitMap* _bitMap;
1778 G1CMKeepAliveClosure* _oopClosure;
1779 public:
1780 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1781 G1CMKeepAliveClosure* oopClosure) :
1782 _bitMap(bitMap),
1783 _markStack(markStack),
1784 _oopClosure(oopClosure)
1785 {}
1787 void do_void() {
1788 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1789 }
1790 };
1792 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1793 ResourceMark rm;
1794 HandleMark hm;
1795 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1796 ReferenceProcessor* rp = g1h->ref_processor();
1798 // Process weak references.
1799 rp->setup_policy(clear_all_soft_refs);
1800 assert(_markStack.isEmpty(), "mark stack should be empty");
1802 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1803 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1804 G1CMDrainMarkingStackClosure
1805 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1806 &g1KeepAliveClosure);
1808 // XXXYYY Also: copy the parallel ref processing code from CMS.
1809 rp->process_discovered_references(&g1IsAliveClosure,
1810 &g1KeepAliveClosure,
1811 &g1DrainMarkingStackClosure,
1812 NULL);
1813 assert(_markStack.overflow() || _markStack.isEmpty(),
1814 "mark stack should be empty (unless it overflowed)");
1815 if (_markStack.overflow()) {
1816 set_has_overflown();
1817 }
1819 rp->enqueue_discovered_references();
1820 rp->verify_no_references_recorded();
1821 assert(!rp->discovery_enabled(), "should have been disabled");
1823 // Now clean up stale oops in SymbolTable and StringTable
1824 SymbolTable::unlink(&g1IsAliveClosure);
1825 StringTable::unlink(&g1IsAliveClosure);
1826 }
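// At the end of cleanup the "next" bitmap holds complete marking
// information, so it becomes the new "prev" bitmap; the old "prev"
// bitmap is recycled as the "next" one for the following marking cycle.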
1828 void ConcurrentMark::swapMarkBitMaps() {
1829 CMBitMapRO* temp = _prevMarkBitMap;
1830 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1831 _nextMarkBitMap = (CMBitMap*) temp;
1832 }
1834 class CMRemarkTask: public AbstractGangTask {
1835 private:
1836 ConcurrentMark *_cm;
1838 public:
1839 void work(int worker_i) {
1840 // Since all available tasks are actually started, we should
1841 // only proceed if we're supposed to be active.
1842 if ((size_t)worker_i < _cm->active_tasks()) {
1843 CMTask* task = _cm->task(worker_i);
1844 task->record_start_time();
1845 do {
1846 task->do_marking_step(1000000000.0 /* something very large */);
1847 } while (task->has_aborted() && !_cm->has_overflown());
1848 // If we overflow, then we do not want to restart. We instead
1849 // want to abort remark and do concurrent marking again.
1850 task->record_end_time();
1851 }
1852 }
1854 CMRemarkTask(ConcurrentMark* cm) :
1855 AbstractGangTask("Par Remark"), _cm(cm) { }
1856 };
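// The stop-the-world remark pause: make the heap parsable, let every
// worker finish its marking work via CMRemarkTask, and then check that
// all SATB buffers have been drained.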
1858 void ConcurrentMark::checkpointRootsFinalWork() {
1859 ResourceMark rm;
1860 HandleMark hm;
1861 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1863 g1h->ensure_parsability(false);
1865 if (ParallelGCThreads > 0) {
1866 G1CollectedHeap::StrongRootsScope srs(g1h);
1867 // this is remark, so we'll use up all available threads
1868 int active_workers = ParallelGCThreads;
1869 set_phase(active_workers, false);
1871 CMRemarkTask remarkTask(this);
1872 // We will start all available threads, even if we decide that the
1873 // active_workers will be fewer. The extra ones will just bail out
1874 // immediately.
1875 int n_workers = g1h->workers()->total_workers();
1876 g1h->set_par_threads(n_workers);
1877 g1h->workers()->run_task(&remarkTask);
1878 g1h->set_par_threads(0);
1879 } else {
1880 G1CollectedHeap::StrongRootsScope srs(g1h);
1881 // this is remark, so we'll use up all available threads
1882 int active_workers = 1;
1883 set_phase(active_workers, false);
1885 CMRemarkTask remarkTask(this);
1886 // We will start all available threads, even if we decide that the
1887 // active_workers will be fewer. The extra ones will just bail out
1888 // immediately.
1889 remarkTask.work(0);
1890 }
1891 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1892 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1894 print_stats();
1896 if (!restart_for_overflow())
1897 set_non_marking_state();
1899 #if VERIFY_OBJS_PROCESSED
1900 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1901 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1902 _scan_obj_cl.objs_processed,
1903 ThreadLocalObjQueue::objs_enqueued);
1904 guarantee(_scan_obj_cl.objs_processed ==
1905 ThreadLocalObjQueue::objs_enqueued,
1906 "Different number of objs processed and enqueued.");
1907 }
1908 #endif
1909 }
1911 #ifndef PRODUCT
1913 class ReachablePrinterOopClosure: public OopClosure {
1914 private:
1915 G1CollectedHeap* _g1h;
1916 CMBitMapRO* _bitmap;
1917 outputStream* _out;
1918 bool _use_prev_marking;
1920 public:
1921 ReachablePrinterOopClosure(CMBitMapRO* bitmap,
1922 outputStream* out,
1923 bool use_prev_marking) :
1924 _g1h(G1CollectedHeap::heap()),
1925 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
1927 void do_oop(narrowOop* p) { do_oop_work(p); }
1928 void do_oop( oop* p) { do_oop_work(p); }
1930 template <class T> void do_oop_work(T* p) {
1931 oop obj = oopDesc::load_decode_heap_oop(p);
1932 const char* str = NULL;
1933 const char* str2 = "";
1935 if (!_g1h->is_in_g1_reserved(obj))
1936 str = "outside G1 reserved";
1937 else {
1938 HeapRegion* hr = _g1h->heap_region_containing(obj);
1939 guarantee(hr != NULL, "invariant");
1940 bool over_tams = false;
1941 if (_use_prev_marking) {
1942 over_tams = hr->obj_allocated_since_prev_marking(obj);
1943 } else {
1944 over_tams = hr->obj_allocated_since_next_marking(obj);
1945 }
1947 if (over_tams) {
1948 str = "over TAMS";
1949 if (_bitmap->isMarked((HeapWord*) obj)) {
1950 str2 = " AND MARKED";
1951 }
1952 } else if (_bitmap->isMarked((HeapWord*) obj)) {
1953 str = "marked";
1954 } else {
1955 str = "#### NOT MARKED ####";
1956 }
1957 }
1959 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
1960 p, (void*) obj, str, str2);
1961 }
1962 };
1964 class ReachablePrinterClosure: public BitMapClosure {
1965 private:
1966 CMBitMapRO* _bitmap;
1967 outputStream* _out;
1968 bool _use_prev_marking;
1970 public:
1971 ReachablePrinterClosure(CMBitMapRO* bitmap,
1972 outputStream* out,
1973 bool use_prev_marking) :
1974 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
1976 bool do_bit(size_t offset) {
1977 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
1978 ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
1980 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
1981 oop(addr)->oop_iterate(&oopCl);
1982 _out->print_cr("");
1984 return true;
1985 }
1986 };
1988 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
1989 private:
1990 CMBitMapRO* _bitmap;
1991 outputStream* _out;
1992 bool _use_prev_marking;
1994 public:
1995 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap,
1996 outputStream* out,
1997 bool use_prev_marking) :
1998 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
2000 void do_object(oop o) {
2001 ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
2003 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2004 o->oop_iterate(&oopCl);
2005 _out->print_cr("");
2006 }
2007 };
2009 class RegionReachablePrinterClosure : public HeapRegionClosure {
2010 private:
2011 CMBitMapRO* _bitmap;
2012 outputStream* _out;
2013 bool _use_prev_marking;
2015 public:
2016 bool doHeapRegion(HeapRegion* hr) {
2017 HeapWord* b = hr->bottom();
2018 HeapWord* e = hr->end();
2019 HeapWord* t = hr->top();
2020 HeapWord* p = NULL;
2021 if (_use_prev_marking) {
2022 p = hr->prev_top_at_mark_start();
2023 } else {
2024 p = hr->next_top_at_mark_start();
2025 }
2026 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2027 "TAMS: "PTR_FORMAT, b, e, t, p);
2028 _out->print_cr("");
2030 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out, _use_prev_marking);
2031 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2033 return false;
2034 }
2036 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2037 outputStream* out,
2038 bool use_prev_marking) :
2039 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
2040 };
2042 void ConcurrentMark::print_reachable(bool use_prev_marking, const char* str) {
2043 gclog_or_tty->print_cr("== Doing reachable object dump... ");
2045 if (G1PrintReachableBaseFile == NULL) {
2046 gclog_or_tty->print_cr(" #### error: no base file defined");
2047 return;
2048 }
2050 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2051 (JVM_MAXPATHLEN - 1)) {
2052 gclog_or_tty->print_cr(" #### error: file name too long");
2053 return;
2054 }
2056 char file_name[JVM_MAXPATHLEN];
2057 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2058 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2060 fileStream fout(file_name);
2061 if (!fout.is_open()) {
2062 gclog_or_tty->print_cr(" #### error: could not open file");
2063 return;
2064 }
2066 outputStream* out = &fout;
2068 CMBitMapRO* bitmap = NULL;
2069 if (use_prev_marking) {
2070 bitmap = _prevMarkBitMap;
2071 } else {
2072 bitmap = _nextMarkBitMap;
2073 }
2075 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2076 out->cr();
2078 RegionReachablePrinterClosure rcl(bitmap, out, use_prev_marking);
2079 out->print_cr("--- ITERATING OVER REGIONS WITH TAMS < TOP");
2080 out->cr();
2081 _g1h->heap_region_iterate(&rcl);
2082 out->cr();
2084 ReachablePrinterClosure cl(bitmap, out, use_prev_marking);
2085 out->print_cr("--- ITERATING OVER MARKED OBJECTS ON THE BITMAP");
2086 out->cr();
2087 bitmap->iterate(&cl);
2088 out->cr();
2090 gclog_or_tty->print_cr(" done");
2091 }
2093 #endif // PRODUCT
2095 // This note applies to drainAllSATBBuffers and the code between here and there.
2096 // In the future we could reuse a task to do this work during an
2097 // evacuation pause (since now tasks are not active and can be claimed
2098 // during an evacuation pause). This was a late change to the code and
2099 // is currently not being taken advantage of.
2101 class CMGlobalObjectClosure : public ObjectClosure {
2102 private:
2103 ConcurrentMark* _cm;
2105 public:
2106 void do_object(oop obj) {
2107 _cm->deal_with_reference(obj);
2108 }
2110 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2111 };
2113 void ConcurrentMark::deal_with_reference(oop obj) {
2114 if (verbose_high())
2115 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2116 (void*) obj);
2119 HeapWord* objAddr = (HeapWord*) obj;
2120 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2121 if (_g1h->is_in_g1_reserved(objAddr)) {
2122 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2123 HeapRegion* hr = _g1h->heap_region_containing(obj);
2124 if (_g1h->is_obj_ill(obj, hr)) {
2125 if (verbose_high())
2126 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2127 "marked", (void*) obj);
2129 // we need to mark it first
2130 if (_nextMarkBitMap->parMark(objAddr)) {
2131 // No OrderAccess:store_load() is needed. It is implicit in the
2132 // CAS done in parMark(objAddr) above
2133 HeapWord* finger = _finger;
2134 if (objAddr < finger) {
2135 if (verbose_high())
2136 gclog_or_tty->print_cr("[global] below the global finger "
2137 "("PTR_FORMAT"), pushing it", finger);
2138 if (!mark_stack_push(obj)) {
2139 if (verbose_low())
2140 gclog_or_tty->print_cr("[global] global stack overflow during "
2141 "deal_with_reference");
2142 }
2143 }
2144 }
2145 }
2146 }
2147 }
2149 void ConcurrentMark::drainAllSATBBuffers() {
2150 CMGlobalObjectClosure oc(this);
2151 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2152 satb_mq_set.set_closure(&oc);
2154 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2155 if (verbose_medium())
2156 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2157 }
2159 // no need to check whether we should do this, as this is only
2160 // called during an evacuation pause
2161 satb_mq_set.iterate_closure_all_threads();
2163 satb_mq_set.set_closure(NULL);
2164 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2165 }
2167 void ConcurrentMark::markPrev(oop p) {
2168 // Note we are overriding the read-only view of the prev map here, via
2169 // the cast.
2170 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2171 }
2173 void ConcurrentMark::clear(oop p) {
2174 assert(p != NULL && p->is_oop(), "expected an oop");
2175 HeapWord* addr = (HeapWord*)p;
2176 assert(addr >= _nextMarkBitMap->startWord() &&
2177 addr < _nextMarkBitMap->endWord(), "in a region");
2179 _nextMarkBitMap->clear(addr);
2180 }
2182 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2183 // Note we are overriding the read-only view of the prev map here, via
2184 // the cast.
2185 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2186 _nextMarkBitMap->clearRange(mr);
2187 }
2189 HeapRegion*
2190 ConcurrentMark::claim_region(int task_num) {
2191 // "checkpoint" the finger
2192 HeapWord* finger = _finger;
2194 // _heap_end will not change underneath our feet; it only changes at
2195 // yield points.
2196 while (finger < _heap_end) {
2197 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2199 // is the gap between reading the finger and doing the CAS too long?
2201 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2202 HeapWord* bottom = curr_region->bottom();
2203 HeapWord* end = curr_region->end();
2204 HeapWord* limit = curr_region->next_top_at_mark_start();
2206 if (verbose_low())
2207 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2208 "["PTR_FORMAT", "PTR_FORMAT"), "
2209 "limit = "PTR_FORMAT,
2210 task_num, curr_region, bottom, end, limit);
2212 HeapWord* res =
2213 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2214 if (res == finger) {
2215 // we succeeded
2217 // notice that _finger == end cannot be guaranteed here since
2218 // someone else might have moved the finger even further
2219 assert(_finger >= end, "the finger should have moved forward");
2221 if (verbose_low())
2222 gclog_or_tty->print_cr("[%d] we were successful with region = "
2223 PTR_FORMAT, task_num, curr_region);
2225 if (limit > bottom) {
2226 if (verbose_low())
2227 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2228 "returning it ", task_num, curr_region);
2229 return curr_region;
2230 } else {
2231 assert(limit == bottom,
2232 "the region limit should be at bottom");
2233 if (verbose_low())
2234 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2235 "returning NULL", task_num, curr_region);
2236 // we return NULL and the caller should try calling
2237 // claim_region() again.
2238 return NULL;
2239 }
2240 } else {
2241 assert(_finger > finger, "the finger should have moved forward");
2242 if (verbose_low())
2243 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2244 "global finger = "PTR_FORMAT", "
2245 "our finger = "PTR_FORMAT,
2246 task_num, _finger, finger);
2248 // read it again
2249 finger = _finger;
2250 }
2251 }
2253 return NULL;
2254 }
2256 void ConcurrentMark::oops_do(OopClosure* cl) {
2257 if (_markStack.size() > 0 && verbose_low())
2258 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2259 "size = %d", _markStack.size());
2260 // we first iterate over the contents of the mark stack...
2261 _markStack.oops_do(cl);
2263 for (int i = 0; i < (int)_max_task_num; ++i) {
2264 OopTaskQueue* queue = _task_queues->queue((int)i);
2266 if (queue->size() > 0 && verbose_low())
2267 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2268 "size = %d", i, queue->size());
2270 // ...then over the contents of the all the task queues.
2271 queue->oops_do(cl);
2272 }
2274 // finally, invalidate any entries in the region stack that
2275 // point into the collection set
2276 if (_regionStack.invalidate_entries_into_cset()) {
2277 // otherwise, any gray objects copied during the evacuation pause
2278 // might not be visited.
2279 assert(_should_gray_objects, "invariant");
2280 }
2281 }
2283 void ConcurrentMark::clear_marking_state() {
2284 _markStack.setEmpty();
2285 _markStack.clear_overflow();
2286 _regionStack.setEmpty();
2287 _regionStack.clear_overflow();
2288 clear_has_overflown();
2289 _finger = _heap_start;
2291 for (int i = 0; i < (int)_max_task_num; ++i) {
2292 OopTaskQueue* queue = _task_queues->queue(i);
2293 queue->set_empty();
2294 }
2295 }
2297 void ConcurrentMark::print_stats() {
2298 if (verbose_stats()) {
2299 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2300 for (size_t i = 0; i < _active_tasks; ++i) {
2301 _tasks[i]->print_stats();
2302 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2303 }
2304 }
2305 }
2307 class CSMarkOopClosure: public OopClosure {
2308 friend class CSMarkBitMapClosure;
2310 G1CollectedHeap* _g1h;
2311 CMBitMap* _bm;
2312 ConcurrentMark* _cm;
2313 oop* _ms;
2314 jint* _array_ind_stack;
2315 int _ms_size;
2316 int _ms_ind;
2317 int _array_increment;
2319 bool push(oop obj, int arr_ind = 0) {
2320 if (_ms_ind == _ms_size) {
2321 gclog_or_tty->print_cr("Mark stack is full.");
2322 return false;
2323 }
2324 _ms[_ms_ind] = obj;
2325 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2326 _ms_ind++;
2327 return true;
2328 }
2330 oop pop() {
2331 if (_ms_ind == 0) return NULL;
2332 else {
2333 _ms_ind--;
2334 return _ms[_ms_ind];
2335 }
2336 }
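// Drains the local stack. Object arrays are processed in chunks of
// _array_increment references; the array is re-pushed with the next
// start index so that large arrays are scanned incrementally.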
2338 template <class T> bool drain() {
2339 while (_ms_ind > 0) {
2340 oop obj = pop();
2341 assert(obj != NULL, "Since index was non-zero.");
2342 if (obj->is_objArray()) {
2343 jint arr_ind = _array_ind_stack[_ms_ind];
2344 objArrayOop aobj = objArrayOop(obj);
2345 jint len = aobj->length();
2346 jint next_arr_ind = arr_ind + _array_increment;
2347 if (next_arr_ind < len) {
2348 push(obj, next_arr_ind);
2349 }
2350 // Now process this portion of this one.
2351 int lim = MIN2(next_arr_ind, len);
2352 for (int j = arr_ind; j < lim; j++) {
2353 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2354 }
2356 } else {
2357 obj->oop_iterate(this);
2358 }
2359 if (abort()) return false;
2360 }
2361 return true;
2362 }
2364 public:
2365 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2366 _g1h(G1CollectedHeap::heap()),
2367 _cm(cm),
2368 _bm(cm->nextMarkBitMap()),
2369 _ms_size(ms_size), _ms_ind(0),
2370 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2371 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2372 _array_increment(MAX2(ms_size/8, 16))
2373 {}
2375 ~CSMarkOopClosure() {
2376 FREE_C_HEAP_ARRAY(oop, _ms);
2377 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2378 }
2380 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2381 virtual void do_oop( oop* p) { do_oop_work(p); }
2383 template <class T> void do_oop_work(T* p) {
2384 T heap_oop = oopDesc::load_heap_oop(p);
2385 if (oopDesc::is_null(heap_oop)) return;
2386 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2387 if (obj->is_forwarded()) {
2388 // If the object has already been forwarded, we have to make sure
2389 // that it's marked. So follow the forwarding pointer. Note that
2390 // this does the right thing for self-forwarding pointers in the
2391 // evacuation failure case.
2392 obj = obj->forwardee();
2393 }
2394 HeapRegion* hr = _g1h->heap_region_containing(obj);
2395 if (hr != NULL) {
2396 if (hr->in_collection_set()) {
2397 if (_g1h->is_obj_ill(obj)) {
2398 _bm->mark((HeapWord*)obj);
2399 if (!push(obj)) {
2400 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2401 set_abort();
2402 }
2403 }
2404 } else {
2405 // Outside the collection set; we need to gray it
2406 _cm->deal_with_reference(obj);
2407 }
2408 }
2409 }
2410 };
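// Iterates over set bits in the collection set portion of the "next"
// bitmap; each unforwarded marked object is pushed onto the closure's
// local stack and then transitively drained by CSMarkOopClosure.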
2412 class CSMarkBitMapClosure: public BitMapClosure {
2413 G1CollectedHeap* _g1h;
2414 CMBitMap* _bitMap;
2415 ConcurrentMark* _cm;
2416 CSMarkOopClosure _oop_cl;
2417 public:
2418 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2419 _g1h(G1CollectedHeap::heap()),
2420 _bitMap(cm->nextMarkBitMap()),
2421 _oop_cl(cm, ms_size)
2422 {}
2424 ~CSMarkBitMapClosure() {}
2426 bool do_bit(size_t offset) {
2427 // convert offset into a HeapWord*
2428 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2429 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
2430 "address out of range");
2431 assert(_bitMap->isMarked(addr), "tautology");
2432 oop obj = oop(addr);
2433 if (!obj->is_forwarded()) {
2434 if (!_oop_cl.push(obj)) return false;
2435 if (UseCompressedOops) {
2436 if (!_oop_cl.drain<narrowOop>()) return false;
2437 } else {
2438 if (!_oop_cl.drain<oop>()) return false;
2439 }
2440 }
2441 // Otherwise...
2442 return true;
2443 }
2444 };
2447 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2448 CMBitMap* _bm;
2449 CSMarkBitMapClosure _bit_cl;
2450 enum SomePrivateConstants {
2451 MSSize = 1000
2452 };
2453 bool _completed;
2454 public:
2455 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2456 _bm(cm->nextMarkBitMap()),
2457 _bit_cl(cm, MSSize),
2458 _completed(true)
2459 {}
2461 ~CompleteMarkingInCSHRClosure() {}
2463 bool doHeapRegion(HeapRegion* r) {
2464 if (!r->evacuation_failed()) {
2465 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2466 if (!mr.is_empty()) {
2467 if (!_bm->iterate(&_bit_cl, mr)) {
2468 _completed = false;
2469 return true;
2470 }
2471 }
2472 }
2473 return false;
2474 }
2476 bool completed() { return _completed; }
2477 };
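// Clears the "next" bitmap marks set by the closures above for regions
// about to be evacuated; regions that failed evacuation keep their marks.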
2479 class ClearMarksInHRClosure: public HeapRegionClosure {
2480 CMBitMap* _bm;
2481 public:
2482 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2484 bool doHeapRegion(HeapRegion* r) {
2485 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2486 MemRegion usedMR = r->used_region();
2487 _bm->clearRange(usedMR);
2488 }
2489 return false;
2490 }
2491 };
2493 void ConcurrentMark::complete_marking_in_collection_set() {
2494 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2496 if (!g1h->mark_in_progress()) {
2497 g1h->g1_policy()->record_mark_closure_time(0.0);
2498 return;
2499 }
2501 int i = 1;
2502 double start = os::elapsedTime();
2503 while (true) {
2504 i++;
2505 CompleteMarkingInCSHRClosure cmplt(this);
2506 g1h->collection_set_iterate(&cmplt);
2507 if (cmplt.completed()) break;
2508 }
2509 double end_time = os::elapsedTime();
2510 double elapsed_time_ms = (end_time - start) * 1000.0;
2511 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2512 if (PrintGCDetails) {
2513 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2514 }
2516 ClearMarksInHRClosure clr(nextMarkBitMap());
2517 g1h->collection_set_iterate(&clr);
2518 }
2520 // The next two methods deal with the following optimisation. Some
2521 // objects are gray by being marked and located above the finger. If
2522 // they are copied, during an evacuation pause, below the finger then
2523 // they need to be pushed on the stack. The observation is that, if
2524 // there are no regions in the collection set located above the
2525 // finger, then the above cannot happen, hence we do not need to
2526 // explicitly gray any objects when copying them to below the
2527 // finger. The global stack will be scanned to ensure that, if it
2528 // points to objects being copied, it will update their
2529 // location. There is a tricky situation with the gray objects in
2530 // the region stack that are being copied, however. See the comment in
2531 // newCSet().
2533 void ConcurrentMark::newCSet() {
2534 if (!concurrent_marking_in_progress())
2535 // nothing to do if marking is not in progress
2536 return;
2538 // find what the lowest finger is among the global and local fingers
2539 _min_finger = _finger;
2540 for (int i = 0; i < (int)_max_task_num; ++i) {
2541 CMTask* task = _tasks[i];
2542 HeapWord* task_finger = task->finger();
2543 if (task_finger != NULL && task_finger < _min_finger)
2544 _min_finger = task_finger;
2545 }
2547 _should_gray_objects = false;
2549 // This fixes a very subtle and frustrating bug. It might be the case
2550 // that, during an evacuation pause, heap regions that contain
2551 // objects that are gray (by being in regions contained in the
2552 // region stack) are included in the collection set. Since such gray
2553 // objects will be moved, and because it's not easy to redirect
2554 // region stack entries to point to a new location (because objects
2555 // in one region might be scattered to multiple regions after they
2556 // are copied), one option is to ensure that all marked objects
2557 // copied during a pause are pushed on the stack. Notice, however,
2558 // that this problem can only happen when the region stack is not
2559 // empty during an evacuation pause. So, we make the fix a bit less
2560 // conservative and ensure that regions are pushed on the stack,
2561 // irrespective of whether all collection set regions are below the
2562 // finger, if the region stack is not empty. This is expected to be
2563 // a rare case, so I don't think it's necessary to be smarter about it.
2564 if (!region_stack_empty())
2565 _should_gray_objects = true;
2566 }
2568 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2569 if (!concurrent_marking_in_progress())
2570 return;
2572 HeapWord* region_end = hr->end();
2573 if (region_end > _min_finger)
2574 _should_gray_objects = true;
2575 }
2577 // abandon current marking iteration due to a Full GC
2578 void ConcurrentMark::abort() {
2579 // Clear all marks to force marking thread to do nothing
2580 _nextMarkBitMap->clearAll();
2581 // Empty mark stack
2582 clear_marking_state();
2583 for (int i = 0; i < (int)_max_task_num; ++i)
2584 _tasks[i]->clear_region_fields();
2585 _has_aborted = true;
2587 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2588 satb_mq_set.abandon_partial_marking();
2589 satb_mq_set.set_active_all_threads(false);
2590 }
2592 static void print_ms_time_info(const char* prefix, const char* name,
2593 NumberSeq& ns) {
2594 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2595 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2596 if (ns.num() > 0) {
2597 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2598 prefix, ns.sd(), ns.maximum());
2599 }
2600 }
2602 void ConcurrentMark::print_summary_info() {
2603 gclog_or_tty->print_cr(" Concurrent marking:");
2604 print_ms_time_info(" ", "init marks", _init_times);
2605 print_ms_time_info(" ", "remarks", _remark_times);
2606 {
2607 print_ms_time_info(" ", "final marks", _remark_mark_times);
2608 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2610 }
2611 print_ms_time_info(" ", "cleanups", _cleanup_times);
2612 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2613 _total_counting_time,
2614 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2615 (double)_cleanup_times.num()
2616 : 0.0));
2617 if (G1ScrubRemSets) {
2618 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2619 _total_rs_scrub_time,
2620 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2621 (double)_cleanup_times.num()
2622 : 0.0));
2623 }
2624 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2625 (_init_times.sum() + _remark_times.sum() +
2626 _cleanup_times.sum())/1000.0);
2627 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2628 "(%8.2f s marking, %8.2f s counting).",
2629 cmThread()->vtime_accum(),
2630 cmThread()->vtime_mark_accum(),
2631 cmThread()->vtime_count_accum());
2632 }
2634 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2635 _parallel_workers->print_worker_threads_on(st);
2636 }
2638 // Closures
2639 // XXX: there seems to be a lot of code duplication here;
2640 // should refactor and consolidate the shared code.
2642 // This closure is used to mark refs into the CMS generation in
2643 // the CMS bit map. Called at the first checkpoint.
2645 // We take a break if someone is trying to stop the world.
2646 bool ConcurrentMark::do_yield_check(int worker_i) {
2647 if (should_yield()) {
2648 if (worker_i == 0)
2649 _g1h->g1_policy()->record_concurrent_pause();
2650 cmThread()->yield();
2651 if (worker_i == 0)
2652 _g1h->g1_policy()->record_concurrent_pause_end();
2653 return true;
2654 } else {
2655 return false;
2656 }
2657 }
2659 bool ConcurrentMark::should_yield() {
2660 return cmThread()->should_yield();
2661 }
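// A card is "marked" if the counting phase found live data on it. The
// card index is the byte offset of p from the bottom of the reserved
// heap, shifted right by CardTableModRefBS::card_shift.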
2663 bool ConcurrentMark::containing_card_is_marked(void* p) {
2664 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2665 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2666 }
2668 bool ConcurrentMark::containing_cards_are_marked(void* start,
2669 void* last) {
2670 return
2671 containing_card_is_marked(start) &&
2672 containing_card_is_marked(last);
2673 }
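// Illustrative use (not from this change): a caller that wants to know
// whether an object's whole footprint was counted live could do
//   containing_cards_are_marked(obj_addr, obj_addr + obj_size - 1);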
2675 #ifndef PRODUCT
2676 // for debugging purposes
2677 void ConcurrentMark::print_finger() {
2678 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2679 _heap_start, _heap_end, _finger);
2680 for (int i = 0; i < (int) _max_task_num; ++i) {
2681 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2682 }
2683 gclog_or_tty->print_cr("");
2684 }
2685 #endif
2687 // Closure for iteration over bitmaps
2688 class CMBitMapClosure : public BitMapClosure {
2689 private:
2690 // the bitmap that is being iterated over
2691 CMBitMap* _nextMarkBitMap;
2692 ConcurrentMark* _cm;
2693 CMTask* _task;
2694 // true if we're scanning a heap region claimed by the task (so that
2695 // we move the finger along), false if we're not, i.e. currently when
2696 // scanning a heap region popped from the region stack (so that we
2697 // do not move the task finger along; it'd be a mistake if we did so).
2698 bool _scanning_heap_region;
2700 public:
2701 CMBitMapClosure(CMTask *task,
2702 ConcurrentMark* cm,
2703 CMBitMap* nextMarkBitMap)
2704 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2706 void set_scanning_heap_region(bool scanning_heap_region) {
2707 _scanning_heap_region = scanning_heap_region;
2708 }
2710 bool do_bit(size_t offset) {
2711 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2712 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2713 assert( addr < _cm->finger(), "invariant");
2715 if (_scanning_heap_region) {
2716 statsOnly( _task->increase_objs_found_on_bitmap() );
2717 assert(addr >= _task->finger(), "invariant");
2718 // We move that task's local finger along.
2719 _task->move_finger_to(addr);
2720 } else {
2721 // We move the task's region finger along.
2722 _task->move_region_finger_to(addr);
2723 }
2725 _task->scan_object(oop(addr));
2726 // we only partially drain the local queue and global stack
2727 _task->drain_local_queue(true);
2728 _task->drain_global_stack(true);
2730 // if the has_aborted flag has been raised, we need to bail out of
2731 // the iteration
2732 return !_task->has_aborted();
2733 }
2734 };
2736 // Closure for iterating over objects, currently only used for
2737 // processing SATB buffers.
2738 class CMObjectClosure : public ObjectClosure {
2739 private:
2740 CMTask* _task;
2742 public:
2743 void do_object(oop obj) {
2744 _task->deal_with_reference(obj);
2745 }
2747 CMObjectClosure(CMTask* task) : _task(task) { }
2748 };
2750 // Closure for iterating over object fields
2751 class CMOopClosure : public OopClosure {
2752 private:
2753 G1CollectedHeap* _g1h;
2754 ConcurrentMark* _cm;
2755 CMTask* _task;
2757 public:
2758 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2759 virtual void do_oop( oop* p) { do_oop_work(p); }
2761 template <class T> void do_oop_work(T* p) {
2762 assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2763 assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2764 "invariant");
2766 oop obj = oopDesc::load_decode_heap_oop(p);
2767 if (_cm->verbose_high())
2768 gclog_or_tty->print_cr("[%d] we're looking at location "
2769 "*"PTR_FORMAT" = "PTR_FORMAT,
2770 _task->task_id(), p, (void*) obj);
2771 _task->deal_with_reference(obj);
2772 }
2774 CMOopClosure(G1CollectedHeap* g1h,
2775 ConcurrentMark* cm,
2776 CMTask* task)
2777 : _g1h(g1h), _cm(cm), _task(task) { }
2778 };
2780 void CMTask::setup_for_region(HeapRegion* hr) {
2781 // Separated the asserts so that we know which one fires.
2782 assert(hr != NULL,
2783 "claim_region() should have filtered out continues humongous regions");
2784 assert(!hr->continuesHumongous(),
2785 "claim_region() should have filtered out continues humongous regions");
2787 if (_cm->verbose_low())
2788 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2789 _task_id, hr);
2791 _curr_region = hr;
2792 _finger = hr->bottom();
2793 update_region_limit();
2794 }
2796 void CMTask::update_region_limit() {
2797 HeapRegion* hr = _curr_region;
2798 HeapWord* bottom = hr->bottom();
2799 HeapWord* limit = hr->next_top_at_mark_start();
2801 if (limit == bottom) {
2802 if (_cm->verbose_low())
2803 gclog_or_tty->print_cr("[%d] found an empty region "
2804 "["PTR_FORMAT", "PTR_FORMAT")",
2805 _task_id, bottom, limit);
2806 // The region was collected underneath our feet.
2807 // We set the finger to bottom to ensure that the bitmap
2808 // iteration that will follow this will not do anything.
2809 // (this is not a condition that holds when we set the region up,
2810 // as the region is not supposed to be empty in the first place)
2811 _finger = bottom;
2812 } else if (limit >= _region_limit) {
2813 assert(limit >= _finger, "peace of mind");
2814 } else {
2815 assert(limit < _region_limit, "only way to get here");
2816 // This can happen under some pretty unusual circumstances. An
2817 // evacuation pause empties the region underneath our feet (NTAMS
2818 // at bottom). We then do some allocation in the region (NTAMS
2819 // stays at bottom), followed by the region being used as a GC
2820 // alloc region (NTAMS will move to top() and the objects
2821 // originally below it will be grayed). All objects now marked in
2822 // the region are explicitly grayed, if below the global finger,
2823 // and we do not need in fact to scan anything else. So, we simply
2824 // set _finger to be limit to ensure that the bitmap iteration
2825 // doesn't do anything.
2826 _finger = limit;
2827 }
2829 _region_limit = limit;
2830 }
2832 void CMTask::giveup_current_region() {
2833 assert(_curr_region != NULL, "invariant");
2834 if (_cm->verbose_low())
2835 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2836 _task_id, _curr_region);
2837 clear_region_fields();
2838 }
2840 void CMTask::clear_region_fields() {
2841 // Values for these three fields that indicate that we're not
2842 // holding on to a region.
2843 _curr_region = NULL;
2844 _finger = NULL;
2845 _region_limit = NULL;
2847 _region_finger = NULL;
2848 }
2850 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2851 guarantee(nextMarkBitMap != NULL, "invariant");
2853 if (_cm->verbose_low())
2854 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2856 _nextMarkBitMap = nextMarkBitMap;
2857 clear_region_fields();
2859 _calls = 0;
2860 _elapsed_time_ms = 0.0;
2861 _termination_time_ms = 0.0;
2862 _termination_start_time_ms = 0.0;
2864 #if _MARKING_STATS_
2865 _local_pushes = 0;
2866 _local_pops = 0;
2867 _local_max_size = 0;
2868 _objs_scanned = 0;
2869 _global_pushes = 0;
2870 _global_pops = 0;
2871 _global_max_size = 0;
2872 _global_transfers_to = 0;
2873 _global_transfers_from = 0;
2874 _region_stack_pops = 0;
2875 _regions_claimed = 0;
2876 _objs_found_on_bitmap = 0;
2877 _satb_buffers_processed = 0;
2878 _steal_attempts = 0;
2879 _steals = 0;
2880 _aborted = 0;
2881 _aborted_overflow = 0;
2882 _aborted_cm_aborted = 0;
2883 _aborted_yield = 0;
2884 _aborted_timed_out = 0;
2885 _aborted_satb = 0;
2886 _aborted_termination = 0;
2887 #endif // _MARKING_STATS_
2888 }
2890 bool CMTask::should_exit_termination() {
2891 regular_clock_call();
2892 // This is called when we are in the termination protocol. We should
2893 // quit if, for some reason, this task wants to abort or the global
2894 // stack is not empty (this means that we can get work from it).
2895 return !_cm->mark_stack_empty() || has_aborted();
2896 }
2898 // This determines whether the method below will check both the local
2899 // and global fingers when determining whether to push on the stack a
2900 // gray object (value 1) or whether it will only check the global one
2901 // (value 0). The tradeoffs are that the former will be a bit more
2902 // accurate and possibly push less on the stack, but it might also be
2903 // a little bit slower.
2905 #define _CHECK_BOTH_FINGERS_ 1
2907 void CMTask::deal_with_reference(oop obj) {
2908 if (_cm->verbose_high())
2909 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2910 _task_id, (void*) obj);
2912 ++_refs_reached;
2914 HeapWord* objAddr = (HeapWord*) obj;
2915 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2916 if (_g1h->is_in_g1_reserved(objAddr)) {
2917 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2918 HeapRegion* hr = _g1h->heap_region_containing(obj);
2919 if (_g1h->is_obj_ill(obj, hr)) {
2920 if (_cm->verbose_high())
2921 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2922 _task_id, (void*) obj);
2924 // we need to mark it first
2925 if (_nextMarkBitMap->parMark(objAddr)) {
2926 // No OrderAccess:store_load() is needed. It is implicit in the
2927 // CAS done in parMark(objAddr) above
2928 HeapWord* global_finger = _cm->finger();
2930 #if _CHECK_BOTH_FINGERS_
2931 // we will check both the local and global fingers
2933 if (_finger != NULL && objAddr < _finger) {
2934 if (_cm->verbose_high())
2935 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2936 "pushing it", _task_id, _finger);
2937 push(obj);
2938 } else if (_curr_region != NULL && objAddr < _region_limit) {
2939 // do nothing
2940 } else if (objAddr < global_finger) {
2941 // Notice that the global finger might be moving forward
2942 // concurrently. This is not a problem. In the worst case, we
2943 // mark the object while it is above the global finger and, by
2944 // the time we read the global finger, it has moved forward
2945 // passed this object. In this case, the object will probably
2946 // be visited when a task is scanning the region and will also
2947 // be pushed on the stack. So, some duplicate work, but no
2948 // correctness problems.
2950 if (_cm->verbose_high())
2951 gclog_or_tty->print_cr("[%d] below the global finger "
2952 "("PTR_FORMAT"), pushing it",
2953 _task_id, global_finger);
2954 push(obj);
2955 } else {
2956 // do nothing
2957 }
2958 #else // _CHECK_BOTH_FINGERS_
2959 // we will only check the global finger
2961 if (objAddr < global_finger) {
2962 // see long comment above
2964 if (_cm->verbose_high())
2965 gclog_or_tty->print_cr("[%d] below the global finger "
2966 "("PTR_FORMAT"), pushing it",
2967 _task_id, global_finger);
2968 push(obj);
2969 }
2970 #endif // _CHECK_BOTH_FINGERS_
2971 }
2972 }
2973 }
2974 }
2976 void CMTask::push(oop obj) {
2977 HeapWord* objAddr = (HeapWord*) obj;
2978 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
2979 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
2980 "invariant");
2981 assert(!_g1h->is_obj_ill(obj), "invariant");
2982 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
2984 if (_cm->verbose_high())
2985 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2987 if (!_task_queue->push(obj)) {
2988 // The local task queue looks full. We need to push some entries
2989 // to the global stack.
2991 if (_cm->verbose_medium())
2992 gclog_or_tty->print_cr("[%d] task queue overflow, "
2993 "moving entries to the global stack",
2994 _task_id);
2995 move_entries_to_global_stack();
2997 // this should succeed since, even if we overflow the global
2998 // stack, we should have definitely removed some entries from the
2999 // local queue. So, there must be space on it.
3000 bool success = _task_queue->push(obj);
3001 assert(success, "invariant");
3002 }
3004 statsOnly( int tmp_size = _task_queue->size();
3005 if (tmp_size > _local_max_size)
3006 _local_max_size = tmp_size;
3007 ++_local_pushes );
3008 }
3010 void CMTask::reached_limit() {
3011 assert(_words_scanned >= _words_scanned_limit ||
3012 _refs_reached >= _refs_reached_limit,
3013 "shouldn't have been called otherwise");
3014 regular_clock_call();
3015 }
3017 void CMTask::regular_clock_call() {
3018 if (has_aborted())
3019 return;
3021 // First, we need to recalculate the words scanned and refs reached
3022 // limits for the next clock call.
3023 recalculate_limits();
3025 // During the regular clock call we do the following
3027 // (1) If an overflow has been flagged, then we abort.
3028 if (_cm->has_overflown()) {
3029 set_has_aborted();
3030 return;
3031 }
3033 // If we are not concurrent (i.e. we're doing remark) we don't need
3034 // to check anything else. The other steps are only needed during
3035 // the concurrent marking phase.
3036 if (!concurrent())
3037 return;
3039 // (2) If marking has been aborted for Full GC, then we also abort.
3040 if (_cm->has_aborted()) {
3041 set_has_aborted();
3042 statsOnly( ++_aborted_cm_aborted );
3043 return;
3044 }
3046 double curr_time_ms = os::elapsedVTime() * 1000.0;
3048 // (3) If marking stats are enabled, then we update the step history.
3049 #if _MARKING_STATS_
3050 if (_words_scanned >= _words_scanned_limit)
3051 ++_clock_due_to_scanning;
3052 if (_refs_reached >= _refs_reached_limit)
3053 ++_clock_due_to_marking;
3055 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3056 _interval_start_time_ms = curr_time_ms;
3057 _all_clock_intervals_ms.add(last_interval_ms);
3059 if (_cm->verbose_medium()) {
3060 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3061 "scanned = %d%s, refs reached = %d%s",
3062 _task_id, last_interval_ms,
3063 _words_scanned,
3064 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3065 _refs_reached,
3066 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3067 }
3068 #endif // _MARKING_STATS_
3070 // (4) We check whether we should yield. If we have to, then we abort.
3071 if (_cm->should_yield()) {
3072 // We should yield. To do this we abort the task. The caller is
3073 // responsible for yielding.
3074 set_has_aborted();
3075 statsOnly( ++_aborted_yield );
3076 return;
3077 }
3079 // (5) We check whether we've reached our time quota. If we have,
3080 // then we abort.
3081 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3082 if (elapsed_time_ms > _time_target_ms) {
3083 set_has_aborted();
3084 _has_aborted_timed_out = true;
3085 statsOnly( ++_aborted_timed_out );
3086 return;
3087 }
3089 // (6) Finally, we check whether there are enough completed SATB
3090 // buffers available for processing. If there are, we abort.
3091 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3092 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3093 if (_cm->verbose_low())
3094 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3095 _task_id);
3096 // we do need to process SATB buffers, we'll abort and restart
3097 // the marking task to do so
3098 set_has_aborted();
3099 statsOnly( ++_aborted_satb );
3100 return;
3101 }
3102 }
3104 void CMTask::recalculate_limits() {
3105 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3106 _words_scanned_limit = _real_words_scanned_limit;
3108 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3109 _refs_reached_limit = _real_refs_reached_limit;
3110 }
3112 void CMTask::decrease_limits() {
3113 // This is called when we believe that we're going to do an infrequent
3114 // operation which will increase the per byte scanned cost (i.e. move
3115 // entries to/from the global stack). It basically tries to decrease the
3116 // scanning limit so that the clock is called earlier.
3118 if (_cm->verbose_medium())
3119 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3121 _words_scanned_limit = _real_words_scanned_limit -
3122 3 * words_scanned_period / 4;
3123 _refs_reached_limit = _real_refs_reached_limit -
3124 3 * refs_reached_period / 4;
3125 }
3127 void CMTask::move_entries_to_global_stack() {
3128 // local array where we'll store the entries that will be popped
3129 // from the local queue
3130 oop buffer[global_stack_transfer_size];
3132 int n = 0;
3133 oop obj;
3134 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3135 buffer[n] = obj;
3136 ++n;
3137 }
3139 if (n > 0) {
3140 // we popped at least one entry from the local queue
3142 statsOnly( ++_global_transfers_to; _local_pops += n );
3144 if (!_cm->mark_stack_push(buffer, n)) {
3145 if (_cm->verbose_low())
3146 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3147 set_has_aborted();
3148 } else {
3149 // the transfer was successful
3151 if (_cm->verbose_medium())
3152 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3153 _task_id, n);
3154 statsOnly( int tmp_size = _cm->mark_stack_size();
3155 if (tmp_size > _global_max_size)
3156 _global_max_size = tmp_size;
3157 _global_pushes += n );
3158 }
3159 }
3161 // this operation was quite expensive, so decrease the limits
3162 decrease_limits();
3163 }
3165 void CMTask::get_entries_from_global_stack() {
3166 // local array where we'll store the entries that will be popped
3167 // from the global stack.
3168 oop buffer[global_stack_transfer_size];
3169 int n;
3170 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3171 assert(n <= global_stack_transfer_size,
3172 "we should not pop more than the given limit");
3173 if (n > 0) {
3174 // yes, we did actually pop at least one entry
3176 statsOnly( ++_global_transfers_from; _global_pops += n );
3177 if (_cm->verbose_medium())
3178 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3179 _task_id, n);
3180 for (int i = 0; i < n; ++i) {
3181 bool success = _task_queue->push(buffer[i]);
3182 // We only call this when the local queue is empty or under a
3183 // given target limit. So, we do not expect this push to fail.
3184 assert(success, "invariant");
3185 }
3187 statsOnly( int tmp_size = _task_queue->size();
3188 if (tmp_size > _local_max_size)
3189 _local_max_size = tmp_size;
3190 _local_pushes += n );
3191 }
3193 // this operation was quite expensive, so decrease the limits
3194 decrease_limits();
3195 }
3197 void CMTask::drain_local_queue(bool partially) {
3198 if (has_aborted())
3199 return;
3201 // Decide what the target size is, depending whether we're going to
3202 // drain it partially (so that other tasks can steal if they run out
3203 // of things to do) or totally (at the very end).
3204 size_t target_size;
3205 if (partially)
3206 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3207 else
3208 target_size = 0;
3210 if (_task_queue->size() > target_size) {
3211 if (_cm->verbose_high())
3212 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3213 _task_id, target_size);
3215 oop obj;
3216 bool ret = _task_queue->pop_local(obj);
3217 while (ret) {
3218 statsOnly( ++_local_pops );
3220 if (_cm->verbose_high())
3221 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3222 (void*) obj);
3224 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3225 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3226 "invariant");
3228 scan_object(obj);
3230 if (_task_queue->size() <= target_size || has_aborted())
3231 ret = false;
3232 else
3233 ret = _task_queue->pop_local(obj);
3234 }
3236 if (_cm->verbose_high())
3237 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3238 _task_id, _task_queue->size());
3239 }
3240 }
3242 void CMTask::drain_global_stack(bool partially) {
3243 if (has_aborted())
3244 return;
3246 // We have a policy to drain the local queue before we attempt to
3247 // drain the global stack.
3248 assert(partially || _task_queue->size() == 0, "invariant");
3250 // Decide what the target size is, depending whether we're going to
3251 // drain it partially (so that other tasks can steal if they run out
3252 // of things to do) or totally (at the very end). Notice that,
3253 // because we move entries from the global stack in chunks or
3254 // because another task might be doing the same, we might in fact
3255 // drop below the target. But, this is not a problem.
3256 size_t target_size;
3257 if (partially)
3258 target_size = _cm->partial_mark_stack_size_target();
3259 else
3260 target_size = 0;
3262 if (_cm->mark_stack_size() > target_size) {
3263 if (_cm->verbose_low())
3264 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3265 _task_id, target_size);
3267 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3268 get_entries_from_global_stack();
3269 drain_local_queue(partially);
3270 }
3272 if (_cm->verbose_low())
3273 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3274 _task_id, _cm->mark_stack_size());
3275 }
3276 }
3278 // The SATB queue set makes several assumptions about whether to call the
3279 // par or non-par versions of its methods; this is why some of the code is
3280 // replicated. We should really get rid of the single-threaded version
3281 // of the code to simplify things.
3282 void CMTask::drain_satb_buffers() {
3283 if (has_aborted())
3284 return;
3286 // We set this so that the regular clock knows that we're in the
3287 // middle of draining buffers and doesn't set the abort flag when it
3288 // notices that SATB buffers are available for draining. It'd be
3289 // very counter productive if it did that. :-)
3290 _draining_satb_buffers = true;
3292 CMObjectClosure oc(this);
3293 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3294 if (ParallelGCThreads > 0)
3295 satb_mq_set.set_par_closure(_task_id, &oc);
3296 else
3297 satb_mq_set.set_closure(&oc);
3299 // This keeps claiming and applying the closure to completed buffers
3300 // until we run out of buffers or we need to abort.
3301 if (ParallelGCThreads > 0) {
3302 while (!has_aborted() &&
3303 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3304 if (_cm->verbose_medium())
3305 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3306 statsOnly( ++_satb_buffers_processed );
3307 regular_clock_call();
3308 }
3309 } else {
3310 while (!has_aborted() &&
3311 satb_mq_set.apply_closure_to_completed_buffer()) {
3312 if (_cm->verbose_medium())
3313 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3314 statsOnly( ++_satb_buffers_processed );
3315 regular_clock_call();
3316 }
3317 }
3319 if (!concurrent() && !has_aborted()) {
3320 // We should only do this during remark.
3321 if (ParallelGCThreads > 0)
3322 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3323 else
3324 satb_mq_set.iterate_closure_all_threads();
3325 }
3327 _draining_satb_buffers = false;
3329 assert(has_aborted() ||
3330 concurrent() ||
3331 satb_mq_set.completed_buffers_num() == 0, "invariant");
3333 if (ParallelGCThreads > 0)
3334 satb_mq_set.set_par_closure(_task_id, NULL);
3335 else
3336 satb_mq_set.set_closure(NULL);
3338 // again, this was a potentially expensive operation, decrease the
3339 // limits to get the regular clock call early
3340 decrease_limits();
3341 }
3343 void CMTask::drain_region_stack(BitMapClosure* bc) {
3344 if (has_aborted())
3345 return;
3347 assert(_region_finger == NULL,
3348 "it should be NULL when we're not scanning a region");
3350 if (!_cm->region_stack_empty()) {
3351 if (_cm->verbose_low())
3352 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3353 _task_id, _cm->region_stack_size());
3355 MemRegion mr = _cm->region_stack_pop();
3356 // it returns MemRegion() if the pop fails
3357 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3359 while (mr.start() != NULL) {
3360 if (_cm->verbose_medium())
3361 gclog_or_tty->print_cr("[%d] we are scanning region "
3362 "["PTR_FORMAT", "PTR_FORMAT")",
3363 _task_id, mr.start(), mr.end());
3364 assert(mr.end() <= _cm->finger(),
3365 "otherwise the region shouldn't be on the stack");
3366 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3367 if (_nextMarkBitMap->iterate(bc, mr)) {
3368 assert(!has_aborted(),
3369 "cannot abort the task without aborting the bitmap iteration");
3371 // We finished iterating over the region without aborting.
3372 regular_clock_call();
3373 if (has_aborted())
3374 mr = MemRegion();
3375 else {
3376 mr = _cm->region_stack_pop();
3377 // it returns MemRegion() if the pop fails
3378 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3379 }
3380 } else {
3381 assert(has_aborted(), "currently the only way to do so");
3383 // The only way to abort the bitmap iteration is to return
3384 // false from the do_bit() method. However, inside the
3385 // do_bit() method we move the _region_finger to point to the
3386 // object currently being looked at. So, if we bail out, we
3387 // have definitely set _region_finger to something non-null.
3388 assert(_region_finger != NULL, "invariant");
3390 // The iteration was actually aborted. So now _region_finger
3391 // points to the address of the object we last scanned. If we
3392 // leave it there, when we restart this task, we will rescan
3393 // the object. It is easy to avoid this. We move the finger by
3394 // enough to point to the next possible object header (the
3395 // bitmap knows by how much we need to move it as it knows its
3396 // granularity).
3397 MemRegion newRegion =
3398 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3400 if (!newRegion.is_empty()) {
3401 if (_cm->verbose_low()) {
3402 gclog_or_tty->print_cr("[%d] pushing unscanned region"
3403 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3404 _task_id,
3405 newRegion.start(), newRegion.end());
3406 }
3407 // Now push the part of the region we didn't scan on the
3408 // region stack to make sure a task scans it later.
3409 _cm->region_stack_push(newRegion);
3410 }
3411 // break from while
3412 mr = MemRegion();
3413 }
3414 _region_finger = NULL;
3415 }
3417 if (_cm->verbose_low())
3418 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3419 _task_id, _cm->region_stack_size());
3420 }
3421 }
3423 void CMTask::print_stats() {
3424 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3425 _task_id, _calls);
3426 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3427 _elapsed_time_ms, _termination_time_ms);
3428 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3429 _step_times_ms.num(), _step_times_ms.avg(),
3430 _step_times_ms.sd());
3431 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3432 _step_times_ms.maximum(), _step_times_ms.sum());
3434 #if _MARKING_STATS_
3435 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3436 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3437 _all_clock_intervals_ms.sd());
3438 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3439 _all_clock_intervals_ms.maximum(),
3440 _all_clock_intervals_ms.sum());
3441 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3442 _clock_due_to_scanning, _clock_due_to_marking);
3443 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3444 _objs_scanned, _objs_found_on_bitmap);
3445 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3446 _local_pushes, _local_pops, _local_max_size);
3447 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3448 _global_pushes, _global_pops, _global_max_size);
3449 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3450 _global_transfers_to,_global_transfers_from);
3451 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3452 _regions_claimed, _region_stack_pops);
3453 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3454 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3455 _steal_attempts, _steals);
3456 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3457 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3458 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3459 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3460 _aborted_timed_out, _aborted_satb, _aborted_termination);
3461 #endif // _MARKING_STATS_
3462 }
3464 /*****************************************************************************
3466 The do_marking_step(time_target_ms) method is the building block
3467 of the parallel marking framework. It can be called in parallel
3468 with other invocations of do_marking_step() on different tasks
3469 (but only one per task, obviously) and concurrently with the
3470 mutator threads, or during remark, hence it eliminates the need
3471 for two versions of the code. When called during remark, it will
3472 pick up from where the task left off during the concurrent marking
3473 phase. Interestingly, tasks are also claimable during evacuation
3474 pauses, since do_marking_step() ensures that it aborts before
3475 it needs to yield.
3477 The data structures that it uses to do marking work are the
3478 following:
3480 (1) Marking Bitmap. If there are gray objects that appear only
3481 on the bitmap (this happens either when dealing with an overflow
3482 or when the initial marking phase has simply marked the roots
3483 and didn't push them on the stack), then tasks claim heap
3484 regions whose bitmap they then scan to find gray objects. A
3485 global finger indicates where the end of the last claimed region
3486 is. A local finger indicates how far into the region a task has
3487 scanned. The two fingers are used to determine how to gray an
3488 object (i.e. whether simply marking it is OK, as it will be
3489 visited by a task in the future, or whether it also needs to be
3490 pushed on a stack).
3492 (2) Local Queue. The local queue of the task which is accessed
3493 reasonably efficiently by the task. Other tasks can steal from
3494 it when they run out of work. Throughout the marking phase, a
3495 task attempts to keep its local queue short but not totally
3496 empty, so that entries are available for stealing by other
3497 tasks. Only when there is no more work does a task totally
3498 drain its local queue.
3500 (3) Global Mark Stack. This handles local queue overflow. During
3501 marking, only sets of entries are moved between it and the local
3502 queues, as access to it requires a mutex and more fine-grained
3503 interaction with it might cause contention. If it
3504 overflows, then the marking phase should restart and iterate
3505 over the bitmap to identify gray objects. Throughout the marking
3506 phase, tasks attempt to keep the global mark stack at a small
3507 length but not totally empty, so that entries are available for
3508 popping by other tasks. Only when there is no more work do tasks
3509 totally drain the global mark stack.
3511 (4) Global Region Stack. Entries on it correspond to areas of
3512 the bitmap that need to be scanned since they contain gray
3513 objects. Pushes on the region stack only happen during
3514 evacuation pauses and typically correspond to areas covered by
3515 GC LABs. If it overflows, then the marking phase should restart
3516 and iterate over the bitmap to identify gray objects. Tasks will
3517 try to totally drain the region stack as soon as possible.
3519 (5) SATB Buffer Queue. This is where completed SATB buffers are
3520 made available. Buffers are regularly removed from this queue
3521 and scanned for roots, so that the queue doesn't get too
3522 long. During remark, all completed buffers are processed, as
3523 well as the filled-in parts of any uncompleted buffers.
3525 The do_marking_step() method tries to abort when the time target
3526 has been reached. There are a few other cases when the
3527 do_marking_step() method also aborts:
3529 (1) When the marking phase has been aborted (after a Full GC).
3531 (2) When a global overflow (either on the global stack or the
3532 region stack) has been triggered. Before the task aborts, it
3533 will actually sync up with the other tasks to ensure that all
3534 the marking data structures (local queues, stacks, fingers etc.)
3535 are re-initialised so that when do_marking_step() completes,
3536 the marking phase can immediately restart.
3538 (3) When enough completed SATB buffers are available. The
3539 do_marking_step() method only tries to drain SATB buffers right
3540 at the beginning. So, if enough buffers are available, the
3541 marking step aborts and the SATB buffers are processed at
3542 the beginning of the next invocation.
3544 (4) To yield. When we have to yield, we abort and yield
3545 right at the end of do_marking_step(). This saves us from a lot
3546 of hassle as, by yielding, we might allow a Full GC. If this
3547 happens, then objects will be compacted underneath our feet, the
3548 heap might shrink, etc. We avoid checking for this by just
3549 aborting and doing the yield right at the end.
3551 From the above it follows that the do_marking_step() method should
3552 be called in a loop (or, otherwise, regularly) until it completes.
3554 If a marking step completes without its has_aborted() flag being
3555 true, it means it has completed the current marking phase (and
3556 also all other marking tasks have done so and have all synced up).
3558 A method called regular_clock_call() is invoked "regularly" (at
3559 sub-ms intervals) throughout marking. It is this clock method that
3560 checks all the abort conditions mentioned above and decides when
3561 the task should abort. A work-based scheme is used to trigger the
3562 clock method: it fires when the number of object words the marking
3563 step has scanned, or the number of references it has visited,
3564 reaches a given limit. Additional invocations of the clock method
3565 have been planted in a few other strategic places too. The initial
3566 reason for the clock method was to avoid querying the virtual time
3567 (os::elapsedVTime()) too frequently, as it is quite expensive. Once
3568 it was in place, it was natural to piggy-back all the other abort
3569 conditions on it rather than checking them constantly in the code.
3571 *****************************************************************************/
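// A minimal sketch of the calling pattern described above (not part of
// the original code; "task" and "target_ms" are illustrative names for
// a claimed CMTask* and a per-step time target): drive the method in a
// loop until a step completes without aborting. A real caller would
// also yield to safepoints and handle overflow between steps.
//
//   double target_ms = 10.0;
//   do {
//     task->do_marking_step(target_ms);
//     // ...yield / handle overflow restart here...
//   } while (task->has_aborted());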
3573 void CMTask::do_marking_step(double time_target_ms) {
3574 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3575 assert(concurrent() == _cm->concurrent(), "they should be the same");
3577 assert(concurrent() || _cm->region_stack_empty(),
3578 "the region stack should have been cleared before remark");
3579 assert(_region_finger == NULL,
3580 "this should be non-null only when a region is being scanned");
3582 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3583 assert(_task_queues != NULL, "invariant");
3584 assert(_task_queue != NULL, "invariant");
3585 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3587 assert(!_claimed,
3588 "only one thread should claim this task at any one time");
3590 // OK, this doesn't safeguard against all possible scenarios, as it is
3591 // possible for two threads to set the _claimed flag at the same
3592 // time. But it is only for debugging purposes anyway and it will
3593 // catch most problems.
3594 _claimed = true;
3596 _start_time_ms = os::elapsedVTime() * 1000.0;
3597 statsOnly( _interval_start_time_ms = _start_time_ms );
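// The prediction below estimates how much recent steps have tended to
// overshoot their time target (the overshoots are recorded into
// _marking_step_diffs_ms near the end of this method, when a step
// times out); subtracting it from time_target_ms makes the effective
// target conservative, so on average we finish close to the request.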
3599 double diff_prediction_ms =
3600 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3601 _time_target_ms = time_target_ms - diff_prediction_ms;
3603 // set up the variables that are used in the work-based scheme to
3604 // call the regular clock method
3605 _words_scanned = 0;
3606 _refs_reached = 0;
3607 recalculate_limits();
3609 // clear all flags
3610 clear_has_aborted();
3611 _has_aborted_timed_out = false;
3612 _draining_satb_buffers = false;
3614 ++_calls;
3616 if (_cm->verbose_low())
3617 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3618 "target = %1.2lfms >>>>>>>>>>",
3619 _task_id, _calls, _time_target_ms);
3621 // Set up the bitmap and oop closures. Anything that uses them is
3622 // eventually called from this method, so it is OK to allocate these
3623 // statically.
3624 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3625 CMOopClosure oop_closure(_g1h, _cm, this);
3626 set_oop_closure(&oop_closure);
3628 if (_cm->has_overflown()) {
3629 // This can happen if the region stack or the mark stack overflows
3630 // during a GC pause and this task, after a yield point,
3631 // restarts. We have to abort as we need to get into the overflow
3632 // protocol which happens right at the end of this task.
3633 set_has_aborted();
3634 }
3636 // First drain any available SATB buffers. After this, we will not
3637 // look at SATB buffers before the next invocation of this method.
3638 // If enough completed SATB buffers are queued up, the regular clock
3639 // will abort this task so that it restarts.
3640 drain_satb_buffers();
3641 // ...then partially drain the local queue and the global stack
3642 drain_local_queue(true);
3643 drain_global_stack(true);
3645 // Then totally drain the region stack. We will not look at
3646 // it again before the next invocation of this method. Entries on
3647 // the region stack are only added during evacuation pauses, for
3648 // which we have to yield. When we do, we abort the task anyway so
3649 // it will look at the region stack again when it restarts.
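// (Presumably this flag makes the closure record its progress in
// _region_finger rather than _finger while the region stack is
// being drained.)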
3650 bitmap_closure.set_scanning_heap_region(false);
3651 drain_region_stack(&bitmap_closure);
3652 // ...then partially drain the local queue and the global stack
3653 drain_local_queue(true);
3654 drain_global_stack(true);
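// Main region-scanning loop: first finish off any region we are
// already holding on to (scanning its bitmap from _finger onwards),
// then keep claiming and scanning fresh regions until we either run
// out of regions or abort.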
3656 do {
3657 if (!has_aborted() && _curr_region != NULL) {
3658 // This means that we're already holding on to a region.
3659 assert(_finger != NULL, "if region is not NULL, then the finger "
3660 "should not be NULL either");
3662 // We might have restarted this task after an evacuation pause
3663 // which might have evacuated the region we're holding on to
3664 // underneath our feet. Let's read its limit again to make sure
3665 // that we do not iterate over a region of the heap that
3666 // contains garbage (update_region_limit() will also move
3667 // _finger to the start of the region if it is found empty).
3668 update_region_limit();
3669 // We will start from _finger not from the start of the region,
3670 // as we might be restarting this task after aborting half-way
3671 // through scanning this region. In this case, _finger points to
3672 // the address where we last found a marked object. If this is a
3673 // fresh region, _finger points to start().
3674 MemRegion mr = MemRegion(_finger, _region_limit);
3676 if (_cm->verbose_low())
3677 gclog_or_tty->print_cr("[%d] we're scanning part "
3678 "["PTR_FORMAT", "PTR_FORMAT") "
3679 "of region "PTR_FORMAT,
3680 _task_id, _finger, _region_limit, _curr_region);
3682 // Let's iterate over the bitmap of the part of the
3683 // region that is left.
3684 bitmap_closure.set_scanning_heap_region(true);
3685 if (mr.is_empty() ||
3686 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3687 // We successfully completed iterating over the region. Now,
3688 // let's give up the region.
3689 giveup_current_region();
3690 regular_clock_call();
3691 } else {
3692 assert(has_aborted(), "currently the only way to do so");
3693 // The only way to abort the bitmap iteration is to return
3694 // false from the do_bit() method. However, inside the
3695 // do_bit() method we move the _finger to point to the
3696 // object currently being looked at. So, if we bail out, we
3697 // have definitely set _finger to something non-null.
3698 assert(_finger != NULL, "invariant");
3700 // Region iteration was actually aborted. So now _finger
3701 // points to the address of the object we last scanned. If we
3702 // leave it there, when we restart this task, we will rescan
3703 // the object. It is easy to avoid this. We move the finger by
3704 // enough to point to the next possible object header (the
3705 // bitmap knows by how much we need to move it as it knows its
3706 // granularity).
3707 move_finger_to(_nextMarkBitMap->nextWord(_finger));
3708 }
3709 }
3710 // At this point we have either completed iterating over the
3711 // region we were holding on to, or we have aborted.
3713 // We then partially drain the local queue and the global stack.
3714 // (Do we really need this?)
3715 drain_local_queue(true);
3716 drain_global_stack(true);
3718 // Read the note on the claim_region() method on why it might
3719 // return NULL with potentially more regions available for
3720 // claiming and why we have to check out_of_regions() to determine
3721 // whether we're done or not.
3722 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3723 // We are going to try to claim a new region. We should have
3724 // given up on the previous one.
3725 // Separated the asserts so that we know which one fires.
3726 assert(_curr_region == NULL, "invariant");
3727 assert(_finger == NULL, "invariant");
3728 assert(_region_limit == NULL, "invariant");
3729 if (_cm->verbose_low())
3730 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3731 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3732 if (claimed_region != NULL) {
3733 // Yes, we managed to claim one
3734 statsOnly( ++_regions_claimed );
3736 if (_cm->verbose_low())
3737 gclog_or_tty->print_cr("[%d] we successfully claimed "
3738 "region "PTR_FORMAT,
3739 _task_id, claimed_region);
3741 setup_for_region(claimed_region);
3742 assert(_curr_region == claimed_region, "invariant");
3743 }
3744 // It is important to call the regular clock here. It might take
3745 // a while to claim a region if, for example, we hit a large
3746 // block of empty regions. So we need to call the regular clock
3747 // method once round the loop to make sure it's called
3748 // frequently enough.
3749 regular_clock_call();
3750 }
3752 if (!has_aborted() && _curr_region == NULL) {
3753 assert(_cm->out_of_regions(),
3754 "at this point we should be out of regions");
3755 }
3756 } while ( _curr_region != NULL && !has_aborted());
3758 if (!has_aborted()) {
3759 // We cannot check whether the global stack is empty, since other
3760 // tasks might be pushing objects to it concurrently. We also cannot
3761 // check if the region stack is empty because if a thread is aborting
3762 // it can push a partially done region back.
3763 assert(_cm->out_of_regions(),
3764 "at this point we should be out of regions");
3766 if (_cm->verbose_low())
3767 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3769 // Try to reduce the number of available SATB buffers so that
3770 // remark has less work to do.
3771 drain_satb_buffers();
3772 }
3774 // Since we've done everything else, we can now totally drain the
3775 // local queue and global stack.
3776 drain_local_queue(false);
3777 drain_global_stack(false);
3779 // Attempt at work stealing from other tasks' queues.
3780 if (!has_aborted()) {
3781 // We have not aborted. This means that we have finished all that
3782 // we could. Let's try to do some stealing...
3784 // We cannot check whether the global stack is empty, since other
3785 // tasks might be pushing objects to it concurrently. We also cannot
3786 // check if the region stack is empty because if a thread is aborting
3787 // it can push a partially done region back.
3788 assert(_cm->out_of_regions() && _task_queue->size() == 0,
3789 "only way to reach here");
3791 if (_cm->verbose_low())
3792 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3794 while (!has_aborted()) {
3795 oop obj;
3796 statsOnly( ++_steal_attempts );
3798 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3799 if (_cm->verbose_medium())
3800 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3801 _task_id, (void*) obj);
3803 statsOnly( ++_steals );
3805 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3806 "any stolen object should be marked");
3807 scan_object(obj);
3809 // And since we're towards the end, let's totally drain the
3810 // local queue and global stack.
3811 drain_local_queue(false);
3812 drain_global_stack(false);
3813 } else {
3814 break;
3815 }
3816 }
3817 }
3819 // If we still haven't aborted, let's try to get into the
3820 // termination protocol.
3821 if (!has_aborted()) {
3822 // We cannot check whether the global stack is empty, since other
3823 // tasks might be concurrently pushing objects on it. We also cannot
3824 // check if the region stack is empty because if a thread is aborting
3825 // it can push a partially done region back.
3826 // Separated the asserts so that we know which one fires.
3827 assert(_cm->out_of_regions(), "only way to reach here");
3828 assert(_task_queue->size() == 0, "only way to reach here");
3830 if (_cm->verbose_low())
3831 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3833 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3834 // The CMTask class also extends the TerminatorTerminator class,
3835 // hence its should_exit_termination() method will also decide
3836 // whether to exit the termination protocol or not.
3837 bool finished = _cm->terminator()->offer_termination(this);
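// (offer_termination() returns true once every task has offered
// termination; it returns false early if should_exit_termination()
// reports that this task wants to abort or that work has reappeared
// on the global stack.)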
3838 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3839 _termination_time_ms +=
3840 termination_end_time_ms - _termination_start_time_ms;
3842 if (finished) {
3843 // We're all done.
3845 if (_task_id == 0) {
3846 // let's allow task 0 to do this
3847 if (concurrent()) {
3848 assert(_cm->concurrent_marking_in_progress(), "invariant");
3849 // we need to set this to false before the next
3850 // safepoint. This way we ensure that the marking phase
3851 // doesn't observe any more heap expansions.
3852 _cm->clear_concurrent_marking_in_progress();
3853 }
3854 }
3856 // We can now guarantee that the global stack is empty, since
3857 // all other tasks have finished. We separated the guarantees so
3858 // that, if a condition is false, we can immediately find out
3859 // which one.
3860 guarantee(_cm->out_of_regions(), "only way to reach here");
3861 guarantee(_cm->region_stack_empty(), "only way to reach here");
3862 guarantee(_cm->mark_stack_empty(), "only way to reach here");
3863 guarantee(_task_queue->size() == 0, "only way to reach here");
3864 guarantee(!_cm->has_overflown(), "only way to reach here");
3865 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3866 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
3868 if (_cm->verbose_low())
3869 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3870 } else {
3871 // Apparently there's more work to do. Let's abort this task; the
3872 // caller will restart it and we can hopefully find more things to do.
3874 if (_cm->verbose_low())
3875 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3877 set_has_aborted();
3878 statsOnly( ++_aborted_termination );
3879 }
3880 }
3882 // Mainly for debugging purposes to make sure that a pointer to the
3883 // closure which was statically allocated in this frame doesn't
3884 // escape it by accident.
3885 set_oop_closure(NULL);
3886 double end_time_ms = os::elapsedVTime() * 1000.0;
3887 double elapsed_time_ms = end_time_ms - _start_time_ms;
3888 // Update the step history.
3889 _step_times_ms.add(elapsed_time_ms);
3891 if (has_aborted()) {
3892 // The task was aborted for some reason.
3894 statsOnly( ++_aborted );
3896 if (_has_aborted_timed_out) {
3897 double diff_ms = elapsed_time_ms - _time_target_ms;
3898 // Keep statistics of how well we did with respect to hitting
3899 // our target only if we actually timed out (if we aborted for
3900 // other reasons, then the results might get skewed).
3901 _marking_step_diffs_ms.add(diff_ms);
3902 }
3904 if (_cm->has_overflown()) {
3905 // This is the interesting one. We aborted because a global
3906 // overflow was raised. This means we have to restart the
3907 // marking phase and start iterating over regions. However, in
3908 // order to do this we have to make sure that all tasks stop
3909 // what they are doing and re-initialise in a safe manner. We
3910 // will achieve this with the use of two barrier sync points.
3912 if (_cm->verbose_low())
3913 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3915 _cm->enter_first_sync_barrier(_task_id);
3916 // When we exit this sync barrier we know that all tasks have
3917 // stopped doing marking work. So, it's now safe to
3918 // re-initialise our data structures. At the end of this method,
3919 // task 0 will clear the global data structures.
3921 statsOnly( ++_aborted_overflow );
3923 // We clear the local state of this task...
3924 clear_region_fields();
3926 // ...and enter the second barrier.
3927 _cm->enter_second_sync_barrier(_task_id);
3928 // At this point everything has been re-initialised and we're
3929 // ready to restart.
3930 }
3932 if (_cm->verbose_low()) {
3933 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3934 "elapsed = %1.2lfms <<<<<<<<<<",
3935 _task_id, _time_target_ms, elapsed_time_ms);
3936 if (_cm->has_aborted())
3937 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3938 _task_id);
3939 }
3940 } else {
3941 if (_cm->verbose_low())
3942 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3943 "elapsed = %1.2lfms <<<<<<<<<<",
3944 _task_id, _time_target_ms, elapsed_time_ms);
3945 }
3947 _claimed = false;
3948 }
3950 CMTask::CMTask(int task_id,
3951 ConcurrentMark* cm,
3952 CMTaskQueue* task_queue,
3953 CMTaskQueueSet* task_queues)
3954 : _g1h(G1CollectedHeap::heap()),
3955 _task_id(task_id), _cm(cm),
3956 _claimed(false),
3957 _nextMarkBitMap(NULL), _hash_seed(17),
3958 _task_queue(task_queue),
3959 _task_queues(task_queues),
3960 _oop_closure(NULL) {
3961 guarantee(task_queue != NULL, "invariant");
3962 guarantee(task_queues != NULL, "invariant");
3964 statsOnly( _clock_due_to_scanning = 0;
3965 _clock_due_to_marking = 0 );
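// Seed the step-time difference predictor with a non-zero sample
// (0.5ms; presumably so the very first prediction made by
// do_marking_step() is not overly optimistic).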
3967 _marking_step_diffs_ms.add(0.5);
3968 }