Tue, 06 Apr 2010 10:59:45 -0400
6909756: G1: guarantee(G1CollectedHeap::heap()->mark_in_progress(),"Precondition.")
Summary: Make sure that two marking cycles do not overlap, i.e., a new one can only start after the concurrent marking thread finishes all its work. In the fix I piggy-back a couple of minor extra fixes: some general code reformatting for consistency (only around the code I modified), the removal of a field (G1CollectorPolicy::_should_initiate_conc_mark) which doesn't seem to be used at all (it's only set but never read), as well as moving the "is GC locker active" test earlier into the G1 pause / Full GC and using a more appropriate method for it.
Reviewed-by: johnc, jmasa, jcoomes, ysr
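The guard used is ConcurrentMarkThread::during_cycle(): a new marking cycle may only be initiated once it returns false. A minimal sketch of the intended pattern (illustrative only; names as used in the sources below):

    // Before initiating a new marking cycle, make sure the concurrent
    // mark thread has finished the previous one (including clearing
    // the next marking bitmap).
    if (!g1h->concurrent_mark()->cmThread()->during_cycle()) {
      // safe to start a new cycle
    }
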
/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
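
// Sizing note (illustrative): the ConcurrentMark constructor below
// passes a shifter of MinObjAlignment - 1, i.e. 0 when objects are
// word-aligned, so each bitmap keeps one bit per heap word. On a
// 64-bit VM (HeapWordSize == 8) that is heap_bytes / 64 of backing
// store per bitmap, e.g. roughly 16MB for a 1GB heap.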
HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  // intersection() returns the clipped region; it does not modify mr
  // in place, so the result must be assigned back.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
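
// Usage sketch (illustrative): par_push() never blocks and never grows
// the stack; when the stack is full it just raises the overflow flag,
// which callers are expected to check and recover from, e.g.:
//
//   if (!_cm->mark_stack_push(obj)) {
//     // overflow: marking will be restarted, see checkpointRootsFinal()
//   }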

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
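
// Usage sketch (illustrative; the buffer size is hypothetical): a
// marking task can drain the global stack in chunks into a local
// buffer:
//
//   oop buffer[64];
//   int n;
//   while (_markStack.par_pop_arr(buffer, 64, &n)) {
//     for (int i = 0; i < n; i++) {
//       // process buffer[i]
//     }
//   }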

CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Currently we do not call this at all. Normally we would call it
// during the concurrent marking / remark phases but we now call
// the lock-based version instead. But we might want to resurrect this
// code in the future. So, we'll leave it here commented out.
#if 0
MemRegion CMRegionStack::pop() {
  while (true) {
    // Otherwise...
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}
#endif // 0

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}
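
// Protocol note (informal): invalidation does not compact the stack;
// an invalidated slot is simply overwritten with an empty MemRegion
// (start() == NULL), and pop_with_lock() above skips such entries.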

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
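
// Usage sketch (illustrative; the closure name is hypothetical):
// drain() is instantiated with whichever oop closure does the marking
// work:
//
//   SomeMarkingOopClosure cl(...);
//   bool finished = _markStack.drain(&cl, _nextMarkBitMap,
//                                    true /* yield_after */);
//   if (!finished) {
//     // we yielded; the caller is expected to resume draining later
//   }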

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area */),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area */),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t) 1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t) 1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif
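
    // Worked example (illustrative numbers only): with
    // G1MarkingOverheadPercent = 33, MaxGCPauseMillis = 200,
    // GCPauseIntervalMillis = 1000 and 8 processors:
    //   overall_cm_overhead   = 200 * 0.33 / 1000    = 0.066
    //   cpu_ratio             = 1 / 8                = 0.125
    //   marking_thread_num    = ceil(0.066 / 0.125)  = 1
    //   marking_task_overhead = 0.066 / 1 * 8        = 0.528
    //   sleep_factor          = (1 - 0.528) / 0.528  ~ 0.894
    // i.e. one marking thread that sleeps roughly 0.9ms for every 1ms
    // of work, which keeps marking close to the requested overhead.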

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, then we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when the tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t) 1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  if (G1PrintReachableAtInitialMark) {
    print_reachable(true, "before");
  }

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, and we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,      // do not visit active blobs
                            &older     // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one
   thread could be in the barrier sync code, waiting for the other
   thread to also sync up, whereas another one could be trying to
   yield, while also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for a Full GC or an evacuation pause to occur in the
   meantime. This is actually safe, since entering the sync barrier
   is one of the last things do_marking_step() does, and it doesn't
   manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}
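
// Why the finger check matters (informal): the global finger is the
// address up to which the bitmap scan has advanced. An object marked
// at or above the finger will still be visited when the scan reaches
// it, so only objects below the finger need to be pushed on the mark
// stack explicitly, which is what markAndGrayObjectIfNecessary() does
// above.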

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    // This is the end of the marking cycle, and we expect all
    // threads to have SATB queues with active set to true.
    JavaThread::satb_mark_queue_set().set_active_all_threads(
                                                  false, /* new active value */
                                                  true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm), _final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};
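
// Card arithmetic sketch (illustrative numbers): with the usual
// 512-byte cards (CardTableModRefBS::card_shift == 9), an object whose
// first word is at 0x1000 and whose last word is at 0x1400 spans card
// nums 0x1000 >> 9 == 8 through 0x1400 >> 9 == 10; subtracting
// _bottom_card_num then yields the indices set in _card_bm above.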

void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  size_t _cleared_h_regions;
  size_t _freed_regions;
  UncleanRegionList* _unclean_region_list;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (ParallelGCThreads > 0) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           UncleanRegionList* list,
                           int worker_num)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _unclean_region_list(list)
{}

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  r->reset_gc_time_stamp();
  if (!r->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    r->note_end_of_marking();
    _max_live_bytes += r->max_live_bytes();
    _g1->free_region_if_totally_empty_work(r,
                                           _freed_bytes,
                                           _cleared_h_regions,
                                           _freed_regions,
                                           _unclean_region_list,
                                           true /*par*/);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}
1618 void ConcurrentMark::cleanup() {
1619 // world is stopped at this checkpoint
1620 assert(SafepointSynchronize::is_at_safepoint(),
1621 "world should be stopped");
1622 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1624 // If a full collection has happened, we shouldn't do this.
1625 if (has_aborted()) {
1626 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1627 return;
1628 }
1630 if (VerifyDuringGC) {
1631 HandleMark hm; // handle scope
1632 gclog_or_tty->print(" VerifyDuringGC:(before)");
1633 Universe::heap()->prepare_for_verify();
1634 Universe::verify(/* allow dirty */ true,
1635 /* silent */ false,
1636 /* prev marking */ true);
1637 }
1639 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1640 g1p->record_concurrent_mark_cleanup_start();
1642 double start = os::elapsedTime();
1644 // Do counting once more with the world stopped for good measure.
1645 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1646 &_region_bm, &_card_bm);
1647 if (ParallelGCThreads > 0) {
1648 assert(g1h->check_heap_region_claim_values(
1649 HeapRegion::InitialClaimValue),
1650 "sanity check");
1652 int n_workers = g1h->workers()->total_workers();
1653 g1h->set_par_threads(n_workers);
1654 g1h->workers()->run_task(&g1_par_count_task);
1655 g1h->set_par_threads(0);
1657 assert(g1h->check_heap_region_claim_values(
1658 HeapRegion::FinalCountClaimValue),
1659 "sanity check");
1660 } else {
1661 g1_par_count_task.work(0);
1662 }
1664 size_t known_garbage_bytes =
1665 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1666 #if 0
1667 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1668 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1669 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1670 (double) known_garbage_bytes / (double) (1024 * 1024));
1671 #endif // 0
1672 g1p->set_known_garbage_bytes(known_garbage_bytes);
1674 size_t start_used_bytes = g1h->used();
1675 _at_least_one_mark_complete = true;
1676 g1h->set_marking_complete();
1678 double count_end = os::elapsedTime();
1679 double this_final_counting_time = (count_end - start);
1680 if (G1PrintParCleanupStats) {
1681 gclog_or_tty->print_cr("Cleanup:");
1682 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1683 this_final_counting_time*1000.0);
1684 }
1685 _total_counting_time += this_final_counting_time;
1687 // Install newly created mark bitMap as "prev".
1688 swapMarkBitMaps();
1690 g1h->reset_gc_time_stamp();
1692 // Note end of marking in all heap regions.
1693 double note_end_start = os::elapsedTime();
1694 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1695 if (ParallelGCThreads > 0) {
1696 int n_workers = g1h->workers()->total_workers();
1697 g1h->set_par_threads(n_workers);
1698 g1h->workers()->run_task(&g1_par_note_end_task);
1699 g1h->set_par_threads(0);
1701 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1702 "sanity check");
1703 } else {
1704 g1_par_note_end_task.work(0);
1705 }
1706 // Tell the mutators that there might be unclean regions coming...
1707 g1h->set_unclean_regions_coming(true);
1708 double note_end_end = os::elapsedTime();
1709 if (G1PrintParCleanupStats) {
1710 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1711 (note_end_end - note_end_start)*1000.0);
1712 }
1715 // Scrub the remembered sets now; this must happen before the cleanup-end
1716 // call below, since it affects the metric by which we sort the heap regions.
1717 if (G1ScrubRemSets) {
1718 double rs_scrub_start = os::elapsedTime();
1719 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1720 if (ParallelGCThreads > 0) {
1721 int n_workers = g1h->workers()->total_workers();
1722 g1h->set_par_threads(n_workers);
1723 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1724 g1h->set_par_threads(0);
1726 assert(g1h->check_heap_region_claim_values(
1727 HeapRegion::ScrubRemSetClaimValue),
1728 "sanity check");
1729 } else {
1730 g1_par_scrub_rs_task.work(0);
1731 }
1733 double rs_scrub_end = os::elapsedTime();
1734 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1735 _total_rs_scrub_time += this_rs_scrub_time;
1736 }
1738 // this will also free any regions totally full of garbage objects,
1739 // and sort the regions.
1740 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1741 g1_par_note_end_task.freed_bytes(),
1742 g1_par_note_end_task.max_live_bytes());
1744 // Statistics.
1745 double end = os::elapsedTime();
1746 _cleanup_times.add((end - start) * 1000.0);
1748 // G1CollectedHeap::heap()->print();
1749 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1750 // G1CollectedHeap::heap()->get_gc_time_stamp());
1752 if (PrintGC || PrintGCDetails) {
1753 g1h->print_size_transition(gclog_or_tty,
1754 start_used_bytes,
1755 g1h->used(),
1756 g1h->capacity());
1757 }
1759 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1760 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1762 // We need to make this be a "collection" so any collection pause that
1763 // races with it goes around and waits for completeCleanup to finish.
1764 g1h->increment_total_collections();
1766 if (VerifyDuringGC) {
1767 HandleMark hm; // handle scope
1768 gclog_or_tty->print(" VerifyDuringGC:(after)");
1769 Universe::heap()->prepare_for_verify();
1770 Universe::verify(/* allow dirty */ true,
1771 /* silent */ false,
1772 /* prev marking */ true);
1773 }
1774 }
1776 void ConcurrentMark::completeCleanup() {
1777 // A full collection intervened.
1778 if (has_aborted()) return;
1780 int first = 0;
1781 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1782 for (int t = 0; t < last; t++) {
1783 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1784 assert(list->well_formed(), "Inv");
1785 HeapRegion* hd = list->hd();
1786 while (hd != NULL) {
1788 // Clear the region's remembered set before handing it back to the heap.
1788 hd->rem_set()->clear();
1789 HeapRegion* next_hd = hd->next_from_unclean_list();
1790 (void)list->pop();
1791 assert(list->hd() == next_hd, "how not?");
1792 _g1h->put_region_on_unclean_list(hd);
1793 if (!hd->isHumongous()) {
1794 // Increment the _free_regions count by 1.
1795 _g1h->finish_free_region_work(0, 0, 1, NULL);
1796 }
1797 hd = list->hd();
1798 assert(hd == next_hd, "how not?");
1799 }
1800 }
1801 }
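1802 // Closures used for reference processing at the end of marking. An object is
1803 // considered alive if it lies outside the G1 reserved heap or is not "ill" w.r.t. the next marking bitmap.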
1804 class G1CMIsAliveClosure: public BoolObjectClosure {
1805 G1CollectedHeap* _g1;
1806 public:
1807 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1808 _g1(g1)
1809 {}
1811 void do_object(oop obj) {
1812 assert(false, "not to be invoked");
1813 }
1814 bool do_object_b(oop obj) {
1815 HeapWord* addr = (HeapWord*)obj;
1816 return addr != NULL &&
1817 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1818 }
1819 };
1821 class G1CMKeepAliveClosure: public OopClosure {
1822 G1CollectedHeap* _g1;
1823 ConcurrentMark* _cm;
1824 CMBitMap* _bitMap;
1825 public:
1826 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1827 CMBitMap* bitMap) :
1828 _g1(g1), _cm(cm),
1829 _bitMap(bitMap) {}
1831 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1832 virtual void do_oop( oop* p) { do_oop_work(p); }
1834 template <class T> void do_oop_work(T* p) {
1835 oop thisOop = oopDesc::load_decode_heap_oop(p);
1836 HeapWord* addr = (HeapWord*)thisOop;
1837 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1838 _bitMap->mark(addr);
1839 _cm->mark_stack_push(thisOop);
1840 }
1841 }
1842 };
1844 class G1CMDrainMarkingStackClosure: public VoidClosure {
1845 CMMarkStack* _markStack;
1846 CMBitMap* _bitMap;
1847 G1CMKeepAliveClosure* _oopClosure;
1848 public:
1849 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1850 G1CMKeepAliveClosure* oopClosure) :
1851 _bitMap(bitMap),
1852 _markStack(markStack),
1853 _oopClosure(oopClosure)
1854 {}
1856 void do_void() {
1857 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1858 }
1859 };
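1860 // Weak reference processing below: the keep-alive closure marks and pushes any object found to be "ill", and the drain closure uses it to empty the global mark stack.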
1861 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1862 ResourceMark rm;
1863 HandleMark hm;
1864 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1865 ReferenceProcessor* rp = g1h->ref_processor();
1867 // Process weak references.
1868 rp->setup_policy(clear_all_soft_refs);
1869 assert(_markStack.isEmpty(), "mark stack should be empty");
1871 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1872 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1873 G1CMDrainMarkingStackClosure
1874 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1875 &g1KeepAliveClosure);
1877 // XXXYYY Also: copy the parallel ref processing code from CMS.
1878 rp->process_discovered_references(&g1IsAliveClosure,
1879 &g1KeepAliveClosure,
1880 &g1DrainMarkingStackClosure,
1881 NULL);
1882 assert(_markStack.overflow() || _markStack.isEmpty(),
1883 "mark stack should be empty (unless it overflowed)");
1884 if (_markStack.overflow()) {
1885 set_has_overflown();
1886 }
1888 rp->enqueue_discovered_references();
1889 rp->verify_no_references_recorded();
1890 assert(!rp->discovery_enabled(), "should have been disabled");
1892 // Now clean up stale oops in SymbolTable and StringTable
1893 SymbolTable::unlink(&g1IsAliveClosure);
1894 StringTable::unlink(&g1IsAliveClosure);
1895 }
1897 void ConcurrentMark::swapMarkBitMaps() {
1898 CMBitMapRO* temp = _prevMarkBitMap;
1899 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1900 _nextMarkBitMap = (CMBitMap*) temp;
1901 }
1903 class CMRemarkTask: public AbstractGangTask {
1904 private:
1905 ConcurrentMark *_cm;
1907 public:
1908 void work(int worker_i) {
1909 // Since all available tasks are actually started, we should
1910 // only proceed if we're supposed to be active.
1911 if ((size_t)worker_i < _cm->active_tasks()) {
1912 CMTask* task = _cm->task(worker_i);
1913 task->record_start_time();
1914 do {
1915 task->do_marking_step(1000000000.0 /* something very large */);
1916 } while (task->has_aborted() && !_cm->has_overflown());
1917 // If we overflow, then we do not want to restart. We instead
1918 // want to abort remark and do concurrent marking again.
1919 task->record_end_time();
1920 }
1921 }
1923 CMRemarkTask(ConcurrentMark* cm) :
1924 AbstractGangTask("Par Remark"), _cm(cm) { }
1925 };
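1926 // The remark pause: all marking tasks are restarted to drain the remaining SATB buffers and finish off marking; if the global mark stack overflowed, we instead reset and go around the concurrent marking loop again.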
1927 void ConcurrentMark::checkpointRootsFinalWork() {
1928 ResourceMark rm;
1929 HandleMark hm;
1930 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1932 g1h->ensure_parsability(false);
1934 if (ParallelGCThreads > 0) {
1935 G1CollectedHeap::StrongRootsScope srs(g1h);
1936 // this is remark, so we'll use up all available threads
1937 int active_workers = ParallelGCThreads;
1938 set_phase(active_workers, false);
1940 CMRemarkTask remarkTask(this);
1941 // We will start all available threads, even if we decide that the
1942 // active_workers will be fewer. The extra ones will just bail out
1943 // immediately.
1944 int n_workers = g1h->workers()->total_workers();
1945 g1h->set_par_threads(n_workers);
1946 g1h->workers()->run_task(&remarkTask);
1947 g1h->set_par_threads(0);
1948 } else {
1949 G1CollectedHeap::StrongRootsScope srs(g1h);
1950 // this is remark, so we'll use up all available threads
1951 int active_workers = 1;
1952 set_phase(active_workers, false);
1954 CMRemarkTask remarkTask(this);
1955 // We will start all available threads, even if we decide that the
1956 // active_workers will be fewer. The extra ones will just bail out
1957 // immediately.
1958 remarkTask.work(0);
1959 }
1960 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1961 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1963 print_stats();
1965 if (!restart_for_overflow())
1966 set_non_marking_state();
1968 #if VERIFY_OBJS_PROCESSED
1969 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1970 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1971 _scan_obj_cl.objs_processed,
1972 ThreadLocalObjQueue::objs_enqueued);
1973 guarantee(_scan_obj_cl.objs_processed ==
1974 ThreadLocalObjQueue::objs_enqueued,
1975 "Different number of objs processed and enqueued.");
1976 }
1977 #endif
1978 }
1980 #ifndef PRODUCT
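1981 // Debugging support for reachable-object dumps (see print_reachable() below): for every region and every marked object, print whether each referenced object is marked, over TAMS, or unexpectedly unmarked.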
1982 class ReachablePrinterOopClosure: public OopClosure {
1983 private:
1984 G1CollectedHeap* _g1h;
1985 CMBitMapRO* _bitmap;
1986 outputStream* _out;
1987 bool _use_prev_marking;
1989 public:
1990 ReachablePrinterOopClosure(CMBitMapRO* bitmap,
1991 outputStream* out,
1992 bool use_prev_marking) :
1993 _g1h(G1CollectedHeap::heap()),
1994 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
1996 void do_oop(narrowOop* p) { do_oop_work(p); }
1997 void do_oop( oop* p) { do_oop_work(p); }
1999 template <class T> void do_oop_work(T* p) {
2000 oop obj = oopDesc::load_decode_heap_oop(p);
2001 const char* str = NULL;
2002 const char* str2 = "";
2004 if (!_g1h->is_in_g1_reserved(obj))
2005 str = "outside G1 reserved";
2006 else {
2007 HeapRegion* hr = _g1h->heap_region_containing(obj);
2008 guarantee(hr != NULL, "invariant");
2009 bool over_tams = false;
2010 if (_use_prev_marking) {
2011 over_tams = hr->obj_allocated_since_prev_marking(obj);
2012 } else {
2013 over_tams = hr->obj_allocated_since_next_marking(obj);
2014 }
2016 if (over_tams) {
2017 str = "over TAMS";
2018 if (_bitmap->isMarked((HeapWord*) obj)) {
2019 str2 = " AND MARKED";
2020 }
2021 } else if (_bitmap->isMarked((HeapWord*) obj)) {
2022 str = "marked";
2023 } else {
2024 str = "#### NOT MARKED ####";
2025 }
2026 }
2028 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
2029 p, (void*) obj, str, str2);
2030 }
2031 };
2033 class ReachablePrinterClosure: public BitMapClosure {
2034 private:
2035 CMBitMapRO* _bitmap;
2036 outputStream* _out;
2037 bool _use_prev_marking;
2039 public:
2040 ReachablePrinterClosure(CMBitMapRO* bitmap,
2041 outputStream* out,
2042 bool use_prev_marking) :
2043 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
2045 bool do_bit(size_t offset) {
2046 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2047 ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
2049 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2050 oop(addr)->oop_iterate(&oopCl);
2051 _out->print_cr("");
2053 return true;
2054 }
2055 };
2057 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2058 private:
2059 CMBitMapRO* _bitmap;
2060 outputStream* _out;
2061 bool _use_prev_marking;
2063 public:
2064 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap,
2065 outputStream* out,
2066 bool use_prev_marking) :
2067 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
2069 void do_object(oop o) {
2070 ReachablePrinterOopClosure oopCl(_bitmap, _out, _use_prev_marking);
2072 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2073 o->oop_iterate(&oopCl);
2074 _out->print_cr("");
2075 }
2076 };
2078 class RegionReachablePrinterClosure : public HeapRegionClosure {
2079 private:
2080 CMBitMapRO* _bitmap;
2081 outputStream* _out;
2082 bool _use_prev_marking;
2084 public:
2085 bool doHeapRegion(HeapRegion* hr) {
2086 HeapWord* b = hr->bottom();
2087 HeapWord* e = hr->end();
2088 HeapWord* t = hr->top();
2089 HeapWord* p = NULL;
2090 if (_use_prev_marking) {
2091 p = hr->prev_top_at_mark_start();
2092 } else {
2093 p = hr->next_top_at_mark_start();
2094 }
2095 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2096 "TAMS: "PTR_FORMAT, b, e, t, p);
2097 _out->print_cr("");
2099 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out, _use_prev_marking);
2100 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2102 return false;
2103 }
2105 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2106 outputStream* out,
2107 bool use_prev_marking) :
2108 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking) { }
2109 };
2111 void ConcurrentMark::print_reachable(bool use_prev_marking, const char* str) {
2112 gclog_or_tty->print_cr("== Doing reachable object dump... ");
2114 if (G1PrintReachableBaseFile == NULL) {
2115 gclog_or_tty->print_cr(" #### error: no base file defined");
2116 return;
2117 }
2119 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2120 (JVM_MAXPATHLEN - 1)) {
2121 gclog_or_tty->print_cr(" #### error: file name too long");
2122 return;
2123 }
2125 char file_name[JVM_MAXPATHLEN];
2126 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2127 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2129 fileStream fout(file_name);
2130 if (!fout.is_open()) {
2131 gclog_or_tty->print_cr(" #### error: could not open file");
2132 return;
2133 }
2135 outputStream* out = &fout;
2137 CMBitMapRO* bitmap = NULL;
2138 if (use_prev_marking) {
2139 bitmap = _prevMarkBitMap;
2140 } else {
2141 bitmap = _nextMarkBitMap;
2142 }
2144 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2145 out->cr();
2147 RegionReachablePrinterClosure rcl(bitmap, out, use_prev_marking);
2148 out->print_cr("--- ITERATING OVER REGIONS WITH TAMS < TOP");
2149 out->cr();
2150 _g1h->heap_region_iterate(&rcl);
2151 out->cr();
2153 ReachablePrinterClosure cl(bitmap, out, use_prev_marking);
2154 out->print_cr("--- ITERATING OVER MARKED OBJECTS ON THE BITMAP");
2155 out->cr();
2156 bitmap->iterate(&cl);
2157 out->cr();
2159 gclog_or_tty->print_cr(" done");
2160 }
2162 #endif // PRODUCT
2164 // This note is for drainAllSATBBuffers and the code in between.
2165 // In the future we could reuse a task to do this work during an
2166 // evacuation pause (since now tasks are not active and can be claimed
2167 // during an evacuation pause). This was a late change to the code and
2168 // is currently not being taken advantage of.
2170 class CMGlobalObjectClosure : public ObjectClosure {
2171 private:
2172 ConcurrentMark* _cm;
2174 public:
2175 void do_object(oop obj) {
2176 _cm->deal_with_reference(obj);
2177 }
2179 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2180 };
2182 void ConcurrentMark::deal_with_reference(oop obj) {
2183 if (verbose_high())
2184 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2185 (void*) obj);
2188 HeapWord* objAddr = (HeapWord*) obj;
2189 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2190 if (_g1h->is_in_g1_reserved(objAddr)) {
2191 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2192 HeapRegion* hr = _g1h->heap_region_containing(obj);
2193 if (_g1h->is_obj_ill(obj, hr)) {
2194 if (verbose_high())
2195 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2196 "marked", (void*) obj);
2198 // we need to mark it first
2199 if (_nextMarkBitMap->parMark(objAddr)) {
2200 // No OrderAccess::store_load() is needed. It is implicit in the
2201 // CAS done in parMark(objAddr) above
2202 HeapWord* finger = _finger;
2203 if (objAddr < finger) {
2204 if (verbose_high())
2205 gclog_or_tty->print_cr("[global] below the global finger "
2206 "("PTR_FORMAT"), pushing it", finger);
2207 if (!mark_stack_push(obj)) {
2208 if (verbose_low())
2209 gclog_or_tty->print_cr("[global] global stack overflow during "
2210 "deal_with_reference");
2211 }
2212 }
2213 }
2214 }
2215 }
2216 }
2218 void ConcurrentMark::drainAllSATBBuffers() {
2219 CMGlobalObjectClosure oc(this);
2220 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2221 satb_mq_set.set_closure(&oc);
2223 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2224 if (verbose_medium())
2225 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2226 }
2228 // no need to check whether we should do this, as this is only
2229 // called during an evacuation pause
2230 satb_mq_set.iterate_closure_all_threads();
2232 satb_mq_set.set_closure(NULL);
2233 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2234 }
2236 void ConcurrentMark::markPrev(oop p) {
2237 // Note we are overriding the read-only view of the prev map here, via
2238 // the cast.
2239 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2240 }
2242 void ConcurrentMark::clear(oop p) {
2243 assert(p != NULL && p->is_oop(), "expected an oop");
2244 HeapWord* addr = (HeapWord*)p;
2245 assert(addr >= _nextMarkBitMap->startWord() &&
2246 addr < _nextMarkBitMap->endWord(), "in a region");
2248 _nextMarkBitMap->clear(addr);
2249 }
2251 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2252 // Note we are overriding the read-only view of the prev map here, via
2253 // the cast.
2254 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2255 _nextMarkBitMap->clearRange(mr);
2256 }
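2257 // Regions are claimed by CAS-ing the global finger forward: if Atomic::cmpxchg_ptr(end, &_finger, finger) returns the old finger we own [bottom, end); otherwise somebody beat us to it, so we re-read the finger and retry.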
2258 HeapRegion*
2259 ConcurrentMark::claim_region(int task_num) {
2260 // "checkpoint" the finger
2261 HeapWord* finger = _finger;
2263 // _heap_end will not change underneath our feet; it only changes at
2264 // yield points.
2265 while (finger < _heap_end) {
2266 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2268 // is the gap between reading the finger and doing the CAS too long?
2270 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2271 HeapWord* bottom = curr_region->bottom();
2272 HeapWord* end = curr_region->end();
2273 HeapWord* limit = curr_region->next_top_at_mark_start();
2275 if (verbose_low())
2276 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2277 "["PTR_FORMAT", "PTR_FORMAT"), "
2278 "limit = "PTR_FORMAT,
2279 task_num, curr_region, bottom, end, limit);
2281 HeapWord* res =
2282 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2283 if (res == finger) {
2284 // we succeeded
2286 // notice that _finger == end cannot be guaranteed here since
2287 // someone else might have moved the finger even further
2288 assert(_finger >= end, "the finger should have moved forward");
2290 if (verbose_low())
2291 gclog_or_tty->print_cr("[%d] we were successful with region = "
2292 PTR_FORMAT, task_num, curr_region);
2294 if (limit > bottom) {
2295 if (verbose_low())
2296 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2297 "returning it ", task_num, curr_region);
2298 return curr_region;
2299 } else {
2300 assert(limit == bottom,
2301 "the region limit should be at bottom");
2302 if (verbose_low())
2303 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2304 "returning NULL", task_num, curr_region);
2305 // we return NULL and the caller should try calling
2306 // claim_region() again.
2307 return NULL;
2308 }
2309 } else {
2310 assert(_finger > finger, "the finger should have moved forward");
2311 if (verbose_low())
2312 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2313 "global finger = "PTR_FORMAT", "
2314 "our finger = "PTR_FORMAT,
2315 task_num, _finger, finger);
2317 // read it again
2318 finger = _finger;
2319 }
2320 }
2322 return NULL;
2323 }
2325 void ConcurrentMark::oops_do(OopClosure* cl) {
2326 if (_markStack.size() > 0 && verbose_low())
2327 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2328 "size = %d", _markStack.size());
2329 // we first iterate over the contents of the mark stack...
2330 _markStack.oops_do(cl);
2332 for (int i = 0; i < (int)_max_task_num; ++i) {
2333 OopTaskQueue* queue = _task_queues->queue((int)i);
2335 if (queue->size() > 0 && verbose_low())
2336 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2337 "size = %d", i, queue->size());
2339 // ...then over the contents of all the task queues.
2340 queue->oops_do(cl);
2341 }
2343 // finally, invalidate any entries in the region stack that
2344 // point into the collection set
2345 if (_regionStack.invalidate_entries_into_cset()) {
2346 // otherwise, any gray objects copied during the evacuation pause
2347 // might not be visited.
2348 assert(_should_gray_objects, "invariant");
2349 }
2350 }
2352 void ConcurrentMark::clear_marking_state() {
2353 _markStack.setEmpty();
2354 _markStack.clear_overflow();
2355 _regionStack.setEmpty();
2356 _regionStack.clear_overflow();
2357 clear_has_overflown();
2358 _finger = _heap_start;
2360 for (int i = 0; i < (int)_max_task_num; ++i) {
2361 OopTaskQueue* queue = _task_queues->queue(i);
2362 queue->set_empty();
2363 }
2364 }
2366 void ConcurrentMark::print_stats() {
2367 if (verbose_stats()) {
2368 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2369 for (size_t i = 0; i < _active_tasks; ++i) {
2370 _tasks[i]->print_stats();
2371 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2372 }
2373 }
2374 }
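2375 // Closures for complete_marking_in_collection_set() below: they mark everything reachable within collection-set regions, using a small explicit mark stack and processing object arrays in chunks of _array_increment elements to bound stack usage.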
2376 class CSMarkOopClosure: public OopClosure {
2377 friend class CSMarkBitMapClosure;
2379 G1CollectedHeap* _g1h;
2380 CMBitMap* _bm;
2381 ConcurrentMark* _cm;
2382 oop* _ms;
2383 jint* _array_ind_stack;
2384 int _ms_size;
2385 int _ms_ind;
2386 int _array_increment;
2388 bool push(oop obj, int arr_ind = 0) {
2389 if (_ms_ind == _ms_size) {
2390 gclog_or_tty->print_cr("Mark stack is full.");
2391 return false;
2392 }
2393 _ms[_ms_ind] = obj;
2394 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2395 _ms_ind++;
2396 return true;
2397 }
2399 oop pop() {
2400 if (_ms_ind == 0) return NULL;
2401 else {
2402 _ms_ind--;
2403 return _ms[_ms_ind];
2404 }
2405 }
2407 template <class T> bool drain() {
2408 while (_ms_ind > 0) {
2409 oop obj = pop();
2410 assert(obj != NULL, "Since index was non-zero.");
2411 if (obj->is_objArray()) {
2412 jint arr_ind = _array_ind_stack[_ms_ind];
2413 objArrayOop aobj = objArrayOop(obj);
2414 jint len = aobj->length();
2415 jint next_arr_ind = arr_ind + _array_increment;
2416 if (next_arr_ind < len) {
2417 push(obj, next_arr_ind);
2418 }
2419 // Now process this portion of the array.
2420 int lim = MIN2(next_arr_ind, len);
2421 for (int j = arr_ind; j < lim; j++) {
2422 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2423 }
2425 } else {
2426 obj->oop_iterate(this);
2427 }
2428 if (abort()) return false;
2429 }
2430 return true;
2431 }
2433 public:
2434 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2435 _g1h(G1CollectedHeap::heap()),
2436 _cm(cm),
2437 _bm(cm->nextMarkBitMap()),
2438 _ms_size(ms_size), _ms_ind(0),
2439 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2440 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2441 _array_increment(MAX2(ms_size/8, 16))
2442 {}
2444 ~CSMarkOopClosure() {
2445 FREE_C_HEAP_ARRAY(oop, _ms);
2446 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2447 }
2449 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2450 virtual void do_oop( oop* p) { do_oop_work(p); }
2452 template <class T> void do_oop_work(T* p) {
2453 T heap_oop = oopDesc::load_heap_oop(p);
2454 if (oopDesc::is_null(heap_oop)) return;
2455 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2456 if (obj->is_forwarded()) {
2457 // If the object has already been forwarded, we have to make sure
2458 // that it's marked. So follow the forwarding pointer. Note that
2459 // this does the right thing for self-forwarding pointers in the
2460 // evacuation failure case.
2461 obj = obj->forwardee();
2462 }
2463 HeapRegion* hr = _g1h->heap_region_containing(obj);
2464 if (hr != NULL) {
2465 if (hr->in_collection_set()) {
2466 if (_g1h->is_obj_ill(obj)) {
2467 _bm->mark((HeapWord*)obj);
2468 if (!push(obj)) {
2469 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2470 set_abort();
2471 }
2472 }
2473 } else {
2474 // Outside the collection set; we need to gray it
2475 _cm->deal_with_reference(obj);
2476 }
2477 }
2478 }
2479 };
2481 class CSMarkBitMapClosure: public BitMapClosure {
2482 G1CollectedHeap* _g1h;
2483 CMBitMap* _bitMap;
2484 ConcurrentMark* _cm;
2485 CSMarkOopClosure _oop_cl;
2486 public:
2487 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2488 _g1h(G1CollectedHeap::heap()),
2489 _bitMap(cm->nextMarkBitMap()),
2490 _oop_cl(cm, ms_size)
2491 {}
2493 ~CSMarkBitMapClosure() {}
2495 bool do_bit(size_t offset) {
2496 // convert offset into a HeapWord*
2497 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2498 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2499 "address out of range");
2500 assert(_bitMap->isMarked(addr), "tautology");
2501 oop obj = oop(addr);
2502 if (!obj->is_forwarded()) {
2503 if (!_oop_cl.push(obj)) return false;
2504 if (UseCompressedOops) {
2505 if (!_oop_cl.drain<narrowOop>()) return false;
2506 } else {
2507 if (!_oop_cl.drain<oop>()) return false;
2508 }
2509 }
2510 // Otherwise...
2511 return true;
2512 }
2513 };
2516 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2517 CMBitMap* _bm;
2518 CSMarkBitMapClosure _bit_cl;
2519 enum SomePrivateConstants {
2520 MSSize = 1000
2521 };
2522 bool _completed;
2523 public:
2524 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2525 _bm(cm->nextMarkBitMap()),
2526 _bit_cl(cm, MSSize),
2527 _completed(true)
2528 {}
2530 ~CompleteMarkingInCSHRClosure() {}
2532 bool doHeapRegion(HeapRegion* r) {
2533 if (!r->evacuation_failed()) {
2534 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2535 if (!mr.is_empty()) {
2536 if (!_bm->iterate(&_bit_cl, mr)) {
2537 _completed = false;
2538 return true;
2539 }
2540 }
2541 }
2542 return false;
2543 }
2545 bool completed() { return _completed; }
2546 };
2548 class ClearMarksInHRClosure: public HeapRegionClosure {
2549 CMBitMap* _bm;
2550 public:
2551 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2553 bool doHeapRegion(HeapRegion* r) {
2554 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2555 MemRegion usedMR = r->used_region();
2556 _bm->clearRange(usedMR);
2557 }
2558 return false;
2559 }
2560 };
2562 void ConcurrentMark::complete_marking_in_collection_set() {
2563 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2565 if (!g1h->mark_in_progress()) {
2566 g1h->g1_policy()->record_mark_closure_time(0.0);
2567 return;
2568 }
2570 int i = 1;
2571 double start = os::elapsedTime();
2572 while (true) {
2573 i++;
2574 CompleteMarkingInCSHRClosure cmplt(this);
2575 g1h->collection_set_iterate(&cmplt);
2576 if (cmplt.completed()) break;
2577 }
2578 double end_time = os::elapsedTime();
2579 double elapsed_time_ms = (end_time - start) * 1000.0;
2580 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2581 if (PrintGCDetails) {
2582 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2583 }
2585 ClearMarksInHRClosure clr(nextMarkBitMap());
2586 g1h->collection_set_iterate(&clr);
2587 }
2589 // The next two methods deal with the following optimisation. Some
2590 // objects are gray by being marked and located above the finger. If
2591 // they are copied, during an evacuation pause, below the finger then
2592 // they need to be pushed on the stack. The observation is that, if
2593 // there are no regions in the collection set located above the
2594 // finger, then the above cannot happen, hence we do not need to
2595 // explicitly gray any objects when copying them to below the
2596 // finger. The global stack will be scanned to ensure that, if it
2597 // points to objects being copied, it will update their
2598 // location. There is a tricky situation with the gray objects in
2599 // the region stack that are being copied, however. See the comment in
2600 // newCSet().
2602 void ConcurrentMark::newCSet() {
2603 if (!concurrent_marking_in_progress())
2604 // nothing to do if marking is not in progress
2605 return;
2607 // find what the lowest finger is among the global and local fingers
2608 _min_finger = _finger;
2609 for (int i = 0; i < (int)_max_task_num; ++i) {
2610 CMTask* task = _tasks[i];
2611 HeapWord* task_finger = task->finger();
2612 if (task_finger != NULL && task_finger < _min_finger)
2613 _min_finger = task_finger;
2614 }
2616 _should_gray_objects = false;
2618 // This fixes a very subtle and frustrating bug. It might be the case
2619 // that, during an evacuation pause, heap regions that contain
2620 // objects that are gray (by being in regions contained in the
2621 // region stack) are included in the collection set. Since such gray
2622 // objects will be moved, and because it's not easy to redirect
2623 // region stack entries to point to a new location (because objects
2624 // in one region might be scattered to multiple regions after they
2625 // are copied), one option is to ensure that all marked objects
2626 // copied during a pause are pushed on the stack. Notice, however,
2627 // that this problem can only happen when the region stack is not
2628 // empty during an evacuation pause. So, we make the fix a bit less
2629 // conservative and ensure that regions are pushed on the stack,
2630 // irrespective of whether all collection set regions are below the
2631 // finger, if the region stack is not empty. This is expected to be
2632 // a rare case, so I don't think it's necessary to be smarter about it.
2633 if (!region_stack_empty())
2634 _should_gray_objects = true;
2635 }
2637 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2638 if (!concurrent_marking_in_progress())
2639 return;
2641 HeapWord* region_end = hr->end();
2642 if (region_end > _min_finger)
2643 _should_gray_objects = true;
2644 }
2646 // abandon current marking iteration due to a Full GC
2647 void ConcurrentMark::abort() {
2648 // Clear all marks to force marking thread to do nothing
2649 _nextMarkBitMap->clearAll();
2650 // Empty mark stack
2651 clear_marking_state();
2652 for (int i = 0; i < (int)_max_task_num; ++i)
2653 _tasks[i]->clear_region_fields();
2654 _has_aborted = true;
2656 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2657 satb_mq_set.abandon_partial_marking();
2658 // This can be called either during or outside marking; we'll read
2659 // the expected_active value from the SATB queue set.
2660 satb_mq_set.set_active_all_threads(
2661 false, /* new active value */
2662 satb_mq_set.is_active() /* expected_active */);
2663 }
2665 static void print_ms_time_info(const char* prefix, const char* name,
2666 NumberSeq& ns) {
2667 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2668 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2669 if (ns.num() > 0) {
2670 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2671 prefix, ns.sd(), ns.maximum());
2672 }
2673 }
2675 void ConcurrentMark::print_summary_info() {
2676 gclog_or_tty->print_cr(" Concurrent marking:");
2677 print_ms_time_info(" ", "init marks", _init_times);
2678 print_ms_time_info(" ", "remarks", _remark_times);
2679 {
2680 print_ms_time_info(" ", "final marks", _remark_mark_times);
2681 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2683 }
2684 print_ms_time_info(" ", "cleanups", _cleanup_times);
2685 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2686 _total_counting_time,
2687 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2688 (double)_cleanup_times.num()
2689 : 0.0));
2690 if (G1ScrubRemSets) {
2691 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2692 _total_rs_scrub_time,
2693 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2694 (double)_cleanup_times.num()
2695 : 0.0));
2696 }
2697 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2698 (_init_times.sum() + _remark_times.sum() +
2699 _cleanup_times.sum())/1000.0);
2700 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2701 "(%8.2f s marking, %8.2f s counting).",
2702 cmThread()->vtime_accum(),
2703 cmThread()->vtime_mark_accum(),
2704 cmThread()->vtime_count_accum());
2705 }
2707 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2708 _parallel_workers->print_worker_threads_on(st);
2709 }
2711 // Closures
2712 // XXX: there seems to be a lot of code duplication here;
2713 // should refactor and consolidate the shared code.
2718 // We take a break if someone is trying to stop the world.
2719 bool ConcurrentMark::do_yield_check(int worker_i) {
2720 if (should_yield()) {
2721 if (worker_i == 0)
2722 _g1h->g1_policy()->record_concurrent_pause();
2723 cmThread()->yield();
2724 if (worker_i == 0)
2725 _g1h->g1_policy()->record_concurrent_pause_end();
2726 return true;
2727 } else {
2728 return false;
2729 }
2730 }
2732 bool ConcurrentMark::should_yield() {
2733 return cmThread()->should_yield();
2734 }
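2735 // Card-index arithmetic: with the default 512-byte cards (card_shift == 9), the card bit for a pointer p is ((p - reserved_start) >> 9) in _card_bm, which is exactly what the code below computes.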
2736 bool ConcurrentMark::containing_card_is_marked(void* p) {
2737 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2738 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2739 }
2741 bool ConcurrentMark::containing_cards_are_marked(void* start,
2742 void* last) {
2743 return
2744 containing_card_is_marked(start) &&
2745 containing_card_is_marked(last);
2746 }
2748 #ifndef PRODUCT
2749 // for debugging purposes
2750 void ConcurrentMark::print_finger() {
2751 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2752 _heap_start, _heap_end, _finger);
2753 for (int i = 0; i < (int) _max_task_num; ++i) {
2754 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2755 }
2756 gclog_or_tty->print_cr("");
2757 }
2758 #endif
2760 // Closure for iteration over bitmaps
2761 class CMBitMapClosure : public BitMapClosure {
2762 private:
2763 // the bitmap that is being iterated over
2764 CMBitMap* _nextMarkBitMap;
2765 ConcurrentMark* _cm;
2766 CMTask* _task;
2767 // true if we're scanning a heap region claimed by the task (so that
2768 // we move the finger along), false if we're not, i.e. currently when
2769 // scanning a heap region popped from the region stack (so that we
2770 // do not move the task finger along; it'd be a mistake if we did so).
2771 bool _scanning_heap_region;
2773 public:
2774 CMBitMapClosure(CMTask *task,
2775 ConcurrentMark* cm,
2776 CMBitMap* nextMarkBitMap)
2777 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2779 void set_scanning_heap_region(bool scanning_heap_region) {
2780 _scanning_heap_region = scanning_heap_region;
2781 }
2783 bool do_bit(size_t offset) {
2784 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2785 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2786 assert( addr < _cm->finger(), "invariant");
2788 if (_scanning_heap_region) {
2789 statsOnly( _task->increase_objs_found_on_bitmap() );
2790 assert(addr >= _task->finger(), "invariant");
2791 // We move that task's local finger along.
2792 _task->move_finger_to(addr);
2793 } else {
2794 // We move the task's region finger along.
2795 _task->move_region_finger_to(addr);
2796 }
2798 _task->scan_object(oop(addr));
2799 // we only partially drain the local queue and global stack
2800 _task->drain_local_queue(true);
2801 _task->drain_global_stack(true);
2803 // if the has_aborted flag has been raised, we need to bail out of
2804 // the iteration
2805 return !_task->has_aborted();
2806 }
2807 };
2809 // Closure for iterating over objects, currently only used for
2810 // processing SATB buffers.
2811 class CMObjectClosure : public ObjectClosure {
2812 private:
2813 CMTask* _task;
2815 public:
2816 void do_object(oop obj) {
2817 _task->deal_with_reference(obj);
2818 }
2820 CMObjectClosure(CMTask* task) : _task(task) { }
2821 };
2823 // Closure for iterating over object fields
2824 class CMOopClosure : public OopClosure {
2825 private:
2826 G1CollectedHeap* _g1h;
2827 ConcurrentMark* _cm;
2828 CMTask* _task;
2830 public:
2831 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2832 virtual void do_oop( oop* p) { do_oop_work(p); }
2834 template <class T> void do_oop_work(T* p) {
2835 assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2836 assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2837 "invariant");
2839 oop obj = oopDesc::load_decode_heap_oop(p);
2840 if (_cm->verbose_high())
2841 gclog_or_tty->print_cr("[%d] we're looking at location "
2842 "*"PTR_FORMAT" = "PTR_FORMAT,
2843 _task->task_id(), p, (void*) obj);
2844 _task->deal_with_reference(obj);
2845 }
2847 CMOopClosure(G1CollectedHeap* g1h,
2848 ConcurrentMark* cm,
2849 CMTask* task)
2850 : _g1h(g1h), _cm(cm), _task(task) { }
2851 };
2853 void CMTask::setup_for_region(HeapRegion* hr) {
2854 // Separated the asserts so that we know which one fires.
2855 assert(hr != NULL,
2856 "claim_region() should never hand us a NULL region");
2857 assert(!hr->continuesHumongous(),
2858 "claim_region() should have filtered out continues humongous regions");
2860 if (_cm->verbose_low())
2861 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2862 _task_id, hr);
2864 _curr_region = hr;
2865 _finger = hr->bottom();
2866 update_region_limit();
2867 }
2869 void CMTask::update_region_limit() {
2870 HeapRegion* hr = _curr_region;
2871 HeapWord* bottom = hr->bottom();
2872 HeapWord* limit = hr->next_top_at_mark_start();
2874 if (limit == bottom) {
2875 if (_cm->verbose_low())
2876 gclog_or_tty->print_cr("[%d] found an empty region "
2877 "["PTR_FORMAT", "PTR_FORMAT")",
2878 _task_id, bottom, limit);
2879 // The region was collected underneath our feet.
2880 // We set the finger to bottom to ensure that the bitmap
2881 // iteration that will follow this will not do anything.
2882 // (this is not a condition that holds when we set the region up,
2883 // as the region is not supposed to be empty in the first place)
2884 _finger = bottom;
2885 } else if (limit >= _region_limit) {
2886 assert(limit >= _finger, "peace of mind");
2887 } else {
2888 assert(limit < _region_limit, "only way to get here");
2889 // This can happen under some pretty unusual circumstances. An
2890 // evacuation pause empties the region underneath our feet (NTAMS
2891 // at bottom). We then do some allocation in the region (NTAMS
2892 // stays at bottom), followed by the region being used as a GC
2893 // alloc region (NTAMS will move to top() and the objects
2894 // originally below it will be grayed). All objects now marked in
2895 // the region are explicitly grayed, if below the global finger,
2896 // and in fact we do not need to scan anything else. So, we simply
2897 // set _finger to be limit to ensure that the bitmap iteration
2898 // doesn't do anything.
2899 _finger = limit;
2900 }
2902 _region_limit = limit;
2903 }
2905 void CMTask::giveup_current_region() {
2906 assert(_curr_region != NULL, "invariant");
2907 if (_cm->verbose_low())
2908 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2909 _task_id, _curr_region);
2910 clear_region_fields();
2911 }
2913 void CMTask::clear_region_fields() {
2914 // Set these three fields to values that indicate that we're not
2915 // holding on to a region.
2916 _curr_region = NULL;
2917 _finger = NULL;
2918 _region_limit = NULL;
2920 _region_finger = NULL;
2921 }
2923 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2924 guarantee(nextMarkBitMap != NULL, "invariant");
2926 if (_cm->verbose_low())
2927 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2929 _nextMarkBitMap = nextMarkBitMap;
2930 clear_region_fields();
2932 _calls = 0;
2933 _elapsed_time_ms = 0.0;
2934 _termination_time_ms = 0.0;
2935 _termination_start_time_ms = 0.0;
2937 #if _MARKING_STATS_
2938 _local_pushes = 0;
2939 _local_pops = 0;
2940 _local_max_size = 0;
2941 _objs_scanned = 0;
2942 _global_pushes = 0;
2943 _global_pops = 0;
2944 _global_max_size = 0;
2945 _global_transfers_to = 0;
2946 _global_transfers_from = 0;
2947 _region_stack_pops = 0;
2948 _regions_claimed = 0;
2949 _objs_found_on_bitmap = 0;
2950 _satb_buffers_processed = 0;
2951 _steal_attempts = 0;
2952 _steals = 0;
2953 _aborted = 0;
2954 _aborted_overflow = 0;
2955 _aborted_cm_aborted = 0;
2956 _aborted_yield = 0;
2957 _aborted_timed_out = 0;
2958 _aborted_satb = 0;
2959 _aborted_termination = 0;
2960 #endif // _MARKING_STATS_
2961 }
2963 bool CMTask::should_exit_termination() {
2964 regular_clock_call();
2965 // This is called when we are in the termination protocol. We should
2966 // quit if, for some reason, this task wants to abort or the global
2967 // stack is not empty (this means that we can get work from it).
2968 return !_cm->mark_stack_empty() || has_aborted();
2969 }
2971 // This determines whether the method below will check both the local
2972 // and global fingers when determining whether to push on the stack a
2973 // gray object (value 1) or whether it will only check the global one
2974 // (value 0). The tradeoffs are that the former will be a bit more
2975 // accurate and possibly push less on the stack, but it might also be
2976 // a little bit slower.
2978 #define _CHECK_BOTH_FINGERS_ 1
2980 void CMTask::deal_with_reference(oop obj) {
2981 if (_cm->verbose_high())
2982 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2983 _task_id, (void*) obj);
2985 ++_refs_reached;
2987 HeapWord* objAddr = (HeapWord*) obj;
2988 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2989 if (_g1h->is_in_g1_reserved(objAddr)) {
2990 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2991 HeapRegion* hr = _g1h->heap_region_containing(obj);
2992 if (_g1h->is_obj_ill(obj, hr)) {
2993 if (_cm->verbose_high())
2994 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2995 _task_id, (void*) obj);
2997 // we need to mark it first
2998 if (_nextMarkBitMap->parMark(objAddr)) {
2999 // No OrderAccess::store_load() is needed. It is implicit in the
3000 // CAS done in parMark(objAddr) above
3001 HeapWord* global_finger = _cm->finger();
3003 #if _CHECK_BOTH_FINGERS_
3004 // we will check both the local and global fingers
3006 if (_finger != NULL && objAddr < _finger) {
3007 if (_cm->verbose_high())
3008 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3009 "pushing it", _task_id, _finger);
3010 push(obj);
3011 } else if (_curr_region != NULL && objAddr < _region_limit) {
3012 // do nothing
3013 } else if (objAddr < global_finger) {
3014 // Notice that the global finger might be moving forward
3015 // concurrently. This is not a problem. In the worst case, we
3016 // mark the object while it is above the global finger and, by
3017 // the time we read the global finger, it has moved forward
3018 // past this object. In this case, the object will probably
3019 // be visited when a task is scanning the region and will also
3020 // be pushed on the stack. So, some duplicate work, but no
3021 // correctness problems.
3023 if (_cm->verbose_high())
3024 gclog_or_tty->print_cr("[%d] below the global finger "
3025 "("PTR_FORMAT"), pushing it",
3026 _task_id, global_finger);
3027 push(obj);
3028 } else {
3029 // do nothing
3030 }
3031 #else // _CHECK_BOTH_FINGERS_
3032 // we will only check the global finger
3034 if (objAddr < global_finger) {
3035 // see long comment above
3037 if (_cm->verbose_high())
3038 gclog_or_tty->print_cr("[%d] below the global finger "
3039 "("PTR_FORMAT"), pushing it",
3040 _task_id, global_finger);
3041 push(obj);
3042 }
3043 #endif // _CHECK_BOTH_FINGERS_
3044 }
3045 }
3046 }
3047 }
3049 void CMTask::push(oop obj) {
3050 HeapWord* objAddr = (HeapWord*) obj;
3051 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3052 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3053 "invariant");
3054 assert(!_g1h->is_obj_ill(obj), "invariant");
3055 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3057 if (_cm->verbose_high())
3058 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3060 if (!_task_queue->push(obj)) {
3061 // The local task queue looks full. We need to push some entries
3062 // to the global stack.
3064 if (_cm->verbose_medium())
3065 gclog_or_tty->print_cr("[%d] task queue overflow, "
3066 "moving entries to the global stack",
3067 _task_id);
3068 move_entries_to_global_stack();
3070 // this should succeed since, even if we overflow the global
3071 // stack, we should have definitely removed some entries from the
3072 // local queue. So, there must be space on it.
3073 bool success = _task_queue->push(obj);
3074 assert(success, "invariant");
3075 }
3077 statsOnly( int tmp_size = _task_queue->size();
3078 if (tmp_size > _local_max_size)
3079 _local_max_size = tmp_size;
3080 ++_local_pushes );
3081 }
3083 void CMTask::reached_limit() {
3084 assert(_words_scanned >= _words_scanned_limit ||
3085 _refs_reached >= _refs_reached_limit ,
3086 "shouldn't have been called otherwise");
3087 regular_clock_call();
3088 }
3090 void CMTask::regular_clock_call() {
3091 if (has_aborted())
3092 return;
3094 // First, we need to recalculate the words scanned and refs reached
3095 // limits for the next clock call.
3096 recalculate_limits();
3098 // During the regular clock call we do the following
3100 // (1) If an overflow has been flagged, then we abort.
3101 if (_cm->has_overflown()) {
3102 set_has_aborted();
3103 return;
3104 }
3106 // If we are not concurrent (i.e. we're doing remark) we don't need
3107 // to check anything else. The other steps are only needed during
3108 // the concurrent marking phase.
3109 if (!concurrent())
3110 return;
3112 // (2) If marking has been aborted for Full GC, then we also abort.
3113 if (_cm->has_aborted()) {
3114 set_has_aborted();
3115 statsOnly( ++_aborted_cm_aborted );
3116 return;
3117 }
3119 double curr_time_ms = os::elapsedVTime() * 1000.0;
3121 // (3) If marking stats are enabled, then we update the step history.
3122 #if _MARKING_STATS_
3123 if (_words_scanned >= _words_scanned_limit)
3124 ++_clock_due_to_scanning;
3125 if (_refs_reached >= _refs_reached_limit)
3126 ++_clock_due_to_marking;
3128 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3129 _interval_start_time_ms = curr_time_ms;
3130 _all_clock_intervals_ms.add(last_interval_ms);
3132 if (_cm->verbose_medium()) {
3133 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3134 "scanned = %d%s, refs reached = %d%s",
3135 _task_id, last_interval_ms,
3136 _words_scanned,
3137 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3138 _refs_reached,
3139 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3140 }
3141 #endif // _MARKING_STATS_
3143 // (4) We check whether we should yield. If we have to, then we abort.
3144 if (_cm->should_yield()) {
3145 // We should yield. To do this we abort the task. The caller is
3146 // responsible for yielding.
3147 set_has_aborted();
3148 statsOnly( ++_aborted_yield );
3149 return;
3150 }
3152 // (5) We check whether we've reached our time quota. If we have,
3153 // then we abort.
3154 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3155 if (elapsed_time_ms > _time_target_ms) {
3156 set_has_aborted();
3157 _has_aborted_timed_out = true;
3158 statsOnly( ++_aborted_timed_out );
3159 return;
3160 }
3162 // (6) Finally, we check whether there are enough completed SATB
3163 // buffers available for processing. If there are, we abort.
3164 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3165 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3166 if (_cm->verbose_low())
3167 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3168 _task_id);
3169 // we do need to process SATB buffers, we'll abort and restart
3170 // the marking task to do so
3171 set_has_aborted();
3172 statsOnly( ++_aborted_satb );
3173 return;
3174 }
3175 }
3177 void CMTask::recalculate_limits() {
3178 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3179 _words_scanned_limit = _real_words_scanned_limit;
3181 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3182 _refs_reached_limit = _real_refs_reached_limit;
3183 }
3185 void CMTask::decrease_limits() {
3186 // This is called when we believe that we're going to do an infrequent
3187 // operation which will increase the per byte scanned cost (i.e. move
3188 // entries to/from the global stack). It basically tries to decrease the
3189 // scanning limit so that the clock is called earlier.
3191 if (_cm->verbose_medium())
3192 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3194 _words_scanned_limit = _real_words_scanned_limit -
3195 3 * words_scanned_period / 4;
3196 _refs_reached_limit = _real_refs_reached_limit -
3197 3 * refs_reached_period / 4;
3198 }
3200 void CMTask::move_entries_to_global_stack() {
3201 // local array where we'll store the entries that will be popped
3202 // from the local queue
3203 oop buffer[global_stack_transfer_size];
3205 int n = 0;
3206 oop obj;
3207 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3208 buffer[n] = obj;
3209 ++n;
3210 }
3212 if (n > 0) {
3213 // we popped at least one entry from the local queue
3215 statsOnly( ++_global_transfers_to; _local_pops += n );
3217 if (!_cm->mark_stack_push(buffer, n)) {
3218 if (_cm->verbose_low())
3219 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3220 set_has_aborted();
3221 } else {
3222 // the transfer was successful
3224 if (_cm->verbose_medium())
3225 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3226 _task_id, n);
3227 statsOnly( int tmp_size = _cm->mark_stack_size();
3228 if (tmp_size > _global_max_size)
3229 _global_max_size = tmp_size;
3230 _global_pushes += n );
3231 }
3232 }
3234 // this operation was quite expensive, so decrease the limits
3235 decrease_limits();
3236 }
3238 void CMTask::get_entries_from_global_stack() {
3239 // local array where we'll store the entries that will be popped
3240 // from the global stack.
3241 oop buffer[global_stack_transfer_size];
3242 int n;
3243 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3244 assert(n <= global_stack_transfer_size,
3245 "we should not pop more than the given limit");
3246 if (n > 0) {
3247 // yes, we did actually pop at least one entry
3249 statsOnly( ++_global_transfers_from; _global_pops += n );
3250 if (_cm->verbose_medium())
3251 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3252 _task_id, n);
3253 for (int i = 0; i < n; ++i) {
3254 bool success = _task_queue->push(buffer[i]);
3255 // We only call this when the local queue is empty or under a
3256 // given target limit. So, we do not expect this push to fail.
3257 assert(success, "invariant");
3258 }
3260 statsOnly( int tmp_size = _task_queue->size();
3261 if (tmp_size > _local_max_size)
3262 _local_max_size = tmp_size;
3263 _local_pushes += n );
3264 }
3266 // this operation was quite expensive, so decrease the limits
3267 decrease_limits();
3268 }
3270 void CMTask::drain_local_queue(bool partially) {
3271 if (has_aborted())
3272 return;
3274 // Decide what the target size is, depending whether we're going to
3275 // drain it partially (so that other tasks can steal if they run out
3276 // of things to do) or totally (at the very end).
3277 size_t target_size;
3278 if (partially)
3279 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3280 else
3281 target_size = 0;
3283 if (_task_queue->size() > target_size) {
3284 if (_cm->verbose_high())
3285 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3286 _task_id, target_size);
3288 oop obj;
3289 bool ret = _task_queue->pop_local(obj);
3290 while (ret) {
3291 statsOnly( ++_local_pops );
3293 if (_cm->verbose_high())
3294 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3295 (void*) obj);
3297 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3298 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3299 "invariant");
3301 scan_object(obj);
3303 if (_task_queue->size() <= target_size || has_aborted())
3304 ret = false;
3305 else
3306 ret = _task_queue->pop_local(obj);
3307 }
3309 if (_cm->verbose_high())
3310 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3311 _task_id, _task_queue->size());
3312 }
3313 }
3315 void CMTask::drain_global_stack(bool partially) {
3316 if (has_aborted())
3317 return;
3319 // We have a policy to drain the local queue before we attempt to
3320 // drain the global stack.
3321 assert(partially || _task_queue->size() == 0, "invariant");
3323 // Decide what the target size is, depending whether we're going to
3324 // drain it partially (so that other tasks can steal if they run out
3325 // of things to do) or totally (at the very end). Notice that,
3326 // because we move entries from the global stack in chunks or
3327 // because another task might be doing the same, we might in fact
3328 // drop below the target. But, this is not a problem.
3329 size_t target_size;
3330 if (partially)
3331 target_size = _cm->partial_mark_stack_size_target();
3332 else
3333 target_size = 0;
3335 if (_cm->mark_stack_size() > target_size) {
3336 if (_cm->verbose_low())
3337 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3338 _task_id, target_size);
3340 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3341 get_entries_from_global_stack();
3342 drain_local_queue(partially);
3343 }
3345 if (_cm->verbose_low())
3346 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3347 _task_id, _cm->mark_stack_size());
3348 }
3349 }
3351 // SATB Queue has several assumptions on whether to call the par or
3352 // non-par versions of the methods. This is why some of the code is
3353 // replicated. We should really get rid of the single-threaded version
3354 // of the code to simplify things.
3355 void CMTask::drain_satb_buffers() {
3356 if (has_aborted())
3357 return;
3359 // We set this so that the regular clock knows that we're in the
3360 // middle of draining buffers and doesn't set the abort flag when it
3361 // notices that SATB buffers are available for draining. It'd be
3362 // very counterproductive if it did that. :-)
3363 _draining_satb_buffers = true;
3365 CMObjectClosure oc(this);
3366 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3367 if (ParallelGCThreads > 0)
3368 satb_mq_set.set_par_closure(_task_id, &oc);
3369 else
3370 satb_mq_set.set_closure(&oc);
3372 // This keeps claiming and applying the closure to completed buffers
3373 // until we run out of buffers or we need to abort.
3374 if (ParallelGCThreads > 0) {
3375 while (!has_aborted() &&
3376 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3377 if (_cm->verbose_medium())
3378 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3379 statsOnly( ++_satb_buffers_processed );
3380 regular_clock_call();
3381 }
3382 } else {
3383 while (!has_aborted() &&
3384 satb_mq_set.apply_closure_to_completed_buffer()) {
3385 if (_cm->verbose_medium())
3386 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3387 statsOnly( ++_satb_buffers_processed );
3388 regular_clock_call();
3389 }
3390 }
3392 if (!concurrent() && !has_aborted()) {
3393 // We should only do this during remark.
3394 if (ParallelGCThreads > 0)
3395 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3396 else
3397 satb_mq_set.iterate_closure_all_threads();
3398 }
3400 _draining_satb_buffers = false;
3402 assert(has_aborted() ||
3403 concurrent() ||
3404 satb_mq_set.completed_buffers_num() == 0, "invariant");
3406 if (ParallelGCThreads > 0)
3407 satb_mq_set.set_par_closure(_task_id, NULL);
3408 else
3409 satb_mq_set.set_closure(NULL);
3411 // Again, this was a potentially expensive operation; decrease the
3412 // limits to get the regular clock call early.
3413 decrease_limits();
3414 }
3416 void CMTask::drain_region_stack(BitMapClosure* bc) {
3417 if (has_aborted())
3418 return;
3420 assert(_region_finger == NULL,
3421 "it should be NULL when we're not scanning a region");
3423 if (!_cm->region_stack_empty()) {
3424 if (_cm->verbose_low())
3425 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3426 _task_id, _cm->region_stack_size());
3428 MemRegion mr = _cm->region_stack_pop_with_lock();
3429 // it returns MemRegion() if the pop fails
3430 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3432 while (mr.start() != NULL) {
3433 if (_cm->verbose_medium())
3434 gclog_or_tty->print_cr("[%d] we are scanning region "
3435 "["PTR_FORMAT", "PTR_FORMAT")",
3436 _task_id, mr.start(), mr.end());
3437 assert(mr.end() <= _cm->finger(),
3438 "otherwise the region shouldn't be on the stack");
3439 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3440 if (_nextMarkBitMap->iterate(bc, mr)) {
3441 assert(!has_aborted(),
3442 "cannot abort the task without aborting the bitmap iteration");
3444 // We finished iterating over the region without aborting.
3445 regular_clock_call();
3446 if (has_aborted())
3447 mr = MemRegion();
3448 else {
3449 mr = _cm->region_stack_pop_with_lock();
3450 // it returns MemRegion() if the pop fails
3451 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3452 }
3453 } else {
3454 assert(has_aborted(), "currently the only way to do so");
3456 // The only way to abort the bitmap iteration is to return
3457 // false from the do_bit() method. However, inside the
3458 // do_bit() method we move the _region_finger to point to the
3459 // object currently being looked at. So, if we bail out, we
3460 // have definitely set _region_finger to something non-null.
3461 assert(_region_finger != NULL, "invariant");
3463 // The iteration was actually aborted. So now _region_finger
3464 // points to the address of the object we last scanned. If we
3465 // leave it there, when we restart this task, we will rescan
3466 // the object. It is easy to avoid this. We move the finger by
3467 // enough to point to the next possible object header (the
3468 // bitmap knows by how much we need to move it as it knows its
3469 // granularity).
3470 MemRegion newRegion =
3471 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3473 if (!newRegion.is_empty()) {
3474 if (_cm->verbose_low()) {
3475 gclog_or_tty->print_cr("[%d] pushing unscanned region "
3476 "["PTR_FORMAT", "PTR_FORMAT") on region stack",
3477 _task_id,
3478 newRegion.start(), newRegion.end());
3479 }
3480 // Now push the part of the region we didn't scan on the
3481 // region stack to make sure a task scans it later.
3482 _cm->region_stack_push_with_lock(newRegion);
3483 }
3484 // break from while
3485 mr = MemRegion();
3486 }
3487 _region_finger = NULL;
3488 }
3490 if (_cm->verbose_low())
3491 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3492 _task_id, _cm->region_stack_size());
3493 }
3494 }
3496 void CMTask::print_stats() {
3497 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3498 _task_id, _calls);
3499 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3500 _elapsed_time_ms, _termination_time_ms);
3501 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3502 _step_times_ms.num(), _step_times_ms.avg(),
3503 _step_times_ms.sd());
3504 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3505 _step_times_ms.maximum(), _step_times_ms.sum());
3507 #if _MARKING_STATS_
3508 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3509 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3510 _all_clock_intervals_ms.sd());
3511 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3512 _all_clock_intervals_ms.maximum(),
3513 _all_clock_intervals_ms.sum());
3514 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3515 _clock_due_to_scanning, _clock_due_to_marking);
3516 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3517 _objs_scanned, _objs_found_on_bitmap);
3518 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3519 _local_pushes, _local_pops, _local_max_size);
3520 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3521 _global_pushes, _global_pops, _global_max_size);
3522 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3523 _global_transfers_to, _global_transfers_from);
3524 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3525 _regions_claimed, _region_stack_pops);
3526 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3527 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3528 _steal_attempts, _steals);
3529 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3530 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3531 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3532 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3533 _aborted_timed_out, _aborted_satb, _aborted_termination);
3534 #endif // _MARKING_STATS_
3535 }
3537 /*****************************************************************************
3539 The do_marking_step(time_target_ms) method is the building block
3540 of the parallel marking framework. It can be called in parallel
3541 with other invocations of do_marking_step() on different tasks
3542 (but only one per task, obviously) and concurrently with the
3543 mutator threads, or during remark, hence it eliminates the need
3544 for two versions of the code. When called during remark, it will
3545 pick up from where the task left off during the concurrent marking
3546 phase. Interestingly, tasks are also claimable during evacuation
3547 pauses, since do_marking_step() ensures that it aborts before
3548 it needs to yield.
3550 The data structures that it uses to do marking work are the
3551 following:
3553 (1) Marking Bitmap. If there are gray objects that appear only
3554 on the bitmap (this happens either when dealing with an overflow
3555 or when the initial marking phase has simply marked the roots
3556 and didn't push them on the stack), then tasks claim heap
3557 regions whose bitmap they then scan to find gray objects. A
3558 global finger indicates where the end of the last claimed region
3559 is. A local finger indicates how far into the region a task has
3560 scanned. The two fingers are used to determine how to gray an
3561 object (i.e. whether simply marking it is OK, as it will be
3562 visited by a task in the future, or whether it also needs to be
3563 pushed on a stack).
3565 (2) Local Queue. The local queue of the task, which the task can
3566 access reasonably efficiently. Other tasks can steal from
3567 it when they run out of work. Throughout the marking phase, a
3568 task attempts to keep its local queue short but not totally
3569 empty, so that entries are available for stealing by other
3570 tasks. Only when there is no more work will a task totally
3571 drain its local queue.
3573 (3) Global Mark Stack. This handles local queue overflow. During
3574 marking, only sets of entries are moved between it and the local
3575 queues, as access to it requires a mutex and more fine-grained
3576 interaction with it might cause contention. If it
3577 overflows, then the marking phase should restart and iterate
3578 over the bitmap to identify gray objects. Throughout the marking
3579 phase, tasks attempt to keep the global mark stack at a small
3580 length but not totally empty, so that entries are available for
3581 popping by other tasks. Only when there is no more work will
3582 tasks totally drain the global mark stack.
3584 (4) Global Region Stack. Entries on it correspond to areas of
3585 the bitmap that need to be scanned since they contain gray
3586 objects. Pushes on the region stack only happen during
3587 evacuation pauses and typically correspond to areas covered by
3588 GC LABs. If it overflows, then the marking phase should restart
3589 and iterate over the bitmap to identify gray objects. Tasks will
3590 try to totally drain the region stack as soon as possible.
3592 (5) SATB Buffer Queue. This is where completed SATB buffers are
3593 made available. Buffers are regularly removed from this queue
3594 and scanned for roots, so that the queue doesn't get too
3595 long. During remark, all completed buffers are processed, as
3596 well as the filled-in parts of any uncompleted buffers.
3598 The do_marking_step() method tries to abort when the time target
3599 has been reached. There are a few other cases when the
3600 do_marking_step() method also aborts:
3602 (1) When the marking phase has been aborted (after a Full GC).
3604 (2) When a global overflow (either on the global stack or the
3605 region stack) has been triggered. Before the task aborts, it
3606 will actually sync up with the other tasks to ensure that all
3607 the marking data structures (local queues, stacks, fingers etc.)
3608 are re-initialised so that when do_marking_step() completes,
3609 the marking phase can immediately restart.
3611 (3) When enough completed SATB buffers are available. The
3612 do_marking_step() method only tries to drain SATB buffers right
3613 at the beginning. So, if enough buffers are available, the
3614 marking step aborts and the SATB buffers are processed at
3615 the beginning of the next invocation.
3617 (4) To yield. When we have to yield, we abort and do the yield
3618 right at the end of do_marking_step(). This saves us a lot of
3619 hassle as, by yielding, we might allow a Full GC. If this
3620 happens then objects will be compacted underneath our feet, the
3621 heap might shrink, etc. We avoid having to check for all this by
3622 just aborting and doing the yield right at the end.
3624 From the above it follows that the do_marking_step() method should
3625 be called in a loop (or, otherwise, regularly) until it completes.
3627 If a marking step completes without its has_aborted() flag being
3628 true, it means it has completed the current marking phase (and
3629 also all other marking tasks have done so and have all synced up).
3631 A method called regular_clock_call() is invoked "regularly" (at
3632 sub-ms intervals) throughout marking. It is this clock method that
3633 checks all the abort conditions mentioned above and decides when
3634 the task should abort. A work-based scheme is used to trigger this
3635 clock method: it is called when the number of object words the
3636 marking phase has scanned or the number of references the marking
3637 phase has visited reaches a given limit. Additional invocations of
3638 the clock method have been planted in a few other strategic places
3639 too. The initial reason for the clock method was to avoid calling
3640 vtime too regularly, as it is quite expensive. So, once it was in
3641 place, it was natural to piggy-back all the other conditions on it
3642 too rather than constantly checking them throughout the code.
3644 *****************************************************************************/
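// The following is an informal sketch, added for orientation only, of
// the control flow that do_marking_step() implements below. The helper
// names are the real ones; the loop conditions are paraphrased:
//
//   drain_satb_buffers();                 // SATB buffers, once per step
//   drain_region_stack(&bitmap_closure);  // region stack, totally
//   do {
//     // iterate over the bitmap of the current region (held over
//     // from a previous step or freshly claimed), graying objects
//     drain_local_queue(true);            // partially
//     drain_global_stack(true);           // partially
//   } while (we hold or can claim a region && !has_aborted());
//   drain_satb_buffers();                 // reduce remark's work
//   drain_local_queue(false);             // totally
//   drain_global_stack(false);            // totally
//   // steal from other tasks' queues, then offer termination; on
//   // overflow, sync up on two barriers and reset the local state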
3646 void CMTask::do_marking_step(double time_target_ms) {
3647 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3648 assert(concurrent() == _cm->concurrent(), "they should be the same");
3650 assert(concurrent() || _cm->region_stack_empty(),
3651 "the region stack should have been cleared before remark");
3652 assert(_region_finger == NULL,
3653 "this should be non-null only when a region is being scanned");
3655 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3656 assert(_task_queues != NULL, "invariant");
3657 assert(_task_queue != NULL, "invariant");
3658 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3660 assert(!_claimed,
3661 "only one thread should claim this task at any one time");
3663 // OK, this doesn't safeguard against all possible scenarios, as it is
3664 // possible for two threads to set the _claimed flag at the same
3665 // time. But it is only for debugging purposes anyway and it will
3666 // catch most problems.
3667 _claimed = true;
3669 _start_time_ms = os::elapsedVTime() * 1000.0;
3670 statsOnly( _interval_start_time_ms = _start_time_ms );
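// Shave off the predicted overshoot of previous steps (recorded in
// _marking_step_diffs_ms when a step times out) so that, on average,
// we finish close to the caller's time target.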
3672 double diff_prediction_ms =
3673 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3674 _time_target_ms = time_target_ms - diff_prediction_ms;
3676 // set up the variables that are used in the work-based scheme to
3677 // call the regular clock method
3678 _words_scanned = 0;
3679 _refs_reached = 0;
3680 recalculate_limits();
3682 // clear all flags
3683 clear_has_aborted();
3684 _has_aborted_timed_out = false;
3685 _draining_satb_buffers = false;
3687 ++_calls;
3689 if (_cm->verbose_low())
3690 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3691 "target = %1.2lfms >>>>>>>>>>",
3692 _task_id, _calls, _time_target_ms);
3694 // Set up the bitmap and oop closures. Anything that uses them is
3695 // eventually called from this method, so it is OK to allocate these
3696 // on this frame's stack.
3697 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3698 CMOopClosure oop_closure(_g1h, _cm, this);
3699 set_oop_closure(&oop_closure);
3701 if (_cm->has_overflown()) {
3702 // This can happen if the region stack or the mark stack overflows
3703 // during a GC pause and this task, after a yield point,
3704 // restarts. We have to abort as we need to get into the overflow
3705 // protocol which happens right at the end of this task.
3706 set_has_aborted();
3707 }
3709 // First drain any available SATB buffers. After this, we will not
3710 // look at SATB buffers before the next invocation of this method.
3711 // If enough completed SATB buffers are queued up, the regular clock
3712 // will abort this task so that it restarts.
3713 drain_satb_buffers();
3714 // ...then partially drain the local queue and the global stack
3715 drain_local_queue(true);
3716 drain_global_stack(true);
3718 // Then totally drain the region stack. We will not look at
3719 // it again before the next invocation of this method. Entries on
3720 // the region stack are only added during evacuation pauses, for
3721 // which we have to yield. When we do, we abort the task anyway so
3722 // it will look at the region stack again when it restarts.
3723 bitmap_closure.set_scanning_heap_region(false);
3724 drain_region_stack(&bitmap_closure);
3725 // ...then partially drain the local queue and the global stack
3726 drain_local_queue(true);
3727 drain_global_stack(true);
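// Main loop: iterate over the bitmap of the region we're currently
// holding (if any), then keep claiming and scanning fresh regions,
// draining the local queue and global stack as we go, until we run
// out of regions or the task aborts.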
3729 do {
3730 if (!has_aborted() && _curr_region != NULL) {
3731 // This means that we're already holding on to a region.
3732 assert(_finger != NULL, "if region is not NULL, then the finger "
3733 "should not be NULL either");
3735 // We might have restarted this task after an evacuation pause
3736 // which might have evacuated the region we're holding on to
3737 // underneath our feet. Let's read its limit again to make sure
3738 // that we do not iterate over a region of the heap that
3739 // contains garbage (update_region_limit() will also move
3740 // _finger to the start of the region if it is found empty).
3741 update_region_limit();
3742 // We will start from _finger not from the start of the region,
3743 // as we might be restarting this task after aborting half-way
3744 // through scanning this region. In this case, _finger points to
3745 // the address where we last found a marked object. If this is a
3746 // fresh region, _finger points to start().
3747 MemRegion mr = MemRegion(_finger, _region_limit);
3749 if (_cm->verbose_low())
3750 gclog_or_tty->print_cr("[%d] we're scanning part "
3751 "["PTR_FORMAT", "PTR_FORMAT") "
3752 "of region "PTR_FORMAT,
3753 _task_id, _finger, _region_limit, _curr_region);
3755 // Let's iterate over the bitmap of the part of the
3756 // region that is left.
3757 bitmap_closure.set_scanning_heap_region(true);
3758 if (mr.is_empty() ||
3759 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3760 // We successfully completed iterating over the region. Now,
3761 // let's give up the region.
3762 giveup_current_region();
3763 regular_clock_call();
3764 } else {
3765 assert(has_aborted(), "currently the only way to do so");
3766 // The only way to abort the bitmap iteration is to return
3767 // false from the do_bit() method. However, inside the
3768 // do_bit() method we move the _finger to point to the
3769 // object currently being looked at. So, if we bail out, we
3770 // have definitely set _finger to something non-null.
3771 assert(_finger != NULL, "invariant");
3773 // Region iteration was actually aborted. So now _finger
3774 // points to the address of the object we last scanned. If we
3775 // leave it there, when we restart this task, we will rescan
3776 // the object. It is easy to avoid this. We move the finger by
3777 // enough to point to the next possible object header (the
3778 // bitmap knows by how much we need to move it as it knows its
3779 // granularity).
3780 assert(_finger < _region_limit, "invariant");
3781 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3782 // Check if bitmap iteration was aborted while scanning the last object
3783 if (new_finger >= _region_limit) {
3784 giveup_current_region();
3785 } else {
3786 move_finger_to(new_finger);
3787 }
3788 }
3789 }
3790 // At this point we have either completed iterating over the
3791 // region we were holding on to, or we have aborted.
3793 // We then partially drain the local queue and the global stack.
3794 // (Do we really need this?)
3795 drain_local_queue(true);
3796 drain_global_stack(true);
3798 // Read the note on the claim_region() method on why it might
3799 // return NULL with potentially more regions available for
3800 // claiming and why we have to check out_of_regions() to determine
3801 // whether we're done or not.
3802 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3803 // We are going to try to claim a new region. We should have
3804 // given up on the previous one.
3805 // Separated the asserts so that we know which one fires.
3806 assert(_curr_region == NULL, "invariant");
3807 assert(_finger == NULL, "invariant");
3808 assert(_region_limit == NULL, "invariant");
3809 if (_cm->verbose_low())
3810 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3811 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3812 if (claimed_region != NULL) {
3813 // Yes, we managed to claim one
3814 statsOnly( ++_regions_claimed );
3816 if (_cm->verbose_low())
3817 gclog_or_tty->print_cr("[%d] we successfully claimed "
3818 "region "PTR_FORMAT,
3819 _task_id, claimed_region);
3821 setup_for_region(claimed_region);
3822 assert(_curr_region == claimed_region, "invariant");
3823 }
3824 // It is important to call the regular clock here. It might take
3825 // a while to claim a region if, for example, we hit a large
3826 // block of empty regions. So we need to call the regular clock
3827 // method once per iteration of the loop to make sure it's called
3828 // frequently enough.
3829 regular_clock_call();
3830 }
3832 if (!has_aborted() && _curr_region == NULL) {
3833 assert(_cm->out_of_regions(),
3834 "at this point we should be out of regions");
3835 }
3836 } while (_curr_region != NULL && !has_aborted());
3838 if (!has_aborted()) {
3839 // We cannot check whether the global stack is empty, since other
3840 // tasks might be pushing objects to it concurrently. We also cannot
3841 // check if the region stack is empty because if a thread is aborting
3842 // it can push a partially done region back.
3843 assert(_cm->out_of_regions(),
3844 "at this point we should be out of regions");
3846 if (_cm->verbose_low())
3847 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3849 // Try to reduce the number of available SATB buffers so that
3850 // remark has less work to do.
3851 drain_satb_buffers();
3852 }
3854 // Since we've done everything else, we can now totally drain the
3855 // local queue and global stack.
3856 drain_local_queue(false);
3857 drain_global_stack(false);
3859 // Attempt to steal work from other tasks' queues.
3860 if (!has_aborted()) {
3861 // We have not aborted. This means that we have finished all that
3862 // we could. Let's try to do some stealing...
3864 // We cannot check whether the global stack is empty, since other
3865 // tasks might be pushing objects to it concurrently. We also cannot
3866 // check if the region stack is empty because if a thread is aborting
3867 // it can push a partially done region back.
3868 assert(_cm->out_of_regions() && _task_queue->size() == 0,
3869 "only way to reach here");
3871 if (_cm->verbose_low())
3872 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3874 while (!has_aborted()) {
3875 oop obj;
3876 statsOnly( ++_steal_attempts );
3878 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3879 if (_cm->verbose_medium())
3880 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3881 _task_id, (void*) obj);
3883 statsOnly( ++_steals );
3885 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3886 "any stolen object should be marked");
3887 scan_object(obj);
3889 // And since we're towards the end, let's totally drain the
3890 // local queue and global stack.
3891 drain_local_queue(false);
3892 drain_global_stack(false);
3893 } else {
3894 break;
3895 }
3896 }
3897 }
3899 // If we still haven't aborted, let's now try to get into the
3900 // termination protocol.
3901 if (!has_aborted()) {
3902 // We cannot check whether the global stack is empty, since other
3903 // tasks might be concurrently pushing objects on it. We also cannot
3904 // check if the region stack is empty because if a thread is aborting
3905 // it can push a partially done region back.
3906 // Separated the asserts so that we know which one fires.
3907 assert(_cm->out_of_regions(), "only way to reach here");
3908 assert(_task_queue->size() == 0, "only way to reach here");
3910 if (_cm->verbose_low())
3911 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3913 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3914 // The CMTask class also extends the TerminatorTerminator class,
3915 // hence its should_exit_termination() method will also decide
3916 // whether to exit the termination protocol or not.
3917 bool finished = _cm->terminator()->offer_termination(this);
3918 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3919 _termination_time_ms +=
3920 termination_end_time_ms - _termination_start_time_ms;
3922 if (finished) {
3923 // We're all done.
3925 if (_task_id == 0) {
3926 // let's allow task 0 to do this
3927 if (concurrent()) {
3928 assert(_cm->concurrent_marking_in_progress(), "invariant");
3929 // we need to set this to false before the next
3930 // safepoint. This way we ensure that the marking phase
3931 // doesn't observe any more heap expansions.
3932 _cm->clear_concurrent_marking_in_progress();
3933 }
3934 }
3936 // We can now guarantee that the global stack is empty, since
3937 // all other tasks have finished. We separated the guarantees so
3938 // that, if a condition is false, we can immediately find out
3939 // which one.
3940 guarantee(_cm->out_of_regions(), "only way to reach here");
3941 guarantee(_cm->region_stack_empty(), "only way to reach here");
3942 guarantee(_cm->mark_stack_empty(), "only way to reach here");
3943 guarantee(_task_queue->size() == 0, "only way to reach here");
3944 guarantee(!_cm->has_overflown(), "only way to reach here");
3945 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3946 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
3948 if (_cm->verbose_low())
3949 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3950 } else {
3951 // Apparently there's more work to do. Let's abort this task. The
3952 // caller will restart it and we can hopefully find more things to do.
3954 if (_cm->verbose_low())
3955 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3957 set_has_aborted();
3958 statsOnly( ++_aborted_termination );
3959 }
3960 }
3962 // Mainly for debugging purposes to make sure that a pointer to the
3963 // closure which was stack-allocated in this frame doesn't
3964 // escape it by accident.
3965 set_oop_closure(NULL);
3966 double end_time_ms = os::elapsedVTime() * 1000.0;
3967 double elapsed_time_ms = end_time_ms - _start_time_ms;
3968 // Update the step history.
3969 _step_times_ms.add(elapsed_time_ms);
3971 if (has_aborted()) {
3972 // The task was aborted for some reason.
3974 statsOnly( ++_aborted );
3976 if (_has_aborted_timed_out) {
3977 double diff_ms = elapsed_time_ms - _time_target_ms;
3978 // Keep statistics of how well we did with respect to hitting
3979 // our target only if we actually timed out (if we aborted for
3980 // other reasons, then the results might get skewed).
3981 _marking_step_diffs_ms.add(diff_ms);
3982 }
3984 if (_cm->has_overflown()) {
3985 // This is the interesting one. We aborted because a global
3986 // overflow was raised. This means we have to restart the
3987 // marking phase and start iterating over regions. However, in
3988 // order to do this we have to make sure that all tasks stop
3989 // what they are doing and re-initialise in a safe manner. We
3990 // will achieve this with the use of two barrier sync points.
3992 if (_cm->verbose_low())
3993 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3995 _cm->enter_first_sync_barrier(_task_id);
3996 // When we exit this sync barrier we know that all tasks have
3997 // stopped doing marking work. So, it's now safe to
3998 // re-initialise our data structures. At the end of this method,
3999 // task 0 will clear the global data structures.
4001 statsOnly( ++_aborted_overflow );
4003 // We clear the local state of this task...
4004 clear_region_fields();
4006 // ...and enter the second barrier.
4007 _cm->enter_second_sync_barrier(_task_id);
4008 // At this point everything has been re-initialised and we're
4009 // ready to restart.
4010 }
4012 if (_cm->verbose_low()) {
4013 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4014 "elapsed = %1.2lfms <<<<<<<<<<",
4015 _task_id, _time_target_ms, elapsed_time_ms);
4016 if (_cm->has_aborted())
4017 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4018 _task_id);
4019 }
4020 } else {
4021 if (_cm->verbose_low())
4022 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4023 "elapsed = %1.2lfms <<<<<<<<<<",
4024 _task_id, _time_target_ms, elapsed_time_ms);
4025 }
4027 _claimed = false;
4028 }
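// An informal sketch (assumed typical usage, per the large comment
// above do_marking_step()) of how a caller drives a task in a loop
// until a step completes without aborting:
//
//   do {
//     task->do_marking_step(target_ms);
//     // ...yield to a safepoint / handle overflow between steps...
//   } while (task->has_aborted() && !cm->has_aborted());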
4030 CMTask::CMTask(int task_id,
4031 ConcurrentMark* cm,
4032 CMTaskQueue* task_queue,
4033 CMTaskQueueSet* task_queues)
4034 : _g1h(G1CollectedHeap::heap()),
4035 _task_id(task_id), _cm(cm),
4036 _claimed(false),
4037 _nextMarkBitMap(NULL), _hash_seed(17),
4038 _task_queue(task_queue),
4039 _task_queues(task_queues),
4040 _oop_closure(NULL) {
4041 guarantee(task_queue != NULL, "invariant");
4042 guarantee(task_queues != NULL, "invariant");
4044 statsOnly( _clock_due_to_scanning = 0;
4045 _clock_due_to_marking = 0 );
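// Seed the overshoot history so the first prediction is non-zero.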
4047 _marking_step_diffs_ms.add(0.5);
4048 }