Mon, 02 Aug 2010 12:51:43 -0700
6814437: G1: remove the _new_refs array
Summary: The per-worker _new_refs array is used to hold references that point into the collection set. It is populated during RSet updating and subsequently processed. In the event of an evacuation failure it is processed again to recreate the RSets of regions in the collection set. Remove the per-worker _new_refs array by processing the references directly. Use a DirtyCardQueue to hold the cards containing the references so that the RSets of regions in the collection set can be recreated when handling an evacuation failure.
Reviewed-by: iveresov, jmasa, tonyp
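The scheme can be pictured with the sketch below. This is illustrative only, not the patch itself: the helper card_references_into_cset is hypothetical, and into_cset_dirty_card_queue_set is assumed to be the accessor this change introduces.

// Illustrative sketch: during RSet updating, a reference into the
// collection set is now processed immediately instead of being buffered
// in a per-worker _new_refs array; only the card holding it is logged.
//
//   DirtyCardQueue into_cset_dcq(&g1h->into_cset_dirty_card_queue_set());
//   ...
//   if (card_references_into_cset(card_ptr)) {   // hypothetical helper
//     // Remember the card, not the individual references. If evacuation
//     // fails, replaying these cards recreates the RSets of the
//     // collection-set regions.
//     into_cset_dcq.enqueue(card_ptr);
//   }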
1 /*
2 * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentMark.cpp.incl"
28 //
29 // CMS Bit Map Wrapper
31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
32 _bm((uintptr_t*)NULL,0),
33 _shifter(shifter) {
34 _bmStartWord = (HeapWord*)(rs.base());
35 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
36 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
37 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
39 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
40 // For now we'll just commit all of the bit map up front.
41 // Later on we'll try to be more parsimonious with swap.
42 guarantee(_virtual_space.initialize(brs, brs.size()),
43 "couldn't reseve backing store for CMS bit map");
44 assert(_virtual_space.committed_size() == brs.size(),
45 "didn't reserve backing store for all of CMS bit map?");
46 _bm.set_map((uintptr_t*)_virtual_space.low());
47 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
48 _bmWordSize, "inconsistency in bit map sizing");
49 _bm.set_size(_bmWordSize >> _shifter);
50 }
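// A worked sizing example (illustrative, assuming a 64-bit VM where
// MinObjAlignment == 1, so the bitmaps are constructed with
// _shifter == 0 -- see the ConcurrentMark constructor below, which
// passes MinObjAlignment - 1 -- and each bit covers one HeapWord):
// for a 1 GB reservation, _bmWordSize = 2^30 / 8 = 2^27 words, and
// the backing store computed above is
//   (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1
//     = (2^27 >> 3) + 1 bytes = 16 MB + 1 byte
// rounded up to the allocation alignment, i.e. roughly 1/64 of the
// covered space per bitmap.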
52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
53 HeapWord* limit) const {
54 // First we must round addr *up* to a possible object boundary.
55 addr = (HeapWord*)align_size_up((intptr_t)addr,
56 HeapWordSize << _shifter);
57 size_t addrOffset = heapWordToOffset(addr);
58 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
59 size_t limitOffset = heapWordToOffset(limit);
60 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
61 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
62 assert(nextAddr >= addr, "get_next_one postcondition");
63 assert(nextAddr == limit || isMarked(nextAddr),
64 "get_next_one postcondition");
65 return nextAddr;
66 }
68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
69 HeapWord* limit) const {
70 size_t addrOffset = heapWordToOffset(addr);
71 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
72 size_t limitOffset = heapWordToOffset(limit);
73 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
74 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 assert(nextAddr >= addr, "get_next_one postcondition");
76 assert(nextAddr == limit || !isMarked(nextAddr),
77 "get_next_one postcondition");
78 return nextAddr;
79 }
81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
82 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
83 return (int) (diff >> _shifter);
84 }
86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
87 HeapWord* left = MAX2(_bmStartWord, mr.start());
88 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
89 if (right > left) {
90 // Right-open interval [leftOffset, rightOffset).
91 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
92 } else {
93 return true;
94 }
95 }
97 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
98 size_t from_start_index,
99 HeapWord* to_start_word,
100 size_t word_num) {
101 _bm.mostly_disjoint_range_union(from_bitmap,
102 from_start_index,
103 heapWordToOffset(to_start_word),
104 word_num);
105 }
107 #ifndef PRODUCT
108 bool CMBitMapRO::covers(ReservedSpace rs) const {
109 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
110 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
111 "size inconsistency");
112 return _bmStartWord == (HeapWord*)(rs.base()) &&
113 _bmWordSize == rs.size()>>LogHeapWordSize;
114 }
115 #endif
117 void CMBitMap::clearAll() {
118 _bm.clear();
119 return;
120 }
122 void CMBitMap::markRange(MemRegion mr) {
123 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
124 assert(!mr.is_empty(), "unexpected empty region");
125 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
126 ((HeapWord *) mr.end())),
127 "markRange memory region end is not card aligned");
128 // convert address range into offset range
129 _bm.at_put_range(heapWordToOffset(mr.start()),
130 heapWordToOffset(mr.end()), true);
131 }
133 void CMBitMap::clearRange(MemRegion mr) {
134 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
135 assert(!mr.is_empty(), "unexpected empty region");
136 // convert address range into offset range
137 _bm.at_put_range(heapWordToOffset(mr.start()),
138 heapWordToOffset(mr.end()), false);
139 }
141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
142 HeapWord* end_addr) {
143 HeapWord* start = getNextMarkedWordAddress(addr);
144 start = MIN2(start, end_addr);
145 HeapWord* end = getNextUnmarkedWordAddress(start);
146 end = MIN2(end, end_addr);
147 assert(start <= end, "Consistency check");
148 MemRegion mr(start, end);
149 if (!mr.is_empty()) {
150 clearRange(mr);
151 }
152 return mr;
153 }
155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
156 _base(NULL), _cm(cm)
157 #ifdef ASSERT
158 , _drain_in_progress(false)
159 , _drain_in_progress_yields(false)
160 #endif
161 {}
163 void CMMarkStack::allocate(size_t size) {
164 _base = NEW_C_HEAP_ARRAY(oop, size);
165 if (_base == NULL)
166 vm_exit_during_initialization("Failed to allocate "
167 "CM region mark stack");
168 _index = 0;
169 // QQQQ cast ...
170 _capacity = (jint) size;
171 _oops_do_bound = -1;
172 NOT_PRODUCT(_max_depth = 0);
173 }
175 CMMarkStack::~CMMarkStack() {
176 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
177 }
179 void CMMarkStack::par_push(oop ptr) {
180 while (true) {
181 if (isFull()) {
182 _overflow = true;
183 return;
184 }
185 // Otherwise...
186 jint index = _index;
187 jint next_index = index+1;
188 jint res = Atomic::cmpxchg(next_index, &_index, index);
189 if (res == index) {
190 _base[index] = ptr;
191 // Note that we don't maintain this atomically. We could, but it
192 // doesn't seem necessary.
193 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
194 return;
195 }
196 // Otherwise, we need to try again.
197 }
198 }
200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
201 while (true) {
202 if (isFull()) {
203 _overflow = true;
204 return;
205 }
206 // Otherwise...
207 jint index = _index;
208 jint next_index = index + n;
209 if (next_index > _capacity) {
210 _overflow = true;
211 return;
212 }
213 jint res = Atomic::cmpxchg(next_index, &_index, index);
214 if (res == index) {
215 for (int i = 0; i < n; i++) {
216 int ind = index + i;
217 assert(ind < _capacity, "By overflow test above.");
218 _base[ind] = ptr_arr[i];
219 }
220 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
221 return;
222 }
223 // Otherwise, we need to try again.
224 }
225 }
228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
229 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
230 jint start = _index;
231 jint next_index = start + n;
232 if (next_index > _capacity) {
233 _overflow = true;
234 return;
235 }
236 // Otherwise.
237 _index = next_index;
238 for (int i = 0; i < n; i++) {
239 int ind = start + i;
240 assert(ind < _capacity, "By overflow test above.");
241 _base[ind] = ptr_arr[i];
242 }
243 }
246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
247 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
248 jint index = _index;
249 if (index == 0) {
250 *n = 0;
251 return false;
252 } else {
253 int k = MIN2(max, index);
254 jint new_ind = index - k;
255 for (int j = 0; j < k; j++) {
256 ptr_arr[j] = _base[new_ind + j];
257 }
258 _index = new_ind;
259 *n = k;
260 return true;
261 }
262 }
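// Illustrative only: a hedged example of how the primitives above could
// be combined to drain the stack in chunks. example_drain_in_chunks is
// hypothetical and is not used anywhere in this file.
#if 0
static void example_drain_in_chunks(CMMarkStack* stack, OopClosure* cl) {
  oop buffer[64];
  int n;
  while (stack->par_pop_arr(buffer, 64, &n)) {
    for (int i = 0; i < n; i++) {
      cl->do_oop(&buffer[i]);   // process each popped entry
    }
  }
}
#endif // 0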
265 CMRegionStack::CMRegionStack() : _base(NULL) {}
267 void CMRegionStack::allocate(size_t size) {
268 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
269 if (_base == NULL)
270 vm_exit_during_initialization("Failed to allocate "
271 "CM region mark stack");
272 _index = 0;
273 // QQQQ cast ...
274 _capacity = (jint) size;
275 }
277 CMRegionStack::~CMRegionStack() {
278 if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
279 }
281 void CMRegionStack::push(MemRegion mr) {
282 assert(mr.word_size() > 0, "Precondition");
283 while (true) {
284 if (isFull()) {
285 _overflow = true;
286 return;
287 }
288 // Otherwise...
289 jint index = _index;
290 jint next_index = index+1;
291 jint res = Atomic::cmpxchg(next_index, &_index, index);
292 if (res == index) {
293 _base[index] = mr;
294 return;
295 }
296 // Otherwise, we need to try again.
297 }
298 }
300 // Currently we do not call this at all. Normally we would call it
301 // during the concurrent marking / remark phases but we now call
302 // the lock-based version instead. But we might want to resurrect this
303 // code in the future. So, we'll leave it here commented out.
304 #if 0
305 MemRegion CMRegionStack::pop() {
306 while (true) {
307 // Otherwise...
308 jint index = _index;
310 if (index == 0) {
311 return MemRegion();
312 }
313 jint next_index = index-1;
314 jint res = Atomic::cmpxchg(next_index, &_index, index);
315 if (res == index) {
316 MemRegion mr = _base[next_index];
317 if (mr.start() != NULL) {
318 assert(mr.end() != NULL, "invariant");
319 assert(mr.word_size() > 0, "invariant");
320 return mr;
321 } else {
322 // that entry was invalidated... let's skip it
323 assert(mr.end() == NULL, "invariant");
324 }
325 }
326 // Otherwise, we need to try again.
327 }
328 }
329 #endif // 0
331 void CMRegionStack::push_with_lock(MemRegion mr) {
332 assert(mr.word_size() > 0, "Precondition");
333 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
335 if (isFull()) {
336 _overflow = true;
337 return;
338 }
340 _base[_index] = mr;
341 _index += 1;
342 }
344 MemRegion CMRegionStack::pop_with_lock() {
345 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
347 while (true) {
348 if (_index == 0) {
349 return MemRegion();
350 }
351 _index -= 1;
353 MemRegion mr = _base[_index];
354 if (mr.start() != NULL) {
355 assert(mr.end() != NULL, "invariant");
356 assert(mr.word_size() > 0, "invariant");
357 return mr;
358 } else {
359 // that entry was invalidated... let's skip it
360 assert(mr.end() == NULL, "invariant");
361 }
362 }
363 }
365 bool CMRegionStack::invalidate_entries_into_cset() {
366 bool result = false;
367 G1CollectedHeap* g1h = G1CollectedHeap::heap();
368 for (int i = 0; i < _oops_do_bound; ++i) {
369 MemRegion mr = _base[i];
370 if (mr.start() != NULL) {
371 assert(mr.end() != NULL, "invariant");
372 assert(mr.word_size() > 0, "invariant");
373 HeapRegion* hr = g1h->heap_region_containing(mr.start());
374 assert(hr != NULL, "invariant");
375 if (hr->in_collection_set()) {
376 // The region points into the collection set
377 _base[i] = MemRegion();
378 result = true;
379 }
380 } else {
381 // that entry was invalidated... let's skip it
382 assert(mr.end() == NULL, "invariant");
383 }
384 }
385 return result;
386 }
388 template<class OopClosureClass>
389 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
390 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
391 || SafepointSynchronize::is_at_safepoint(),
392 "Drain recursion must be yield-safe.");
393 bool res = true;
394 debug_only(_drain_in_progress = true);
395 debug_only(_drain_in_progress_yields = yield_after);
396 while (!isEmpty()) {
397 oop newOop = pop();
398 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
399 assert(newOop->is_oop(), "Expected an oop");
400 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
401 "only grey objects on this stack");
402 // iterate over the oops in this oop, marking and pushing
403 // the ones in CMS generation.
404 newOop->oop_iterate(cl);
405 if (yield_after && _cm->do_yield_check()) {
406 res = false; break;
407 }
408 }
409 debug_only(_drain_in_progress = false);
410 return res;
411 }
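// Illustrative only: a hypothetical closure that could be passed to
// drain() above. ExampleCountClosure does not exist in this file; it
// merely shows the shape drain()'s template parameter expects.
#if 0
class ExampleCountClosure: public OopClosure {
  size_t _count;
public:
  ExampleCountClosure() : _count(0) { }
  virtual void do_oop(oop* p)       { _count++; }
  virtual void do_oop(narrowOop* p) { _count++; }
  size_t count() { return _count; }
};
// ExampleCountClosure cl;
// _markStack.drain(&cl, _nextMarkBitMap, true /* yield_after */);
#endif // 0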
413 void CMMarkStack::oops_do(OopClosure* f) {
414 if (_index == 0) return;
415 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
416 "Bound must be set.");
417 for (int i = 0; i < _oops_do_bound; i++) {
418 f->do_oop(&_base[i]);
419 }
420 _oops_do_bound = -1;
421 }
423 bool ConcurrentMark::not_yet_marked(oop obj) const {
424 return (_g1h->is_obj_ill(obj)
425 || (_g1h->is_in_permanent(obj)
426 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
427 }
429 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
430 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
431 #endif // _MSC_VER
433 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
434 int max_regions) :
435 _markBitMap1(rs, MinObjAlignment - 1),
436 _markBitMap2(rs, MinObjAlignment - 1),
438 _parallel_marking_threads(0),
439 _sleep_factor(0.0),
440 _marking_task_overhead(1.0),
441 _cleanup_sleep_factor(0.0),
442 _cleanup_task_overhead(1.0),
443 _region_bm(max_regions, false /* in_resource_area*/),
444 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
445 CardTableModRefBS::card_shift,
446 false /* in_resource_area*/),
447 _prevMarkBitMap(&_markBitMap1),
448 _nextMarkBitMap(&_markBitMap2),
449 _at_least_one_mark_complete(false),
451 _markStack(this),
452 _regionStack(),
453 // _finger set in set_non_marking_state
455 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
456 // _active_tasks set in set_non_marking_state
457 // _tasks set inside the constructor
458 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
459 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
461 _has_overflown(false),
462 _concurrent(false),
463 _has_aborted(false),
464 _restart_for_overflow(false),
465 _concurrent_marking_in_progress(false),
466 _should_gray_objects(false),
468 // _verbose_level set below
470 _init_times(),
471 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
472 _cleanup_times(),
473 _total_counting_time(0.0),
474 _total_rs_scrub_time(0.0),
476 _parallel_workers(NULL)
477 {
478 CMVerboseLevel verbose_level =
479 (CMVerboseLevel) G1MarkingVerboseLevel;
480 if (verbose_level < no_verbose)
481 verbose_level = no_verbose;
482 if (verbose_level > high_verbose)
483 verbose_level = high_verbose;
484 _verbose_level = verbose_level;
486 if (verbose_low())
487 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
488 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
490 _markStack.allocate(MarkStackSize);
491 _regionStack.allocate(G1MarkRegionStackSize);
493 // Create & start a ConcurrentMark thread.
494 _cmThread = new ConcurrentMarkThread(this);
495 assert(cmThread() != NULL, "CM Thread should have been created");
496 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
498 _g1h = G1CollectedHeap::heap();
499 assert(CGC_lock != NULL, "Where's the CGC_lock?");
500 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
501 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
503 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
504 satb_qs.set_buffer_size(G1SATBBufferSize);
506 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
507 _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
508 for (int i = 0 ; i < size; i++) {
509 _par_cleanup_thread_state[i] = new ParCleanupThreadState;
510 }
512 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
513 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
515 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
516 _active_tasks = _max_task_num;
517 for (int i = 0; i < (int) _max_task_num; ++i) {
518 CMTaskQueue* task_queue = new CMTaskQueue();
519 task_queue->initialize();
520 _task_queues->register_queue(i, task_queue);
522 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
523 _accum_task_vtime[i] = 0.0;
524 }
526 if (ConcGCThreads > ParallelGCThreads) {
527 vm_exit_during_initialization("Can't have more ConcGCThreads "
528 "than ParallelGCThreads.");
529 }
530 if (ParallelGCThreads == 0) {
531 // if we are not running with any parallel GC threads we will not
532 // spawn any marking threads either
533 _parallel_marking_threads = 0;
534 _sleep_factor = 0.0;
535 _marking_task_overhead = 1.0;
536 } else {
537 if (ConcGCThreads > 0) {
538 // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
539 // if both are set
541 _parallel_marking_threads = ConcGCThreads;
542 _sleep_factor = 0.0;
543 _marking_task_overhead = 1.0;
544 } else if (G1MarkingOverheadPercent > 0) {
545 // we will calculate the number of parallel marking threads
546 // based on a target overhead with respect to the soft real-time
547 // goal
549 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
550 double overall_cm_overhead =
551 (double) MaxGCPauseMillis * marking_overhead /
552 (double) GCPauseIntervalMillis;
553 double cpu_ratio = 1.0 / (double) os::processor_count();
554 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
555 double marking_task_overhead =
556 overall_cm_overhead / marking_thread_num *
557 (double) os::processor_count();
558 double sleep_factor =
559 (1.0 - marking_task_overhead) / marking_task_overhead;
561 _parallel_marking_threads = (size_t) marking_thread_num;
562 _sleep_factor = sleep_factor;
563 _marking_task_overhead = marking_task_overhead;
564 } else {
565 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
566 _sleep_factor = 0.0;
567 _marking_task_overhead = 1.0;
568 }
570 if (parallel_marking_threads() > 1)
571 _cleanup_task_overhead = 1.0;
572 else
573 _cleanup_task_overhead = marking_task_overhead();
574 _cleanup_sleep_factor =
575 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
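// A worked example of the G1MarkingOverheadPercent calculation above
// (illustrative values): with G1MarkingOverheadPercent = 10,
// MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000 and 8 processors:
//   marking_overhead      = 0.10
//   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
//   cpu_ratio             = 1 / 8               = 0.125
//   marking_thread_num    = ceil(0.02 / 0.125)  = 1
//   marking_task_overhead = 0.02 / 1 * 8        = 0.16
//   sleep_factor          = (1 - 0.16) / 0.16   = 5.25
// i.e. a single marking thread that sleeps 5.25 ms for every
// millisecond of vtime it consumes.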
577 #if 0
578 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
579 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
580 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
581 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
582 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
583 #endif
585 guarantee(parallel_marking_threads() > 0, "peace of mind");
586 _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
587 (int) parallel_marking_threads(), false, true);
588 if (_parallel_workers == NULL)
589 vm_exit_during_initialization("Failed necessary allocation.");
590 }
592 // so that the call below can read a sensible value
593 _heap_start = (HeapWord*) rs.base();
594 set_non_marking_state();
595 }
597 void ConcurrentMark::update_g1_committed(bool force) {
598 // If concurrent marking is not in progress, then we do not need to
599 // update _heap_end. This has a subtle and important
600 // side-effect. Imagine that two evacuation pauses happen between
601 // marking completion and remark. The first one can grow the
602 // heap (hence now the finger is below the heap end). Then, the
603 // second one could unnecessarily push regions on the region
604 // stack. This causes the invariant that the region stack is empty
605 // at the beginning of remark to be false. By ensuring that we do
606 // not observe heap expansions after marking is complete, then we do
607 // not have this problem.
608 if (!concurrent_marking_in_progress() && !force)
609 return;
611 MemRegion committed = _g1h->g1_committed();
612 assert(committed.start() == _heap_start, "start shouldn't change");
613 HeapWord* new_end = committed.end();
614 if (new_end > _heap_end) {
615 // The heap has been expanded.
617 _heap_end = new_end;
618 }
619 // Notice that the heap can also shrink. However, this only happens
620 // during a Full GC (at least currently) and the entire marking
621 // phase will bail out and the task will not be restarted. So, let's
622 // do nothing.
623 }
625 void ConcurrentMark::reset() {
626 // Starting values for these two. This should be called in a STW
627 // phase. CM will be notified of any future g1_committed expansions
628 // at the end of evacuation pauses, when tasks are
629 // inactive.
630 MemRegion committed = _g1h->g1_committed();
631 _heap_start = committed.start();
632 _heap_end = committed.end();
634 // Separated the asserts so that we know which one fires.
635 assert(_heap_start != NULL, "heap bounds should look ok");
636 assert(_heap_end != NULL, "heap bounds should look ok");
637 assert(_heap_start < _heap_end, "heap bounds should look ok");
639 // reset all the marking data structures and any necessary flags
640 clear_marking_state();
642 if (verbose_low())
643 gclog_or_tty->print_cr("[global] resetting");
645 // We do reset all of them, since different phases will use
646 // different number of active threads. So, it's easiest to have all
647 // of them ready.
648 for (int i = 0; i < (int) _max_task_num; ++i)
649 _tasks[i]->reset(_nextMarkBitMap);
651 // we need this to make sure that the flag is on during the evac
652 // pause with initial mark piggy-backed
653 set_concurrent_marking_in_progress();
654 }
656 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
657 assert(active_tasks <= _max_task_num, "we should not have more");
659 _active_tasks = active_tasks;
660 // Need to update the three data structures below according to the
661 // number of active threads for this phase.
662 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
663 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
664 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
666 _concurrent = concurrent;
667 // We propagate this to all tasks, not just the active ones.
668 for (int i = 0; i < (int) _max_task_num; ++i)
669 _tasks[i]->set_concurrent(concurrent);
671 if (concurrent) {
672 set_concurrent_marking_in_progress();
673 } else {
674 // We currently assume that the concurrent flag has been set to
675 // false before we start remark. At this point we should also be
676 // in a STW phase.
677 assert(!concurrent_marking_in_progress(), "invariant");
678 assert(_finger == _heap_end, "only way to get here");
679 update_g1_committed(true);
680 }
681 }
683 void ConcurrentMark::set_non_marking_state() {
684 // We set the global marking state to some default values when we're
685 // not doing marking.
686 clear_marking_state();
687 _active_tasks = 0;
688 clear_concurrent_marking_in_progress();
689 }
691 ConcurrentMark::~ConcurrentMark() {
692 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
693 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
694 FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
695 _par_cleanup_thread_state);
697 for (int i = 0; i < (int) _max_task_num; ++i) {
698 delete _task_queues->queue(i);
699 delete _tasks[i];
700 }
701 delete _task_queues;
702 FREE_C_HEAP_ARRAY(CMTask*, _tasks);
703 }
705 // This closure is used to mark refs into the g1 generation
706 // from external roots in the CMS bit map.
707 // Called at the first checkpoint.
708 //
710 void ConcurrentMark::clearNextBitmap() {
711 G1CollectedHeap* g1h = G1CollectedHeap::heap();
712 G1CollectorPolicy* g1p = g1h->g1_policy();
714 // Make sure that the concurrent mark thread looks to still be in
715 // the current cycle.
716 guarantee(cmThread()->during_cycle(), "invariant");
718 // We are finishing up the current cycle by clearing the next
719 // marking bitmap and getting it ready for the next cycle. During
720 // this time no other cycle can start. So, let's make sure that this
721 // is the case.
722 guarantee(!g1h->mark_in_progress(), "invariant");
724 // clear the mark bitmap (no grey objects to start with).
725 // We need to do this in chunks and offer to yield in between
726 // each chunk.
727 HeapWord* start = _nextMarkBitMap->startWord();
728 HeapWord* end = _nextMarkBitMap->endWord();
729 HeapWord* cur = start;
730 size_t chunkSize = M;
731 while (cur < end) {
732 HeapWord* next = cur + chunkSize;
733 if (next > end)
734 next = end;
735 MemRegion mr(cur,next);
736 _nextMarkBitMap->clearRange(mr);
737 cur = next;
738 do_yield_check();
740 // Repeat the asserts from above. We'll do them as asserts here to
741 // minimize their overhead on the product. However, we'll have
742 // them as guarantees at the beginning / end of the bitmap
743 // clearing to get some checking in the product.
744 assert(cmThread()->during_cycle(), "invariant");
745 assert(!g1h->mark_in_progress(), "invariant");
746 }
748 // Repeat the asserts from above.
749 guarantee(cmThread()->during_cycle(), "invariant");
750 guarantee(!g1h->mark_in_progress(), "invariant");
751 }
753 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
754 public:
755 bool doHeapRegion(HeapRegion* r) {
756 if (!r->continuesHumongous()) {
757 r->note_start_of_marking(true);
758 }
759 return false;
760 }
761 };
763 void ConcurrentMark::checkpointRootsInitialPre() {
764 G1CollectedHeap* g1h = G1CollectedHeap::heap();
765 G1CollectorPolicy* g1p = g1h->g1_policy();
767 _has_aborted = false;
769 #ifndef PRODUCT
770 if (G1PrintReachableAtInitialMark) {
771 print_reachable("at-cycle-start",
772 true /* use_prev_marking */, true /* all */);
773 }
774 #endif
776 // Initialise marking structures. This has to be done in a STW phase.
777 reset();
778 }
780 class CMMarkRootsClosure: public OopsInGenClosure {
781 private:
782 ConcurrentMark* _cm;
783 G1CollectedHeap* _g1h;
784 bool _do_barrier;
786 public:
787 CMMarkRootsClosure(ConcurrentMark* cm,
788 G1CollectedHeap* g1h,
789 bool do_barrier) : _cm(cm), _g1h(g1h),
790 _do_barrier(do_barrier) { }
792 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
793 virtual void do_oop( oop* p) { do_oop_work(p); }
795 template <class T> void do_oop_work(T* p) {
796 T heap_oop = oopDesc::load_heap_oop(p);
797 if (!oopDesc::is_null(heap_oop)) {
798 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
799 assert(obj->is_oop() || obj->mark() == NULL,
800 "expected an oop, possibly with mark word displaced");
801 HeapWord* addr = (HeapWord*)obj;
802 if (_g1h->is_in_g1_reserved(addr)) {
803 _cm->grayRoot(obj);
804 }
805 }
806 if (_do_barrier) {
807 assert(!_g1h->is_in_g1_reserved(p),
808 "Should be called on external roots");
809 do_barrier(p);
810 }
811 }
812 };
814 void ConcurrentMark::checkpointRootsInitialPost() {
815 G1CollectedHeap* g1h = G1CollectedHeap::heap();
817 // For each region note start of marking.
818 NoteStartOfMarkHRClosure startcl;
819 g1h->heap_region_iterate(&startcl);
821 // Start weak-reference discovery.
822 ReferenceProcessor* rp = g1h->ref_processor();
823 rp->verify_no_references_recorded();
824 rp->enable_discovery(); // enable ("weak") refs discovery
825 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
827 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
828 // This is the start of the marking cycle. We expect all
829 // threads to have SATB queues with active set to false.
830 satb_mq_set.set_active_all_threads(true, /* new active value */
831 false /* expected_active */);
833 // update_g1_committed() will be called at the end of an evac pause
834 // when marking is on. So, it's also called at the end of the
835 // initial-mark pause to update the heap end, if the heap expands
836 // during it. No need to call it here.
837 }
839 // Checkpoint the roots into this generation from outside
840 // this generation. [Note this initial checkpoint need only
841 // be approximate -- we'll do a catch up phase subsequently.]
842 void ConcurrentMark::checkpointRootsInitial() {
843 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
844 G1CollectedHeap* g1h = G1CollectedHeap::heap();
846 double start = os::elapsedTime();
848 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
849 g1p->record_concurrent_mark_init_start();
850 checkpointRootsInitialPre();
852 // YSR: when concurrent precleaning is in place, we'll
853 // need to clear the cached card table here
855 ResourceMark rm;
856 HandleMark hm;
858 g1h->ensure_parsability(false);
859 g1h->perm_gen()->save_marks();
861 CMMarkRootsClosure notOlder(this, g1h, false);
862 CMMarkRootsClosure older(this, g1h, true);
864 g1h->set_marking_started();
865 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
867 g1h->process_strong_roots(true, // activate StrongRootsScope
868 false, // fake perm gen collection
869 SharedHeap::SO_AllClasses,
870 &notOlder, // Regular roots
871 NULL, // do not visit active blobs
872 &older // Perm Gen Roots
873 );
874 checkpointRootsInitialPost();
876 // Statistics.
877 double end = os::elapsedTime();
878 _init_times.add((end - start) * 1000.0);
880 g1p->record_concurrent_mark_init_end();
881 }
883 /*
884 Notice that in the next two methods, we actually leave the STS
885 during the barrier sync and join it immediately afterwards. If we
886 do not do this, then the following deadlock can occur: one
887 thread could be in the barrier sync code, waiting for the other
888 thread to also sync up, whereas another one could be trying to
889 yield, while also waiting for the other threads to sync up too.
891 Because the thread that does the sync barrier has left the STS, it
892 is possible for a Full GC or an evacuation pause to occur while it
893 waits. This is actually safe, since entering the sync
894 barrier is one of the last things do_marking_step() does, and it
895 doesn't manipulate any data structures afterwards.
896 */
898 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
899 if (verbose_low())
900 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
902 ConcurrentGCThread::stsLeave();
903 _first_overflow_barrier_sync.enter();
904 ConcurrentGCThread::stsJoin();
905 // at this point everyone should have synced up and not be doing any
906 // more work
908 if (verbose_low())
909 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
911 // let task 0 do this
912 if (task_num == 0) {
913 // task 0 is responsible for clearing the global data structures
914 clear_marking_state();
916 if (PrintGC) {
917 gclog_or_tty->date_stamp(PrintGCDateStamps);
918 gclog_or_tty->stamp(PrintGCTimeStamps);
919 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
920 }
921 }
923 // after this, each task should reset its own data structures and
924 // then go into the second barrier
925 }
927 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
928 if (verbose_low())
929 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
931 ConcurrentGCThread::stsLeave();
932 _second_overflow_barrier_sync.enter();
933 ConcurrentGCThread::stsJoin();
934 // at this point everything should be re-initialised and ready to go
936 if (verbose_low())
937 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
938 }
940 void ConcurrentMark::grayRoot(oop p) {
941 HeapWord* addr = (HeapWord*) p;
942 // We can't really check against _heap_start and _heap_end, since it
943 // is possible during an evacuation pause with piggy-backed
944 // initial-mark that the committed space is expanded during the
945 // pause without CM observing this change. So the assertion below
946 // is a bit conservative; but better than nothing.
947 assert(_g1h->g1_committed().contains(addr),
948 "address should be within the heap bounds");
950 if (!_nextMarkBitMap->isMarked(addr))
951 _nextMarkBitMap->parMark(addr);
952 }
954 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
955 // The objects on the region have already been marked "in bulk" by
956 // the caller. We only need to decide whether to push the region on
957 // the region stack or not.
959 if (!concurrent_marking_in_progress() || !_should_gray_objects)
960 // We're done with marking and waiting for remark. We do not need to
961 // push anything else on the region stack.
962 return;
964 HeapWord* finger = _finger;
966 if (verbose_low())
967 gclog_or_tty->print_cr("[global] attempting to push "
968 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
969 PTR_FORMAT, mr.start(), mr.end(), finger);
971 if (mr.start() < finger) {
972 // The finger is always heap region aligned and it is not possible
973 // for mr to span heap regions.
974 assert(mr.end() <= finger, "invariant");
976 // Separated the asserts so that we know which one fires.
977 assert(mr.start() <= mr.end(),
978 "region boundaries should fall within the committed space");
979 assert(_heap_start <= mr.start(),
980 "region boundaries should fall within the committed space");
981 assert(mr.end() <= _heap_end,
982 "region boundaries should fall within the committed space");
983 if (verbose_low())
984 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
985 "below the finger, pushing it",
986 mr.start(), mr.end());
988 if (!region_stack_push(mr)) {
989 if (verbose_low())
990 gclog_or_tty->print_cr("[global] region stack has overflown.");
991 }
992 }
993 }
995 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
996 // The object is not marked by the caller. We need to at least mark
997 // it and maybe push it on the stack.
999 HeapWord* addr = (HeapWord*)p;
1000 if (!_nextMarkBitMap->isMarked(addr)) {
1001 // We definitely need to mark it, irrespective of whether we bail out
1002 // because we're done with marking.
1003 if (_nextMarkBitMap->parMark(addr)) {
1004 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1005 // If we're done with concurrent marking and we're waiting for
1006 // remark, then we're not pushing anything on the stack.
1007 return;
1009 // No OrderAccess::store_load() is needed. It is implicit in the
1010 // CAS done in parMark(addr) above
1011 HeapWord* finger = _finger;
1013 if (addr < finger) {
1014 if (!mark_stack_push(oop(addr))) {
1015 if (verbose_low())
1016 gclog_or_tty->print_cr("[global] global stack overflow "
1017 "during parMark");
1018 }
1019 }
1020 }
1021 }
1022 }
1024 class CMConcurrentMarkingTask: public AbstractGangTask {
1025 private:
1026 ConcurrentMark* _cm;
1027 ConcurrentMarkThread* _cmt;
1029 public:
1030 void work(int worker_i) {
1031 assert(Thread::current()->is_ConcurrentGC_thread(),
1032 "this should only be done by a conc GC thread");
1034 double start_vtime = os::elapsedVTime();
1036 ConcurrentGCThread::stsJoin();
1038 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1039 CMTask* the_task = _cm->task(worker_i);
1040 the_task->record_start_time();
1041 if (!_cm->has_aborted()) {
1042 do {
1043 double start_vtime_sec = os::elapsedVTime();
1044 double start_time_sec = os::elapsedTime();
1045 the_task->do_marking_step(10.0);
1046 double end_time_sec = os::elapsedTime();
1047 double end_vtime_sec = os::elapsedVTime();
1048 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1049 double elapsed_time_sec = end_time_sec - start_time_sec;
1050 _cm->clear_has_overflown();
1052 bool ret = _cm->do_yield_check(worker_i);
1054 jlong sleep_time_ms;
1055 if (!_cm->has_aborted() && the_task->has_aborted()) {
1056 sleep_time_ms =
1057 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1058 ConcurrentGCThread::stsLeave();
1059 os::sleep(Thread::current(), sleep_time_ms, false);
1060 ConcurrentGCThread::stsJoin();
1061 }
1062 double end_time2_sec = os::elapsedTime();
1063 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1065 #if 0
1066 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1067 "overhead %1.4lf",
1068 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1069 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1070 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1071 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1072 #endif
1073 } while (!_cm->has_aborted() && the_task->has_aborted());
1074 }
1075 the_task->record_end_time();
1076 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1078 ConcurrentGCThread::stsLeave();
1080 double end_vtime = os::elapsedVTime();
1081 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1082 }
1084 CMConcurrentMarkingTask(ConcurrentMark* cm,
1085 ConcurrentMarkThread* cmt) :
1086 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1088 ~CMConcurrentMarkingTask() { }
1089 };
1091 void ConcurrentMark::markFromRoots() {
1092 // we might be tempted to assert that:
1093 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1094 // "inconsistent argument?");
1095 // However that wouldn't be right, because it's possible that
1096 // a safepoint is indeed in progress as a younger generation
1097 // stop-the-world GC happens even as we mark in this generation.
1099 _restart_for_overflow = false;
1101 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1103 CMConcurrentMarkingTask markingTask(this, cmThread());
1104 if (parallel_marking_threads() > 0)
1105 _parallel_workers->run_task(&markingTask);
1106 else
1107 markingTask.work(0);
1108 print_stats();
1109 }
1111 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1112 // world is stopped at this checkpoint
1113 assert(SafepointSynchronize::is_at_safepoint(),
1114 "world should be stopped");
1115 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1117 // If a full collection has happened, we shouldn't do this.
1118 if (has_aborted()) {
1119 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1120 return;
1121 }
1123 if (VerifyDuringGC) {
1124 HandleMark hm; // handle scope
1125 gclog_or_tty->print(" VerifyDuringGC:(before)");
1126 Universe::heap()->prepare_for_verify();
1127 Universe::verify(true, false, true);
1128 }
1130 G1CollectorPolicy* g1p = g1h->g1_policy();
1131 g1p->record_concurrent_mark_remark_start();
1133 double start = os::elapsedTime();
1135 checkpointRootsFinalWork();
1137 double mark_work_end = os::elapsedTime();
1139 weakRefsWork(clear_all_soft_refs);
1141 if (has_overflown()) {
1142 // Oops. We overflowed. Restart concurrent marking.
1143 _restart_for_overflow = true;
1144 // Clear the flag. We do not need it any more.
1145 clear_has_overflown();
1146 if (G1TraceMarkStackOverflow)
1147 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1148 } else {
1149 // We're done with marking.
1150 // This is the end of the marking cycle. We expect all
1151 // threads to have SATB queues with active set to true.
1152 JavaThread::satb_mark_queue_set().set_active_all_threads(
1153 false, /* new active value */
1154 true /* expected_active */);
1156 if (VerifyDuringGC) {
1157 HandleMark hm; // handle scope
1158 gclog_or_tty->print(" VerifyDuringGC:(after)");
1159 Universe::heap()->prepare_for_verify();
1160 Universe::heap()->verify(/* allow_dirty */ true,
1161 /* silent */ false,
1162 /* use_prev_marking */ false);
1163 }
1164 }
1166 #if VERIFY_OBJS_PROCESSED
1167 _scan_obj_cl.objs_processed = 0;
1168 ThreadLocalObjQueue::objs_enqueued = 0;
1169 #endif
1171 // Statistics
1172 double now = os::elapsedTime();
1173 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1174 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1175 _remark_times.add((now - start) * 1000.0);
1177 g1p->record_concurrent_mark_remark_end();
1178 }
1181 #define CARD_BM_TEST_MODE 0
1183 class CalcLiveObjectsClosure: public HeapRegionClosure {
1185 CMBitMapRO* _bm;
1186 ConcurrentMark* _cm;
1187 bool _changed;
1188 bool _yield;
1189 size_t _words_done;
1190 size_t _tot_live;
1191 size_t _tot_used;
1192 size_t _regions_done;
1193 double _start_vtime_sec;
1195 BitMap* _region_bm;
1196 BitMap* _card_bm;
1197 intptr_t _bottom_card_num;
1198 bool _final;
1200 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1201 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1202 #if CARD_BM_TEST_MODE
1203 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1204 #else
1205 _card_bm->par_at_put(i - _bottom_card_num, 1);
1206 #endif
1207 }
1208 }
1210 public:
1211 CalcLiveObjectsClosure(bool final,
1212 CMBitMapRO *bm, ConcurrentMark *cm,
1213 BitMap* region_bm, BitMap* card_bm) :
1214 _bm(bm), _cm(cm), _changed(false), _yield(true),
1215 _words_done(0), _tot_live(0), _tot_used(0),
1216 _region_bm(region_bm), _card_bm(card_bm),_final(final),
1217 _regions_done(0), _start_vtime_sec(0.0)
1218 {
1219 _bottom_card_num =
1220 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1221 CardTableModRefBS::card_shift);
1222 }
1224 // It takes a region that's not empty (i.e., it has at least one
1225 // live object in it) and sets its corresponding bit on the region
1226 // bitmap to 1. If the region is "starts humongous" it will also set
1227 // to 1 the bits on the region bitmap that correspond to its
1228 // associated "continues humongous" regions.
1229 void set_bit_for_region(HeapRegion* hr) {
1230 assert(!hr->continuesHumongous(), "should have filtered those out");
1232 size_t index = hr->hrs_index();
1233 if (!hr->startsHumongous()) {
1234 // Normal (non-humongous) case: just set the bit.
1235 _region_bm->par_at_put((BitMap::idx_t) index, true);
1236 } else {
1237 // Starts humongous case: calculate how many regions are part of
1238 // this humongous region and then set the bit range. It might
1239 // have been a bit more efficient to look at the object that
1240 // spans these humongous regions to calculate their number from
1241 // the object's size. However, it's a good idea to calculate
1242 // this based on the metadata itself, and not the region
1243 // contents, so that this code is not aware of what goes into
1244 // the humongous regions (in case this changes in the future).
1245 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1246 size_t end_index = index + 1;
1247 while (end_index < g1h->n_regions()) {
1248 HeapRegion* chr = g1h->region_at(end_index);
1249 if (!chr->continuesHumongous()) {
1250 break;
1251 }
1252 end_index += 1;
1253 }
1254 _region_bm->par_at_put_range((BitMap::idx_t) index,
1255 (BitMap::idx_t) end_index, true);
1256 }
1257 }
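// For example (illustrative): if region 5 is "starts humongous" and
// regions 6 and 7 "continue" it, the loop above ends with
// end_index == 8, and bits [5, 8) are set in a single
// par_at_put_range() call.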
1259 bool doHeapRegion(HeapRegion* hr) {
1260 if (!_final && _regions_done == 0)
1261 _start_vtime_sec = os::elapsedVTime();
1263 if (hr->continuesHumongous()) {
1264 // We will ignore these here and process them when their
1265 // associated "starts humongous" region is processed (see
1266 // set_bit_for_region()). Note that we cannot rely on their
1267 // associated "starts humongous" region to have their bit set to
1268 // 1 since, due to the region chunking in the parallel region
1269 // iteration, a "continues humongous" region might be visited
1270 // before its associated "starts humongous".
1271 return false;
1272 }
1274 HeapWord* nextTop = hr->next_top_at_mark_start();
1275 HeapWord* start = hr->top_at_conc_mark_count();
1276 assert(hr->bottom() <= start && start <= hr->end() &&
1277 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1278 start <= nextTop,
1279 "Preconditions.");
1280 // Otherwise, record the number of words we'll examine.
1281 size_t words_done = (nextTop - start);
1282 // Find the first marked object at or after "start".
1283 start = _bm->getNextMarkedWordAddress(start, nextTop);
1284 size_t marked_bytes = 0;
1286 // Below, the term "card num" means the result of shifting an address
1287 // by the card shift -- address 0 corresponds to card number 0. One
1288 // must subtract the card num of the bottom of the heap to obtain a
1289 // card table index.
1290 // The first card num of the sequence of live cards currently being
1291 // constructed. -1 ==> no sequence.
1292 intptr_t start_card_num = -1;
1293 // The last card num of the sequence of live cards currently being
1294 // constructed. -1 ==> no sequence.
1295 intptr_t last_card_num = -1;
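// A worked example of the card arithmetic (illustrative, assuming the
// usual 512-byte cards, i.e. CardTableModRefBS::card_shift == 9, and a
// 64-bit VM with 8-byte HeapWords): a 130-word object starting at
// address 0x1000 has its last word at 0x1000 + 129 * 8 = 0x1408, so
//   obj_card_num      = 0x1000 >> 9 = 8
//   obj_last_card_num = 0x1408 >> 9 = 10
// and the object keeps cards 8 through 10 live.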
1297 while (start < nextTop) {
1298 if (_yield && _cm->do_yield_check()) {
1299 // We yielded. It might be for a full collection, in which case
1300 // all bets are off; terminate the traversal.
1301 if (_cm->has_aborted()) {
1302 _changed = false;
1303 return true;
1304 } else {
1305 // Otherwise, it might be a collection pause, and the region
1306 // we're looking at might be in the collection set. We'll
1307 // abandon this region.
1308 return false;
1309 }
1310 }
1311 oop obj = oop(start);
1312 int obj_sz = obj->size();
1313 // The card num of the start of the current object.
1314 intptr_t obj_card_num =
1315 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1317 HeapWord* obj_last = start + obj_sz - 1;
1318 intptr_t obj_last_card_num =
1319 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1321 if (obj_card_num != last_card_num) {
1322 if (start_card_num == -1) {
1323 assert(last_card_num == -1, "Both or neither.");
1324 start_card_num = obj_card_num;
1325 } else {
1326 assert(last_card_num != -1, "Both or neither.");
1327 assert(obj_card_num >= last_card_num, "Inv");
1328 if ((obj_card_num - last_card_num) > 1) {
1329 // Mark the last run, and start a new one.
1330 mark_card_num_range(start_card_num, last_card_num);
1331 start_card_num = obj_card_num;
1332 }
1333 }
1334 #if CARD_BM_TEST_MODE
1335 /*
1336 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1337 obj_card_num - _bottom_card_num,
1338 obj_last_card_num - _bottom_card_num);
1339 */
1340 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1341 _card_bm->par_at_put(j - _bottom_card_num, 1);
1342 }
1343 #endif
1344 }
1345 // In any case, we set the last card num.
1346 last_card_num = obj_last_card_num;
1348 marked_bytes += (size_t)obj_sz * HeapWordSize;
1349 // Find the next marked object after this one.
1350 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1351 _changed = true;
1352 }
1353 // Handle the last range, if any.
1354 if (start_card_num != -1)
1355 mark_card_num_range(start_card_num, last_card_num);
1356 if (_final) {
1357 // Mark the allocated-since-marking portion...
1358 HeapWord* tp = hr->top();
1359 if (nextTop < tp) {
1360 start_card_num =
1361 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1362 last_card_num =
1363 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1364 mark_card_num_range(start_card_num, last_card_num);
1365 // This definitely means the region has live objects.
1366 set_bit_for_region(hr);
1367 }
1368 }
1370 hr->add_to_marked_bytes(marked_bytes);
1371 // Update the live region bitmap.
1372 if (marked_bytes > 0) {
1373 set_bit_for_region(hr);
1374 }
1375 hr->set_top_at_conc_mark_count(nextTop);
1376 _tot_live += hr->next_live_bytes();
1377 _tot_used += hr->used();
1378 _words_done = words_done;
1380 if (!_final) {
1381 ++_regions_done;
1382 if (_regions_done % 10 == 0) {
1383 double end_vtime_sec = os::elapsedVTime();
1384 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1385 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1386 jlong sleep_time_ms =
1387 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1388 os::sleep(Thread::current(), sleep_time_ms, false);
1389 _start_vtime_sec = end_vtime_sec;
1390 }
1391 }
1392 }
1394 return false;
1395 }
1397 bool changed() { return _changed; }
1398 void reset() { _changed = false; _words_done = 0; }
1399 void no_yield() { _yield = false; }
1400 size_t words_done() { return _words_done; }
1401 size_t tot_live() { return _tot_live; }
1402 size_t tot_used() { return _tot_used; }
1403 };
1406 void ConcurrentMark::calcDesiredRegions() {
1407 _region_bm.clear();
1408 _card_bm.clear();
1409 CalcLiveObjectsClosure calccl(false /*final*/,
1410 nextMarkBitMap(), this,
1411 &_region_bm, &_card_bm);
1412 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1413 g1h->heap_region_iterate(&calccl);
1415 do {
1416 calccl.reset();
1417 g1h->heap_region_iterate(&calccl);
1418 } while (calccl.changed());
1419 }
1421 class G1ParFinalCountTask: public AbstractGangTask {
1422 protected:
1423 G1CollectedHeap* _g1h;
1424 CMBitMap* _bm;
1425 size_t _n_workers;
1426 size_t *_live_bytes;
1427 size_t *_used_bytes;
1428 BitMap* _region_bm;
1429 BitMap* _card_bm;
1430 public:
1431 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1432 BitMap* region_bm, BitMap* card_bm) :
1433 AbstractGangTask("G1 final counting"), _g1h(g1h),
1434 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1435 {
1436 if (ParallelGCThreads > 0)
1437 _n_workers = _g1h->workers()->total_workers();
1438 else
1439 _n_workers = 1;
1440 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1441 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1442 }
1444 ~G1ParFinalCountTask() {
1445 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1446 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1447 }
1449 void work(int i) {
1450 CalcLiveObjectsClosure calccl(true /*final*/,
1451 _bm, _g1h->concurrent_mark(),
1452 _region_bm, _card_bm);
1453 calccl.no_yield();
1454 if (ParallelGCThreads > 0) {
1455 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1456 HeapRegion::FinalCountClaimValue);
1457 } else {
1458 _g1h->heap_region_iterate(&calccl);
1459 }
1460 assert(calccl.complete(), "Shouldn't have yielded!");
1462 assert((size_t) i < _n_workers, "invariant");
1463 _live_bytes[i] = calccl.tot_live();
1464 _used_bytes[i] = calccl.tot_used();
1465 }
1466 size_t live_bytes() {
1467 size_t live_bytes = 0;
1468 for (size_t i = 0; i < _n_workers; ++i)
1469 live_bytes += _live_bytes[i];
1470 return live_bytes;
1471 }
1472 size_t used_bytes() {
1473 size_t used_bytes = 0;
1474 for (size_t i = 0; i < _n_workers; ++i)
1475 used_bytes += _used_bytes[i];
1476 return used_bytes;
1477 }
1478 };
1480 class G1ParNoteEndTask;
1482 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1483 G1CollectedHeap* _g1;
1484 int _worker_num;
1485 size_t _max_live_bytes;
1486 size_t _regions_claimed;
1487 size_t _freed_bytes;
1488 size_t _cleared_h_regions;
1489 size_t _freed_regions;
1490 UncleanRegionList* _unclean_region_list;
1491 double _claimed_region_time;
1492 double _max_region_time;
1494 public:
1495 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1496 UncleanRegionList* list,
1497 int worker_num);
1498 size_t freed_bytes() { return _freed_bytes; }
1499 size_t cleared_h_regions() { return _cleared_h_regions; }
1500 size_t freed_regions() { return _freed_regions; }
1501 UncleanRegionList* unclean_region_list() {
1502 return _unclean_region_list;
1503 }
1505 bool doHeapRegion(HeapRegion *r);
1507 size_t max_live_bytes() { return _max_live_bytes; }
1508 size_t regions_claimed() { return _regions_claimed; }
1509 double claimed_region_time_sec() { return _claimed_region_time; }
1510 double max_region_time_sec() { return _max_region_time; }
1511 };
1513 class G1ParNoteEndTask: public AbstractGangTask {
1514 friend class G1NoteEndOfConcMarkClosure;
1515 protected:
1516 G1CollectedHeap* _g1h;
1517 size_t _max_live_bytes;
1518 size_t _freed_bytes;
1519 ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1520 public:
1521 G1ParNoteEndTask(G1CollectedHeap* g1h,
1522 ConcurrentMark::ParCleanupThreadState**
1523 par_cleanup_thread_state) :
1524 AbstractGangTask("G1 note end"), _g1h(g1h),
1525 _max_live_bytes(0), _freed_bytes(0),
1526 _par_cleanup_thread_state(par_cleanup_thread_state)
1527 {}
1529 void work(int i) {
1530 double start = os::elapsedTime();
1531 G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1532 &_par_cleanup_thread_state[i]->list,
1533 i);
1534 if (ParallelGCThreads > 0) {
1535 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1536 HeapRegion::NoteEndClaimValue);
1537 } else {
1538 _g1h->heap_region_iterate(&g1_note_end);
1539 }
1540 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1542 // Now finish up freeing the current thread's regions.
1543 _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1544 g1_note_end.cleared_h_regions(),
1545 0, NULL);
1546 {
1547 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1548 _max_live_bytes += g1_note_end.max_live_bytes();
1549 _freed_bytes += g1_note_end.freed_bytes();
1550 }
1551 double end = os::elapsedTime();
1552 if (G1PrintParCleanupStats) {
1553 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1554 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1555 i, start, end, (end-start)*1000.0,
1556 g1_note_end.regions_claimed(),
1557 g1_note_end.claimed_region_time_sec()*1000.0,
1558 g1_note_end.max_region_time_sec()*1000.0);
1559 }
1560 }
1561 size_t max_live_bytes() { return _max_live_bytes; }
1562 size_t freed_bytes() { return _freed_bytes; }
1563 };
1565 class G1ParScrubRemSetTask: public AbstractGangTask {
1566 protected:
1567 G1RemSet* _g1rs;
1568 BitMap* _region_bm;
1569 BitMap* _card_bm;
1570 public:
1571 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1572 BitMap* region_bm, BitMap* card_bm) :
1573 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1574 _region_bm(region_bm), _card_bm(card_bm)
1575 {}
1577 void work(int i) {
1578 if (ParallelGCThreads > 0) {
1579 _g1rs->scrub_par(_region_bm, _card_bm, i,
1580 HeapRegion::ScrubRemSetClaimValue);
1581 } else {
1582 _g1rs->scrub(_region_bm, _card_bm);
1583 }
1584 }
1586 };
1588 G1NoteEndOfConcMarkClosure::
1589 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1590 UncleanRegionList* list,
1591 int worker_num)
1592 : _g1(g1), _worker_num(worker_num),
1593 _max_live_bytes(0), _regions_claimed(0),
1594 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1595 _claimed_region_time(0.0), _max_region_time(0.0),
1596 _unclean_region_list(list)
1597 {}
1599 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1600 // We use a claim value of zero here because all regions
1601 // were claimed with value 1 in the FinalCount task.
1602 r->reset_gc_time_stamp();
1603 if (!r->continuesHumongous()) {
1604 double start = os::elapsedTime();
1605 _regions_claimed++;
1606 r->note_end_of_marking();
1607 _max_live_bytes += r->max_live_bytes();
1608 _g1->free_region_if_totally_empty_work(r,
1609 _freed_bytes,
1610 _cleared_h_regions,
1611 _freed_regions,
1612 _unclean_region_list,
1613 true /*par*/);
1614 double region_time = (os::elapsedTime() - start);
1615 _claimed_region_time += region_time;
1616 if (region_time > _max_region_time) _max_region_time = region_time;
1617 }
1618 return false;
1619 }
1621 void ConcurrentMark::cleanup() {
1622 // world is stopped at this checkpoint
1623 assert(SafepointSynchronize::is_at_safepoint(),
1624 "world should be stopped");
1625 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1627 // If a full collection has happened, we shouldn't do this.
1628 if (has_aborted()) {
1629 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1630 return;
1631 }
1633 if (VerifyDuringGC) {
1634 HandleMark hm; // handle scope
1635 gclog_or_tty->print(" VerifyDuringGC:(before)");
1636 Universe::heap()->prepare_for_verify();
1637 Universe::verify(/* allow dirty */ true,
1638 /* silent */ false,
1639 /* prev marking */ true);
1640 }
1642 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1643 g1p->record_concurrent_mark_cleanup_start();
1645 double start = os::elapsedTime();
1647 // Do counting once more with the world stopped for good measure.
1648 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1649 &_region_bm, &_card_bm);
1650 if (ParallelGCThreads > 0) {
1651 assert(g1h->check_heap_region_claim_values(
1652 HeapRegion::InitialClaimValue),
1653 "sanity check");
1655 int n_workers = g1h->workers()->total_workers();
1656 g1h->set_par_threads(n_workers);
1657 g1h->workers()->run_task(&g1_par_count_task);
1658 g1h->set_par_threads(0);
1660 assert(g1h->check_heap_region_claim_values(
1661 HeapRegion::FinalCountClaimValue),
1662 "sanity check");
1663 } else {
1664 g1_par_count_task.work(0);
1665 }
1667 size_t known_garbage_bytes =
1668 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1669 #if 0
1670 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1671 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1672 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1673 (double) known_garbage_bytes / (double) (1024 * 1024));
1674 #endif // 0
1675 g1p->set_known_garbage_bytes(known_garbage_bytes);
1677 size_t start_used_bytes = g1h->used();
1678 _at_least_one_mark_complete = true;
1679 g1h->set_marking_complete();
1681 double count_end = os::elapsedTime();
1682 double this_final_counting_time = (count_end - start);
1683 if (G1PrintParCleanupStats) {
1684 gclog_or_tty->print_cr("Cleanup:");
1685 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1686 this_final_counting_time*1000.0);
1687 }
1688 _total_counting_time += this_final_counting_time;
1690 // Install newly created mark bitMap as "prev".
1691 swapMarkBitMaps();
1693 g1h->reset_gc_time_stamp();
1695 // Note end of marking in all heap regions.
1696 double note_end_start = os::elapsedTime();
1697 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1698 if (ParallelGCThreads > 0) {
1699 int n_workers = g1h->workers()->total_workers();
1700 g1h->set_par_threads(n_workers);
1701 g1h->workers()->run_task(&g1_par_note_end_task);
1702 g1h->set_par_threads(0);
1704 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1705 "sanity check");
1706 } else {
1707 g1_par_note_end_task.work(0);
1708 }
1709 g1h->set_unclean_regions_coming(true);
1710 double note_end_end = os::elapsedTime();
1711 // Tell the mutators that there might be unclean regions coming...
1712 if (G1PrintParCleanupStats) {
1713 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1714 (note_end_end - note_end_start)*1000.0);
1715 }
1718 // The RSet scrubbing must happen before the record_concurrent_mark_cleanup_end()
1719 // call below, since it affects the metric by which we sort the heap regions.
1720 if (G1ScrubRemSets) {
1721 double rs_scrub_start = os::elapsedTime();
1722 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1723 if (ParallelGCThreads > 0) {
1724 int n_workers = g1h->workers()->total_workers();
1725 g1h->set_par_threads(n_workers);
1726 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1727 g1h->set_par_threads(0);
1729 assert(g1h->check_heap_region_claim_values(
1730 HeapRegion::ScrubRemSetClaimValue),
1731 "sanity check");
1732 } else {
1733 g1_par_scrub_rs_task.work(0);
1734 }
1736 double rs_scrub_end = os::elapsedTime();
1737 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1738 _total_rs_scrub_time += this_rs_scrub_time;
1739 }
1741 // this will also free any regions totally full of garbage objects,
1742 // and sort the regions.
1743 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1744 g1_par_note_end_task.freed_bytes(),
1745 g1_par_note_end_task.max_live_bytes());
1747 // Statistics.
1748 double end = os::elapsedTime();
1749 _cleanup_times.add((end - start) * 1000.0);
1751 // G1CollectedHeap::heap()->print();
1752 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1753 // G1CollectedHeap::heap()->get_gc_time_stamp());
1755 if (PrintGC || PrintGCDetails) {
1756 g1h->print_size_transition(gclog_or_tty,
1757 start_used_bytes,
1758 g1h->used(),
1759 g1h->capacity());
1760 }
1762 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1763 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1765 // We need to make this a "collection" so any collection pause that
1766 // races with it goes around and waits for completeCleanup to finish.
1767 g1h->increment_total_collections();
1769 if (VerifyDuringGC) {
1770 HandleMark hm; // handle scope
1771 gclog_or_tty->print(" VerifyDuringGC:(after)");
1772 Universe::heap()->prepare_for_verify();
1773 Universe::verify(/* allow dirty */ true,
1774 /* silent */ false,
1775 /* prev marking */ true);
1776 }
1777 }
1779 void ConcurrentMark::completeCleanup() {
1780 // A full collection intervened.
1781 if (has_aborted()) return;
1783 int first = 0;
1784 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1785 for (int t = 0; t < last; t++) {
1786 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1787 assert(list->well_formed(), "Inv");
1788 HeapRegion* hd = list->hd();
1789 while (hd != NULL) {
1790 // Now finish up the other stuff.
1791 hd->rem_set()->clear();
1792 HeapRegion* next_hd = hd->next_from_unclean_list();
1793 (void)list->pop();
1794 assert(list->hd() == next_hd, "how not?");
1795 _g1h->put_region_on_unclean_list(hd);
1796 if (!hd->isHumongous()) {
1797 // Increment the _free_regions count by 1.
1798 _g1h->finish_free_region_work(0, 0, 1, NULL);
1799 }
1800 hd = list->hd();
1801 assert(hd == next_hd, "how not?");
1802 }
1803 }
1804 }
1807 class G1CMIsAliveClosure: public BoolObjectClosure {
1808 G1CollectedHeap* _g1;
1809 public:
1810 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1811 _g1(g1)
1812 {}
1814 void do_object(oop obj) {
1815 assert(false, "not to be invoked");
1816 }
1817 bool do_object_b(oop obj) {
1818 HeapWord* addr = (HeapWord*)obj;
1819 return addr != NULL &&
1820 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1821 }
1822 };
1824 class G1CMKeepAliveClosure: public OopClosure {
1825 G1CollectedHeap* _g1;
1826 ConcurrentMark* _cm;
1827 CMBitMap* _bitMap;
1828 public:
1829 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1830 CMBitMap* bitMap) :
1831 _g1(g1), _cm(cm),
1832 _bitMap(bitMap) {}
1834 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1835 virtual void do_oop( oop* p) { do_oop_work(p); }
1837 template <class T> void do_oop_work(T* p) {
1838 oop thisOop = oopDesc::load_decode_heap_oop(p);
1839 HeapWord* addr = (HeapWord*)thisOop;
1840 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1841 _bitMap->mark(addr);
1842 _cm->mark_stack_push(thisOop);
1843 }
1844 }
1845 };
1847 class G1CMDrainMarkingStackClosure: public VoidClosure {
1848 CMMarkStack* _markStack;
1849 CMBitMap* _bitMap;
1850 G1CMKeepAliveClosure* _oopClosure;
1851 public:
1852 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1853 G1CMKeepAliveClosure* oopClosure) :
1854 _bitMap(bitMap),
1855 _markStack(markStack),
1856 _oopClosure(oopClosure)
1857 {}
1859 void do_void() {
1860 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1861 }
1862 };
1864 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1865 ResourceMark rm;
1866 HandleMark hm;
1867 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1868 ReferenceProcessor* rp = g1h->ref_processor();
1870 // Process weak references.
1871 rp->setup_policy(clear_all_soft_refs);
1872 assert(_markStack.isEmpty(), "mark stack should be empty");
1874 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1875 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1876 G1CMDrainMarkingStackClosure
1877 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1878 &g1KeepAliveClosure);
1880 // XXXYYY Also: copy the parallel ref processing code from CMS.
1881 rp->process_discovered_references(&g1IsAliveClosure,
1882 &g1KeepAliveClosure,
1883 &g1DrainMarkingStackClosure,
1884 NULL);
1885 assert(_markStack.overflow() || _markStack.isEmpty(),
1886 "mark stack should be empty (unless it overflowed)");
1887 if (_markStack.overflow()) {
1888 set_has_overflown();
1889 }
1891 rp->enqueue_discovered_references();
1892 rp->verify_no_references_recorded();
1893 assert(!rp->discovery_enabled(), "should have been disabled");
1895 // Now clean up stale oops in SymbolTable and StringTable
1896 SymbolTable::unlink(&g1IsAliveClosure);
1897 StringTable::unlink(&g1IsAliveClosure);
1898 }
1900 void ConcurrentMark::swapMarkBitMaps() {
1901 CMBitMapRO* temp = _prevMarkBitMap;
1902 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1903 _nextMarkBitMap = (CMBitMap*) temp;
1904 }
1906 class CMRemarkTask: public AbstractGangTask {
1907 private:
1908 ConcurrentMark *_cm;
1910 public:
1911 void work(int worker_i) {
1912 // Since all available tasks are actually started, we should
1913 // only proceed if we're supposed to be active.
1914 if ((size_t)worker_i < _cm->active_tasks()) {
1915 CMTask* task = _cm->task(worker_i);
1916 task->record_start_time();
1917 do {
1918 task->do_marking_step(1000000000.0 /* something very large */);
1919 } while (task->has_aborted() && !_cm->has_overflown());
1920 // If we overflow, then we do not want to restart. We instead
1921 // want to abort remark and do concurrent marking again.
1922 task->record_end_time();
1923 }
1924 }
1926 CMRemarkTask(ConcurrentMark* cm) :
1927 AbstractGangTask("Par Remark"), _cm(cm) { }
1928 };
1930 void ConcurrentMark::checkpointRootsFinalWork() {
1931 ResourceMark rm;
1932 HandleMark hm;
1933 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1935 g1h->ensure_parsability(false);
1937 if (ParallelGCThreads > 0) {
1938 G1CollectedHeap::StrongRootsScope srs(g1h);
1939 // this is remark, so we'll use up all available threads
1940 int active_workers = ParallelGCThreads;
1941 set_phase(active_workers, false);
1943 CMRemarkTask remarkTask(this);
1944 // We will start all available threads, even if we decide that the
1945 // active_workers will be fewer. The extra ones will just bail out
1946 // immediately.
1947 int n_workers = g1h->workers()->total_workers();
1948 g1h->set_par_threads(n_workers);
1949 g1h->workers()->run_task(&remarkTask);
1950 g1h->set_par_threads(0);
1951 } else {
1952 G1CollectedHeap::StrongRootsScope srs(g1h);
1953 // this is remark, so we'll use up all available threads
1954 int active_workers = 1;
1955 set_phase(active_workers, false);
1957 CMRemarkTask remarkTask(this);
1958 // We will start all available threads, even if we decide that the
1959 // active_workers will be fewer. The extra ones will just bail out
1960 // immediately.
1961 remarkTask.work(0);
1962 }
1963 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1964 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1966 print_stats();
1968 if (!restart_for_overflow())
1969 set_non_marking_state();
1971 #if VERIFY_OBJS_PROCESSED
1972 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1973 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1974 _scan_obj_cl.objs_processed,
1975 ThreadLocalObjQueue::objs_enqueued);
1976 guarantee(_scan_obj_cl.objs_processed ==
1977 ThreadLocalObjQueue::objs_enqueued,
1978 "Different number of objs processed and enqueued.");
1979 }
1980 #endif
1981 }
1983 #ifndef PRODUCT
1985 class PrintReachableOopClosure: public OopClosure {
1986 private:
1987 G1CollectedHeap* _g1h;
1988 CMBitMapRO* _bitmap;
1989 outputStream* _out;
1990 bool _use_prev_marking;
1991 bool _all;
1993 public:
1994 PrintReachableOopClosure(CMBitMapRO* bitmap,
1995 outputStream* out,
1996 bool use_prev_marking,
1997 bool all) :
1998 _g1h(G1CollectedHeap::heap()),
1999 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2001 void do_oop(narrowOop* p) { do_oop_work(p); }
2002 void do_oop( oop* p) { do_oop_work(p); }
2004 template <class T> void do_oop_work(T* p) {
2005 oop obj = oopDesc::load_decode_heap_oop(p);
2006 const char* str = NULL;
2007 const char* str2 = "";
2009 if (obj == NULL) {
2010 str = "";
2011 } else if (!_g1h->is_in_g1_reserved(obj)) {
2012 str = " O";
2013 } else {
2014 HeapRegion* hr = _g1h->heap_region_containing(obj);
2015 guarantee(hr != NULL, "invariant");
2016 bool over_tams = false;
2017 if (_use_prev_marking) {
2018 over_tams = hr->obj_allocated_since_prev_marking(obj);
2019 } else {
2020 over_tams = hr->obj_allocated_since_next_marking(obj);
2021 }
2022 bool marked = _bitmap->isMarked((HeapWord*) obj);
2024 if (over_tams) {
2025 str = " >";
2026 if (marked) {
2027 str2 = " AND MARKED";
2028 }
2029 } else if (marked) {
2030 str = " M";
2031 } else {
2032 str = " NOT";
2033 }
2034 }
2036 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2037 p, (void*) obj, str, str2);
2038 }
2039 };
2041 class PrintReachableObjectClosure : public ObjectClosure {
2042 private:
2043 CMBitMapRO* _bitmap;
2044 outputStream* _out;
2045 bool _use_prev_marking;
2046 bool _all;
2047 HeapRegion* _hr;
2049 public:
2050 PrintReachableObjectClosure(CMBitMapRO* bitmap,
2051 outputStream* out,
2052 bool use_prev_marking,
2053 bool all,
2054 HeapRegion* hr) :
2055 _bitmap(bitmap), _out(out),
2056 _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2058 void do_object(oop o) {
2059 bool over_tams;
2060 if (_use_prev_marking) {
2061 over_tams = _hr->obj_allocated_since_prev_marking(o);
2062 } else {
2063 over_tams = _hr->obj_allocated_since_next_marking(o);
2064 }
2065 bool marked = _bitmap->isMarked((HeapWord*) o);
2066 bool print_it = _all || over_tams || marked;
2068 if (print_it) {
2069 _out->print_cr(" "PTR_FORMAT"%s",
2070 o, (over_tams) ? " >" : (marked) ? " M" : "");
2071 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2072 o->oop_iterate(&oopCl);
2073 }
2074 }
2075 };
2077 class PrintReachableRegionClosure : public HeapRegionClosure {
2078 private:
2079 CMBitMapRO* _bitmap;
2080 outputStream* _out;
2081 bool _use_prev_marking;
2082 bool _all;
2084 public:
2085 bool doHeapRegion(HeapRegion* hr) {
2086 HeapWord* b = hr->bottom();
2087 HeapWord* e = hr->end();
2088 HeapWord* t = hr->top();
2089 HeapWord* p = NULL;
2090 if (_use_prev_marking) {
2091 p = hr->prev_top_at_mark_start();
2092 } else {
2093 p = hr->next_top_at_mark_start();
2094 }
2095 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2096 "TAMS: "PTR_FORMAT, b, e, t, p);
2097 _out->cr();
2099 HeapWord* from = b;
2100 HeapWord* to = t;
2102 if (to > from) {
2103 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2104 _out->cr();
2105 PrintReachableObjectClosure ocl(_bitmap, _out,
2106 _use_prev_marking, _all, hr);
2107 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2108 _out->cr();
2109 }
2111 return false;
2112 }
2114 PrintReachableRegionClosure(CMBitMapRO* bitmap,
2115 outputStream* out,
2116 bool use_prev_marking,
2117 bool all) :
2118 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2119 };
2121 void ConcurrentMark::print_reachable(const char* str,
2122 bool use_prev_marking,
2123 bool all) {
2124 gclog_or_tty->cr();
2125 gclog_or_tty->print_cr("== Doing heap dump... ");
2127 if (G1PrintReachableBaseFile == NULL) {
2128 gclog_or_tty->print_cr(" #### error: no base file defined");
2129 return;
2130 }
2132 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2133 (JVM_MAXPATHLEN - 1)) {
2134 gclog_or_tty->print_cr(" #### error: file name too long");
2135 return;
2136 }
2138 char file_name[JVM_MAXPATHLEN];
2139 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2140 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2142 fileStream fout(file_name);
2143 if (!fout.is_open()) {
2144 gclog_or_tty->print_cr(" #### error: could not open file");
2145 return;
2146 }
2148 outputStream* out = &fout;
2150 CMBitMapRO* bitmap = NULL;
2151 if (use_prev_marking) {
2152 bitmap = _prevMarkBitMap;
2153 } else {
2154 bitmap = _nextMarkBitMap;
2155 }
2157 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2158 out->cr();
2160 out->print_cr("--- ITERATING OVER REGIONS");
2161 out->cr();
2162 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2163 _g1h->heap_region_iterate(&rcl);
2164 out->cr();
2166 gclog_or_tty->print_cr(" done");
2167 gclog_or_tty->flush();
2168 }
2170 #endif // PRODUCT
2172 // This note applies to drainAllSATBBuffers and the code between here and that method.
2173 // In the future we could reuse a task to do this work during an
2174 // evacuation pause (since now tasks are not active and can be claimed
2175 // during an evacuation pause). This was a late change to the code and
2176 // is currently not being taken advantage of.
2178 class CMGlobalObjectClosure : public ObjectClosure {
2179 private:
2180 ConcurrentMark* _cm;
2182 public:
2183 void do_object(oop obj) {
2184 _cm->deal_with_reference(obj);
2185 }
2187 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2188 };
2190 void ConcurrentMark::deal_with_reference(oop obj) {
2191 if (verbose_high())
2192 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2193 (void*) obj);
2196 HeapWord* objAddr = (HeapWord*) obj;
2197 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2198 if (_g1h->is_in_g1_reserved(objAddr)) {
2199 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2200 HeapRegion* hr = _g1h->heap_region_containing(obj);
2201 if (_g1h->is_obj_ill(obj, hr)) {
2202 if (verbose_high())
2203 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2204 "marked", (void*) obj);
2206 // we need to mark it first
2207 if (_nextMarkBitMap->parMark(objAddr)) {
2208 // No OrderAccess::store_load() is needed. It is implicit in the
2209 // CAS done in parMark(objAddr) above
2210 HeapWord* finger = _finger;
2211 if (objAddr < finger) {
2212 if (verbose_high())
2213 gclog_or_tty->print_cr("[global] below the global finger "
2214 "("PTR_FORMAT"), pushing it", finger);
2215 if (!mark_stack_push(obj)) {
2216 if (verbose_low())
2217 gclog_or_tty->print_cr("[global] global stack overflow during "
2218 "deal_with_reference");
2219 }
2220 }
2221 }
2222 }
2223 }
2224 }
2226 void ConcurrentMark::drainAllSATBBuffers() {
2227 CMGlobalObjectClosure oc(this);
2228 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2229 satb_mq_set.set_closure(&oc);
2231 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2232 if (verbose_medium())
2233 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2234 }
2236 // no need to check whether we should do this, as this is only
2237 // called during an evacuation pause
2238 satb_mq_set.iterate_closure_all_threads();
2240 satb_mq_set.set_closure(NULL);
2241 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2242 }
2244 void ConcurrentMark::markPrev(oop p) {
2245 // Note we are overriding the read-only view of the prev map here, via
2246 // the cast.
2247 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2248 }
2250 void ConcurrentMark::clear(oop p) {
2251 assert(p != NULL && p->is_oop(), "expected an oop");
2252 HeapWord* addr = (HeapWord*)p;
2253 assert(addr >= _nextMarkBitMap->startWord() &&
2254 addr < _nextMarkBitMap->endWord(), "in a region");
2256 _nextMarkBitMap->clear(addr);
2257 }
2259 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2260 // Note we are overriding the read-only view of the prev map here, via
2261 // the cast.
2262 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2263 _nextMarkBitMap->clearRange(mr);
2264 }
2266 HeapRegion*
2267 ConcurrentMark::claim_region(int task_num) {
2268 // "checkpoint" the finger
2269 HeapWord* finger = _finger;
2271 // _heap_end will not change underneath our feet; it only changes at
2272 // yield points.
2273 while (finger < _heap_end) {
2274 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2276 // is the gap between reading the finger and doing the CAS too long?
2278 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2279 HeapWord* bottom = curr_region->bottom();
2280 HeapWord* end = curr_region->end();
2281 HeapWord* limit = curr_region->next_top_at_mark_start();
2283 if (verbose_low())
2284 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2285 "["PTR_FORMAT", "PTR_FORMAT"), "
2286 "limit = "PTR_FORMAT,
2287 task_num, curr_region, bottom, end, limit);
2289 HeapWord* res =
2290 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2291 if (res == finger) {
2292 // we succeeded
2294 // notice that _finger == end cannot be guaranteed here since
2295 // someone else might have moved the finger even further
2296 assert(_finger >= end, "the finger should have moved forward");
2298 if (verbose_low())
2299 gclog_or_tty->print_cr("[%d] we were successful with region = "
2300 PTR_FORMAT, task_num, curr_region);
2302 if (limit > bottom) {
2303 if (verbose_low())
2304 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2305 "returning it ", task_num, curr_region);
2306 return curr_region;
2307 } else {
2308 assert(limit == bottom,
2309 "the region limit should be at bottom");
2310 if (verbose_low())
2311 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2312 "returning NULL", task_num, curr_region);
2313 // we return NULL and the caller should try calling
2314 // claim_region() again.
2315 return NULL;
2316 }
2317 } else {
2318 assert(_finger > finger, "the finger should have moved forward");
2319 if (verbose_low())
2320 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2321 "global finger = "PTR_FORMAT", "
2322 "our finger = "PTR_FORMAT,
2323 task_num, _finger, finger);
2325 // read it again
2326 finger = _finger;
2327 }
2328 }
2330 return NULL;
2331 }
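claim_region() above parcels out regions with a bare CAS on the global finger: read the finger, locate its region, and try to swing the finger to the region's end. Below is a minimal, self-contained sketch of that protocol, using C++11 atomics in place of Atomic::cmpxchg_ptr; Region and the lookup callback are simplified stand-ins, not HotSpot types:

#include <atomic>

struct Region { char* bottom; char* end; };

std::atomic<char*> g_finger;   // plays the role of _finger

// Returns the claimed region, or nullptr if another thread won the
// race; the caller re-reads the finger and retries, as in the loop above.
Region* claim_one(Region* (*region_containing)(char*)) {
  char* finger = g_finger.load();
  Region* r = region_containing(finger);
  char* expected = finger;
  // Success means this thread now owns [r->bottom, r->end).
  if (g_finger.compare_exchange_strong(expected, r->end)) {
    return r;
  }
  return nullptr;
}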
2333 void ConcurrentMark::oops_do(OopClosure* cl) {
2334 if (_markStack.size() > 0 && verbose_low())
2335 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2336 "size = %d", _markStack.size());
2337 // we first iterate over the contents of the mark stack...
2338 _markStack.oops_do(cl);
2340 for (int i = 0; i < (int)_max_task_num; ++i) {
2341 OopTaskQueue* queue = _task_queues->queue((int)i);
2343 if (queue->size() > 0 && verbose_low())
2344 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2345 "size = %d", i, queue->size());
2347 // ...then over the contents of all the task queues.
2348 queue->oops_do(cl);
2349 }
2351 // finally, invalidate any entries in the region stack that
2352 // point into the collection set
2353 if (_regionStack.invalidate_entries_into_cset()) {
2354 // otherwise, any gray objects copied during the evacuation pause
2355 // might not be visited.
2356 assert(_should_gray_objects, "invariant");
2357 }
2358 }
2360 void ConcurrentMark::clear_marking_state() {
2361 _markStack.setEmpty();
2362 _markStack.clear_overflow();
2363 _regionStack.setEmpty();
2364 _regionStack.clear_overflow();
2365 clear_has_overflown();
2366 _finger = _heap_start;
2368 for (int i = 0; i < (int)_max_task_num; ++i) {
2369 OopTaskQueue* queue = _task_queues->queue(i);
2370 queue->set_empty();
2371 }
2372 }
2374 void ConcurrentMark::print_stats() {
2375 if (verbose_stats()) {
2376 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2377 for (size_t i = 0; i < _active_tasks; ++i) {
2378 _tasks[i]->print_stats();
2379 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2380 }
2381 }
2382 }
2384 class CSMarkOopClosure: public OopClosure {
2385 friend class CSMarkBitMapClosure;
2387 G1CollectedHeap* _g1h;
2388 CMBitMap* _bm;
2389 ConcurrentMark* _cm;
2390 oop* _ms;
2391 jint* _array_ind_stack;
2392 int _ms_size;
2393 int _ms_ind;
2394 int _array_increment;
2396 bool push(oop obj, int arr_ind = 0) {
2397 if (_ms_ind == _ms_size) {
2398 gclog_or_tty->print_cr("Mark stack is full.");
2399 return false;
2400 }
2401 _ms[_ms_ind] = obj;
2402 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2403 _ms_ind++;
2404 return true;
2405 }
2407 oop pop() {
2408 if (_ms_ind == 0) return NULL;
2409 else {
2410 _ms_ind--;
2411 return _ms[_ms_ind];
2412 }
2413 }
2415 template <class T> bool drain() {
2416 while (_ms_ind > 0) {
2417 oop obj = pop();
2418 assert(obj != NULL, "Since index was non-zero.");
2419 if (obj->is_objArray()) {
2420 jint arr_ind = _array_ind_stack[_ms_ind];
2421 objArrayOop aobj = objArrayOop(obj);
2422 jint len = aobj->length();
2423 jint next_arr_ind = arr_ind + _array_increment;
2424 if (next_arr_ind < len) {
2425 push(obj, next_arr_ind);
2426 }
2427 // Now process this portion of this one.
2428 int lim = MIN2(next_arr_ind, len);
2429 for (int j = arr_ind; j < lim; j++) {
2430 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2431 }
2433 } else {
2434 obj->oop_iterate(this);
2435 }
2436 if (abort()) return false;
2437 }
2438 return true;
2439 }
2441 public:
2442 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2443 _g1h(G1CollectedHeap::heap()),
2444 _cm(cm),
2445 _bm(cm->nextMarkBitMap()),
2446 _ms_size(ms_size), _ms_ind(0),
2447 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2448 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2449 _array_increment(MAX2(ms_size/8, 16))
2450 {}
2452 ~CSMarkOopClosure() {
2453 FREE_C_HEAP_ARRAY(oop, _ms);
2454 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2455 }
2457 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2458 virtual void do_oop( oop* p) { do_oop_work(p); }
2460 template <class T> void do_oop_work(T* p) {
2461 T heap_oop = oopDesc::load_heap_oop(p);
2462 if (oopDesc::is_null(heap_oop)) return;
2463 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2464 if (obj->is_forwarded()) {
2465 // If the object has already been forwarded, we have to make sure
2466 // that it's marked. So follow the forwarding pointer. Note that
2467 // this does the right thing for self-forwarding pointers in the
2468 // evacuation failure case.
2469 obj = obj->forwardee();
2470 }
2471 HeapRegion* hr = _g1h->heap_region_containing(obj);
2472 if (hr != NULL) {
2473 if (hr->in_collection_set()) {
2474 if (_g1h->is_obj_ill(obj)) {
2475 _bm->mark((HeapWord*)obj);
2476 if (!push(obj)) {
2477 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2478 set_abort();
2479 }
2480 }
2481 } else {
2482 // Outside the collection set; we need to gray it
2483 _cm->deal_with_reference(obj);
2484 }
2485 }
2486 }
2487 };
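The drain() loop above bounds mark-stack growth for object arrays by scanning only _array_increment slots at a time and pushing the (array, next-index) remainder back. A minimal standalone sketch of that chunking trick; the container and per-slot scan are simplified stand-ins:

#include <algorithm>
#include <utility>
#include <vector>

void scan_slot(int /*index*/) { /* stands in for do_oop on one slot */ }

void drain_array(int length, int increment) {
  std::vector<std::pair<int, int> > stack;   // (array length, start index)
  stack.push_back(std::make_pair(length, 0));
  while (!stack.empty()) {
    int len = stack.back().first;
    int ind = stack.back().second;
    stack.pop_back();
    int next = ind + increment;
    if (next < len) {
      stack.push_back(std::make_pair(len, next));  // defer the rest
    }
    // Scan just this increment, as drain() does above.
    for (int j = ind; j < std::min(next, len); j++) {
      scan_slot(j);
    }
  }
}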
2489 class CSMarkBitMapClosure: public BitMapClosure {
2490 G1CollectedHeap* _g1h;
2491 CMBitMap* _bitMap;
2492 ConcurrentMark* _cm;
2493 CSMarkOopClosure _oop_cl;
2494 public:
2495 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2496 _g1h(G1CollectedHeap::heap()),
2497 _bitMap(cm->nextMarkBitMap()),
2498 _oop_cl(cm, ms_size)
2499 {}
2501 ~CSMarkBitMapClosure() {}
2503 bool do_bit(size_t offset) {
2504 // convert offset into a HeapWord*
2505 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2506 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2507 "address out of range");
2508 assert(_bitMap->isMarked(addr), "tautology");
2509 oop obj = oop(addr);
2510 if (!obj->is_forwarded()) {
2511 if (!_oop_cl.push(obj)) return false;
2512 if (UseCompressedOops) {
2513 if (!_oop_cl.drain<narrowOop>()) return false;
2514 } else {
2515 if (!_oop_cl.drain<oop>()) return false;
2516 }
2517 }
2518 // Otherwise...
2519 return true;
2520 }
2521 };
2524 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2525 CMBitMap* _bm;
2526 CSMarkBitMapClosure _bit_cl;
2527 enum SomePrivateConstants {
2528 MSSize = 1000
2529 };
2530 bool _completed;
2531 public:
2532 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2533 _bm(cm->nextMarkBitMap()),
2534 _bit_cl(cm, MSSize),
2535 _completed(true)
2536 {}
2538 ~CompleteMarkingInCSHRClosure() {}
2540 bool doHeapRegion(HeapRegion* r) {
2541 if (!r->evacuation_failed()) {
2542 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2543 if (!mr.is_empty()) {
2544 if (!_bm->iterate(&_bit_cl, mr)) {
2545 _completed = false;
2546 return true;
2547 }
2548 }
2549 }
2550 return false;
2551 }
2553 bool completed() { return _completed; }
2554 };
2556 class ClearMarksInHRClosure: public HeapRegionClosure {
2557 CMBitMap* _bm;
2558 public:
2559 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2561 bool doHeapRegion(HeapRegion* r) {
2562 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2563 MemRegion usedMR = r->used_region();
2564 _bm->clearRange(usedMR);
2565 }
2566 return false;
2567 }
2568 };
2570 void ConcurrentMark::complete_marking_in_collection_set() {
2571 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2573 if (!g1h->mark_in_progress()) {
2574 g1h->g1_policy()->record_mark_closure_time(0.0);
2575 return;
2576 }
2578 int i = 1;
2579 double start = os::elapsedTime();
2580 while (true) {
2581 i++;
2582 CompleteMarkingInCSHRClosure cmplt(this);
2583 g1h->collection_set_iterate(&cmplt);
2584 if (cmplt.completed()) break;
2585 }
2586 double end_time = os::elapsedTime();
2587 double elapsed_time_ms = (end_time - start) * 1000.0;
2588 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2589 if (PrintGCDetails) {
2590 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2591 }
2593 ClearMarksInHRClosure clr(nextMarkBitMap());
2594 g1h->collection_set_iterate(&clr);
2595 }
2597 // The next two methods deal with the following optimisation. Some
2598 // objects are gray by being marked and located above the finger. If
2599 // they are copied below the finger during an evacuation pause, then
2600 // they need to be pushed on the stack. The observation is that, if
2601 // there are no regions in the collection set located above the
2602 // finger, then the above cannot happen, hence we do not need to
2603 // explicitly gray any objects when copying them to below the
2604 // finger. The global stack will be scanned to ensure that, if it
2605 // points to objects being copied, it will update their
2606 // location. There is a tricky situation with the gray objects in
2607 // region stack that are being copied, however. See the comment in
2608 // newCSet().
2610 void ConcurrentMark::newCSet() {
2611 if (!concurrent_marking_in_progress())
2612 // nothing to do if marking is not in progress
2613 return;
2615 // find what the lowest finger is among the global and local fingers
2616 _min_finger = _finger;
2617 for (int i = 0; i < (int)_max_task_num; ++i) {
2618 CMTask* task = _tasks[i];
2619 HeapWord* task_finger = task->finger();
2620 if (task_finger != NULL && task_finger < _min_finger)
2621 _min_finger = task_finger;
2622 }
2624 _should_gray_objects = false;
2626 // This fixes a very subtle and frustrating bug. It might be the case
2627 // that, during an evacuation pause, heap regions that contain
2628 // objects that are gray (by being in regions contained in the
2629 // region stack) are included in the collection set. Since such gray
2630 // objects will be moved, and because it's not easy to redirect
2631 // region stack entries to point to a new location (because objects
2632 // in one region might be scattered to multiple regions after they
2633 // are copied), one option is to ensure that all marked objects
2634 // copied during a pause are pushed on the stack. Notice, however,
2635 // that this problem can only happen when the region stack is not
2636 // empty during an evacuation pause. So, we make the fix a bit less
2637 // conservative and ensure that regions are pushed on the stack,
2638 // irrespective of whether all collection set regions are below the
2639 // finger, if the region stack is not empty. This is expected to be
2640 // a rare case, so I don't think it's necessary to be smarter about it.
2641 if (!region_stack_empty())
2642 _should_gray_objects = true;
2643 }
2645 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2646 if (!concurrent_marking_in_progress())
2647 return;
2649 HeapWord* region_end = hr->end();
2650 if (region_end > _min_finger)
2651 _should_gray_objects = true;
2652 }
2654 // abandon current marking iteration due to a Full GC
2655 void ConcurrentMark::abort() {
2656 // Clear all marks to force marking thread to do nothing
2657 _nextMarkBitMap->clearAll();
2658 // Empty mark stack
2659 clear_marking_state();
2660 for (int i = 0; i < (int)_max_task_num; ++i)
2661 _tasks[i]->clear_region_fields();
2662 _has_aborted = true;
2664 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2665 satb_mq_set.abandon_partial_marking();
2666 // This can be called either during or outside marking; we'll read
2667 // the expected_active value from the SATB queue set.
2668 satb_mq_set.set_active_all_threads(
2669 false, /* new active value */
2670 satb_mq_set.is_active() /* expected_active */);
2671 }
2673 static void print_ms_time_info(const char* prefix, const char* name,
2674 NumberSeq& ns) {
2675 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2676 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2677 if (ns.num() > 0) {
2678 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2679 prefix, ns.sd(), ns.maximum());
2680 }
2681 }
2683 void ConcurrentMark::print_summary_info() {
2684 gclog_or_tty->print_cr(" Concurrent marking:");
2685 print_ms_time_info(" ", "init marks", _init_times);
2686 print_ms_time_info(" ", "remarks", _remark_times);
2687 {
2688 print_ms_time_info(" ", "final marks", _remark_mark_times);
2689 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2691 }
2692 print_ms_time_info(" ", "cleanups", _cleanup_times);
2693 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2694 _total_counting_time,
2695 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2696 (double)_cleanup_times.num()
2697 : 0.0));
2698 if (G1ScrubRemSets) {
2699 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2700 _total_rs_scrub_time,
2701 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2702 (double)_cleanup_times.num()
2703 : 0.0));
2704 }
2705 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2706 (_init_times.sum() + _remark_times.sum() +
2707 _cleanup_times.sum())/1000.0);
2708 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2709 "(%8.2f s marking, %8.2f s counting).",
2710 cmThread()->vtime_accum(),
2711 cmThread()->vtime_mark_accum(),
2712 cmThread()->vtime_count_accum());
2713 }
2715 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2716 _parallel_workers->print_worker_threads_on(st);
2717 }
2719 // Closures
2720 // XXX: there seems to be a lot of code duplication here;
2721 // should refactor and consolidate the shared code.
2726 // We take a break if someone is trying to stop the world.
2727 bool ConcurrentMark::do_yield_check(int worker_i) {
2728 if (should_yield()) {
2729 if (worker_i == 0)
2730 _g1h->g1_policy()->record_concurrent_pause();
2731 cmThread()->yield();
2732 if (worker_i == 0)
2733 _g1h->g1_policy()->record_concurrent_pause_end();
2734 return true;
2735 } else {
2736 return false;
2737 }
2738 }
2740 bool ConcurrentMark::should_yield() {
2741 return cmThread()->should_yield();
2742 }
2744 bool ConcurrentMark::containing_card_is_marked(void* p) {
2745 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2746 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2747 }
2749 bool ConcurrentMark::containing_cards_are_marked(void* start,
2750 void* last) {
2751 return
2752 containing_card_is_marked(start) &&
2753 containing_card_is_marked(last);
2754 }
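A worked example of the card-index arithmetic above, assuming HotSpot's usual 512-byte cards (card_shift == 9); the offset value is illustrative:

#include <cassert>
#include <cstddef>

int main() {
  const int card_shift = 9;       // log2 of the 512-byte card size
  size_t offset = 5000;           // byte offset from the reserved base
  // Bytes 4608..5119 share card 9, so one card bit covers them all.
  assert((offset >> card_shift) == 9);
  return 0;
}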
2756 #ifndef PRODUCT
2757 // for debugging purposes
2758 void ConcurrentMark::print_finger() {
2759 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2760 _heap_start, _heap_end, _finger);
2761 for (int i = 0; i < (int) _max_task_num; ++i) {
2762 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2763 }
2764 gclog_or_tty->print_cr("");
2765 }
2766 #endif
2768 // Closure for iteration over bitmaps
2769 class CMBitMapClosure : public BitMapClosure {
2770 private:
2771 // the bitmap that is being iterated over
2772 CMBitMap* _nextMarkBitMap;
2773 ConcurrentMark* _cm;
2774 CMTask* _task;
2775 // true if we're scanning a heap region claimed by the task (so that
2776 // we move the finger along), false if we're not, i.e. currently when
2777 // scanning a heap region popped from the region stack (so that we
2778 // do not move the task finger along; it'd be a mistake if we did so).
2779 bool _scanning_heap_region;
2781 public:
2782 CMBitMapClosure(CMTask *task,
2783 ConcurrentMark* cm,
2784 CMBitMap* nextMarkBitMap)
2785 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2787 void set_scanning_heap_region(bool scanning_heap_region) {
2788 _scanning_heap_region = scanning_heap_region;
2789 }
2791 bool do_bit(size_t offset) {
2792 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2793 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2794 assert( addr < _cm->finger(), "invariant");
2796 if (_scanning_heap_region) {
2797 statsOnly( _task->increase_objs_found_on_bitmap() );
2798 assert(addr >= _task->finger(), "invariant");
2799 // We move that task's local finger along.
2800 _task->move_finger_to(addr);
2801 } else {
2802 // We move the task's region finger along.
2803 _task->move_region_finger_to(addr);
2804 }
2806 _task->scan_object(oop(addr));
2807 // we only partially drain the local queue and global stack
2808 _task->drain_local_queue(true);
2809 _task->drain_global_stack(true);
2811 // if the has_aborted flag has been raised, we need to bail out of
2812 // the iteration
2813 return !_task->has_aborted();
2814 }
2815 };
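do_bit() above receives a bit offset and maps it back to a heap address via offsetToHeapWord(). A minimal sketch of that mapping, assuming one mark bit per (1 << shifter) heap words as in the bitmap wrapper:

#include <cassert>
#include <cstddef>

// Inverse of the word-to-offset mapping: one mark bit covers
// (1 << shifter) heap words starting at the bitmap's base address.
size_t* offset_to_addr(size_t* base, size_t offset, int shifter) {
  return base + (offset << shifter);
}

int main() {
  size_t heap[64] = {0};
  assert(offset_to_addr(heap, 3, 0) == &heap[3]);  // word-granular map
  return 0;
}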
2817 // Closure for iterating over objects, currently only used for
2818 // processing SATB buffers.
2819 class CMObjectClosure : public ObjectClosure {
2820 private:
2821 CMTask* _task;
2823 public:
2824 void do_object(oop obj) {
2825 _task->deal_with_reference(obj);
2826 }
2828 CMObjectClosure(CMTask* task) : _task(task) { }
2829 };
2831 // Closure for iterating over object fields
2832 class CMOopClosure : public OopClosure {
2833 private:
2834 G1CollectedHeap* _g1h;
2835 ConcurrentMark* _cm;
2836 CMTask* _task;
2838 public:
2839 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2840 virtual void do_oop( oop* p) { do_oop_work(p); }
2842 template <class T> void do_oop_work(T* p) {
2843 assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2844 assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2845 "invariant");
2847 oop obj = oopDesc::load_decode_heap_oop(p);
2848 if (_cm->verbose_high())
2849 gclog_or_tty->print_cr("[%d] we're looking at location "
2850 "*"PTR_FORMAT" = "PTR_FORMAT,
2851 _task->task_id(), p, (void*) obj);
2852 _task->deal_with_reference(obj);
2853 }
2855 CMOopClosure(G1CollectedHeap* g1h,
2856 ConcurrentMark* cm,
2857 CMTask* task)
2858 : _g1h(g1h), _cm(cm), _task(task) { }
2859 };
2861 void CMTask::setup_for_region(HeapRegion* hr) {
2862 // Separated the asserts so that we know which one fires.
2863 assert(hr != NULL,
2864 "claim_region() should have filtered out continues humongous regions");
2865 assert(!hr->continuesHumongous(),
2866 "claim_region() should have filtered out continues humongous regions");
2868 if (_cm->verbose_low())
2869 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2870 _task_id, hr);
2872 _curr_region = hr;
2873 _finger = hr->bottom();
2874 update_region_limit();
2875 }
2877 void CMTask::update_region_limit() {
2878 HeapRegion* hr = _curr_region;
2879 HeapWord* bottom = hr->bottom();
2880 HeapWord* limit = hr->next_top_at_mark_start();
2882 if (limit == bottom) {
2883 if (_cm->verbose_low())
2884 gclog_or_tty->print_cr("[%d] found an empty region "
2885 "["PTR_FORMAT", "PTR_FORMAT")",
2886 _task_id, bottom, limit);
2887 // The region was collected underneath our feet.
2888 // We set the finger to bottom to ensure that the bitmap
2889 // iteration that will follow this will not do anything.
2890 // (this is not a condition that holds when we set the region up,
2891 // as the region is not supposed to be empty in the first place)
2892 _finger = bottom;
2893 } else if (limit >= _region_limit) {
2894 assert(limit >= _finger, "peace of mind");
2895 } else {
2896 assert(limit < _region_limit, "only way to get here");
2897 // This can happen under some pretty unusual circumstances. An
2898 // evacuation pause empties the region underneath our feet (NTAMS
2899 // at bottom). We then do some allocation in the region (NTAMS
2900 // stays at bottom), followed by the region being used as a GC
2901 // alloc region (NTAMS will move to top() and the objects
2902 // originally below it will be grayed). All objects now marked in
2903 // the region are explicitly grayed, if below the global finger,
2904 // and in fact we do not need to scan anything else. So, we simply
2905 // set _finger to be limit to ensure that the bitmap iteration
2906 // doesn't do anything.
2907 _finger = limit;
2908 }
2910 _region_limit = limit;
2911 }
2913 void CMTask::giveup_current_region() {
2914 assert(_curr_region != NULL, "invariant");
2915 if (_cm->verbose_low())
2916 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2917 _task_id, _curr_region);
2918 clear_region_fields();
2919 }
2921 void CMTask::clear_region_fields() {
2922 // Values for these three fields that indicate that we're not
2923 // holding on to a region.
2924 _curr_region = NULL;
2925 _finger = NULL;
2926 _region_limit = NULL;
2928 _region_finger = NULL;
2929 }
2931 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2932 guarantee(nextMarkBitMap != NULL, "invariant");
2934 if (_cm->verbose_low())
2935 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2937 _nextMarkBitMap = nextMarkBitMap;
2938 clear_region_fields();
2940 _calls = 0;
2941 _elapsed_time_ms = 0.0;
2942 _termination_time_ms = 0.0;
2943 _termination_start_time_ms = 0.0;
2945 #if _MARKING_STATS_
2946 _local_pushes = 0;
2947 _local_pops = 0;
2948 _local_max_size = 0;
2949 _objs_scanned = 0;
2950 _global_pushes = 0;
2951 _global_pops = 0;
2952 _global_max_size = 0;
2953 _global_transfers_to = 0;
2954 _global_transfers_from = 0;
2955 _region_stack_pops = 0;
2956 _regions_claimed = 0;
2957 _objs_found_on_bitmap = 0;
2958 _satb_buffers_processed = 0;
2959 _steal_attempts = 0;
2960 _steals = 0;
2961 _aborted = 0;
2962 _aborted_overflow = 0;
2963 _aborted_cm_aborted = 0;
2964 _aborted_yield = 0;
2965 _aborted_timed_out = 0;
2966 _aborted_satb = 0;
2967 _aborted_termination = 0;
2968 #endif // _MARKING_STATS_
2969 }
2971 bool CMTask::should_exit_termination() {
2972 regular_clock_call();
2973 // This is called when we are in the termination protocol. We should
2974 // quit if, for some reason, this task wants to abort or the global
2975 // stack is not empty (this means that we can get work from it).
2976 return !_cm->mark_stack_empty() || has_aborted();
2977 }
2979 // This determines whether the method below will check both the local
2980 // and global fingers when determining whether to push on the stack a
2981 // gray object (value 1) or whether it will only check the global one
2982 // (value 0). The tradeoffs are that the former will be a bit more
2983 // accurate and possibly push less on the stack, but it might also be
2984 // a little bit slower.
2986 #define _CHECK_BOTH_FINGERS_ 1
2988 void CMTask::deal_with_reference(oop obj) {
2989 if (_cm->verbose_high())
2990 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2991 _task_id, (void*) obj);
2993 ++_refs_reached;
2995 HeapWord* objAddr = (HeapWord*) obj;
2996 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2997 if (_g1h->is_in_g1_reserved(objAddr)) {
2998 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2999 HeapRegion* hr = _g1h->heap_region_containing(obj);
3000 if (_g1h->is_obj_ill(obj, hr)) {
3001 if (_cm->verbose_high())
3002 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3003 _task_id, (void*) obj);
3005 // we need to mark it first
3006 if (_nextMarkBitMap->parMark(objAddr)) {
3007 // No OrderAccess::store_load() is needed. It is implicit in the
3008 // CAS done in parMark(objAddr) above
3009 HeapWord* global_finger = _cm->finger();
3011 #if _CHECK_BOTH_FINGERS_
3012 // we will check both the local and global fingers
3014 if (_finger != NULL && objAddr < _finger) {
3015 if (_cm->verbose_high())
3016 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3017 "pushing it", _task_id, _finger);
3018 push(obj);
3019 } else if (_curr_region != NULL && objAddr < _region_limit) {
3020 // do nothing
3021 } else if (objAddr < global_finger) {
3022 // Notice that the global finger might be moving forward
3023 // concurrently. This is not a problem. In the worst case, we
3024 // mark the object while it is above the global finger and, by
3025 // the time we read the global finger, it has moved forward
3026 // past this object. In this case, the object will probably
3027 // be visited when a task is scanning the region and will also
3028 // be pushed on the stack. So, some duplicate work, but no
3029 // correctness problems.
3031 if (_cm->verbose_high())
3032 gclog_or_tty->print_cr("[%d] below the global finger "
3033 "("PTR_FORMAT"), pushing it",
3034 _task_id, global_finger);
3035 push(obj);
3036 } else {
3037 // do nothing
3038 }
3039 #else // _CHECK_BOTH_FINGERS_
3040 // we will only check the global finger
3042 if (objAddr < global_finger) {
3043 // see long comment above
3045 if (_cm->verbose_high())
3046 gclog_or_tty->print_cr("[%d] below the global finger "
3047 "("PTR_FORMAT"), pushing it",
3048 _task_id, global_finger);
3049 push(obj);
3050 }
3051 #endif // _CHECK_BOTH_FINGERS_
3052 }
3053 }
3054 }
3055 }
3057 void CMTask::push(oop obj) {
3058 HeapWord* objAddr = (HeapWord*) obj;
3059 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3060 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3061 "invariant");
3062 assert(!_g1h->is_obj_ill(obj), "invariant");
3063 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3065 if (_cm->verbose_high())
3066 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3068 if (!_task_queue->push(obj)) {
3069 // The local task queue looks full. We need to push some entries
3070 // to the global stack.
3072 if (_cm->verbose_medium())
3073 gclog_or_tty->print_cr("[%d] task queue overflow, "
3074 "moving entries to the global stack",
3075 _task_id);
3076 move_entries_to_global_stack();
3078 // this should succeed since, even if we overflow the global
3079 // stack, we should have definitely removed some entries from the
3080 // local queue. So, there must be space on it.
3081 bool success = _task_queue->push(obj);
3082 assert(success, "invariant");
3083 }
3085 statsOnly( int tmp_size = _task_queue->size();
3086 if (tmp_size > _local_max_size)
3087 _local_max_size = tmp_size;
3088 ++_local_pushes );
3089 }
3091 void CMTask::reached_limit() {
3092 assert(_words_scanned >= _words_scanned_limit ||
3093 _refs_reached >= _refs_reached_limit ,
3094 "shouldn't have been called otherwise");
3095 regular_clock_call();
3096 }
3098 void CMTask::regular_clock_call() {
3099 if (has_aborted())
3100 return;
3102 // First, we need to recalculate the words scanned and refs reached
3103 // limits for the next clock call.
3104 recalculate_limits();
3106 // During the regular clock call we do the following
3108 // (1) If an overflow has been flagged, then we abort.
3109 if (_cm->has_overflown()) {
3110 set_has_aborted();
3111 return;
3112 }
3114 // If we are not concurrent (i.e. we're doing remark) we don't need
3115 // to check anything else. The other steps are only needed during
3116 // the concurrent marking phase.
3117 if (!concurrent())
3118 return;
3120 // (2) If marking has been aborted for Full GC, then we also abort.
3121 if (_cm->has_aborted()) {
3122 set_has_aborted();
3123 statsOnly( ++_aborted_cm_aborted );
3124 return;
3125 }
3127 double curr_time_ms = os::elapsedVTime() * 1000.0;
3129 // (3) If marking stats are enabled, then we update the step history.
3130 #if _MARKING_STATS_
3131 if (_words_scanned >= _words_scanned_limit)
3132 ++_clock_due_to_scanning;
3133 if (_refs_reached >= _refs_reached_limit)
3134 ++_clock_due_to_marking;
3136 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3137 _interval_start_time_ms = curr_time_ms;
3138 _all_clock_intervals_ms.add(last_interval_ms);
3140 if (_cm->verbose_medium()) {
3141 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3142 "scanned = %d%s, refs reached = %d%s",
3143 _task_id, last_interval_ms,
3144 _words_scanned,
3145 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3146 _refs_reached,
3147 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3148 }
3149 #endif // _MARKING_STATS_
3151 // (4) We check whether we should yield. If we have to, then we abort.
3152 if (_cm->should_yield()) {
3153 // We should yield. To do this we abort the task. The caller is
3154 // responsible for yielding.
3155 set_has_aborted();
3156 statsOnly( ++_aborted_yield );
3157 return;
3158 }
3160 // (5) We check whether we've reached our time quota. If we have,
3161 // then we abort.
3162 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3163 if (elapsed_time_ms > _time_target_ms) {
3164 set_has_aborted();
3165 _has_aborted_timed_out = true;
3166 statsOnly( ++_aborted_timed_out );
3167 return;
3168 }
3170 // (6) Finally, we check whether there are enough completed SATB
3171 // buffers available for processing. If there are, we abort.
3172 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3173 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3174 if (_cm->verbose_low())
3175 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3176 _task_id);
3177 // we do need to process SATB buffers, we'll abort and restart
3178 // the marking task to do so
3179 set_has_aborted();
3180 statsOnly( ++_aborted_satb );
3181 return;
3182 }
3183 }
3185 void CMTask::recalculate_limits() {
3186 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3187 _words_scanned_limit = _real_words_scanned_limit;
3189 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3190 _refs_reached_limit = _real_refs_reached_limit;
3191 }
3193 void CMTask::decrease_limits() {
3194 // This is called when we believe that we're going to do an infrequent
3195 // operation which will increase the per byte scanned cost (i.e. move
3196 // entries to/from the global stack). It basically tries to decrease the
3197 // scanning limit so that the clock is called earlier.
3199 if (_cm->verbose_medium())
3200 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3202 _words_scanned_limit = _real_words_scanned_limit -
3203 3 * words_scanned_period / 4;
3204 _refs_reached_limit = _real_refs_reached_limit -
3205 3 * refs_reached_period / 4;
3206 }
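Together, recalculate_limits() and decrease_limits() implement a simple work budget: scanning counts up toward a limit, hitting the limit triggers regular_clock_call(), and an expensive operation pulls the limit in so the clock fires sooner. A compact sketch of that pattern; the period value is illustrative, not HotSpot's:

#include <cstddef>

struct WorkBudget {
  size_t scanned;      // work done since the task started
  size_t real_limit;   // limit set at the last clock call
  size_t limit;        // possibly pulled-in limit that is actually checked
  static const size_t period = 12 * 1024;   // illustrative

  WorkBudget() : scanned(0), real_limit(0), limit(0) { recalculate(); }

  // Mirrors recalculate_limits(): a fresh full period of work.
  void recalculate() { real_limit = scanned + period; limit = real_limit; }
  // Mirrors decrease_limits(): give back 3/4 of a period so the
  // next clock call comes earlier after an expensive operation.
  void decrease()    { limit = real_limit - 3 * period / 4; }
  bool clock_due() const { return scanned >= limit; }
};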
3208 void CMTask::move_entries_to_global_stack() {
3209 // local array where we'll store the entries that will be popped
3210 // from the local queue
3211 oop buffer[global_stack_transfer_size];
3213 int n = 0;
3214 oop obj;
3215 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3216 buffer[n] = obj;
3217 ++n;
3218 }
3220 if (n > 0) {
3221 // we popped at least one entry from the local queue
3223 statsOnly( ++_global_transfers_to; _local_pops += n );
3225 if (!_cm->mark_stack_push(buffer, n)) {
3226 if (_cm->verbose_low())
3227 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3228 set_has_aborted();
3229 } else {
3230 // the transfer was successful
3232 if (_cm->verbose_medium())
3233 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3234 _task_id, n);
3235 statsOnly( int tmp_size = _cm->mark_stack_size();
3236 if (tmp_size > _global_max_size)
3237 _global_max_size = tmp_size;
3238 _global_pushes += n );
3239 }
3240 }
3242 // this operation was quite expensive, so decrease the limits
3243 decrease_limits();
3244 }
3246 void CMTask::get_entries_from_global_stack() {
3247 // local array where we'll store the entries that will be popped
3248 // from the global stack.
3249 oop buffer[global_stack_transfer_size];
3250 int n;
3251 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3252 assert(n <= global_stack_transfer_size,
3253 "we should not pop more than the given limit");
3254 if (n > 0) {
3255 // yes, we did actually pop at least one entry
3257 statsOnly( ++_global_transfers_from; _global_pops += n );
3258 if (_cm->verbose_medium())
3259 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3260 _task_id, n);
3261 for (int i = 0; i < n; ++i) {
3262 bool success = _task_queue->push(buffer[i]);
3263 // We only call this when the local queue is empty or under a
3264 // given target limit. So, we do not expect this push to fail.
3265 assert(success, "invariant");
3266 }
3268 statsOnly( int tmp_size = _task_queue->size();
3269 if (tmp_size > _local_max_size)
3270 _local_max_size = tmp_size;
3271 _local_pushes += n );
3272 }
3274 // this operation was quite expensive, so decrease the limits
3275 decrease_limits();
3276 }
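Both transfer methods above move global_stack_transfer_size entries at a time through a local buffer, so the synchronised global stack is touched once per chunk rather than once per entry. A minimal sketch of the local-to-global direction; the containers are simplified stand-ins:

#include <deque>
#include <vector>

const int transfer_size = 16;   // plays the role of global_stack_transfer_size

// Pop up to a chunk's worth of entries from the private queue, then
// hand them to the shared stack in one batch.
void move_to_global(std::deque<void*>& local_queue,
                    std::vector<void*>& global_stack) {
  void* buffer[transfer_size];
  int n = 0;
  while (n < transfer_size && !local_queue.empty()) {
    buffer[n++] = local_queue.front();
    local_queue.pop_front();
  }
  // Single batched append, standing in for mark_stack_push(buffer, n).
  global_stack.insert(global_stack.end(), buffer, buffer + n);
}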
3278 void CMTask::drain_local_queue(bool partially) {
3279 if (has_aborted())
3280 return;
3282 // Decide what the target size is, depending on whether we're going to
3283 // drain it partially (so that other tasks can steal if they run out
3284 // of things to do) or totally (at the very end).
3285 size_t target_size;
3286 if (partially)
3287 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3288 else
3289 target_size = 0;
3291 if (_task_queue->size() > target_size) {
3292 if (_cm->verbose_high())
3293 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3294 _task_id, target_size);
3296 oop obj;
3297 bool ret = _task_queue->pop_local(obj);
3298 while (ret) {
3299 statsOnly( ++_local_pops );
3301 if (_cm->verbose_high())
3302 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3303 (void*) obj);
3305 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3306 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3307 "invariant");
3309 scan_object(obj);
3311 if (_task_queue->size() <= target_size || has_aborted())
3312 ret = false;
3313 else
3314 ret = _task_queue->pop_local(obj);
3315 }
3317 if (_cm->verbose_high())
3318 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3319 _task_id, _task_queue->size());
3320 }
3321 }
3323 void CMTask::drain_global_stack(bool partially) {
3324 if (has_aborted())
3325 return;
3327 // We have a policy to drain the local queue before we attempt to
3328 // drain the global stack.
3329 assert(partially || _task_queue->size() == 0, "invariant");
3331 // Decide what the target size is, depending on whether we're going to
3332 // drain it partially (so that other tasks can steal if they run out
3333 // of things to do) or totally (at the very end). Notice that,
3334 // because we move entries from the global stack in chunks or
3335 // because another task might be doing the same, we might in fact
3336 // drop below the target. But, this is not a problem.
3337 size_t target_size;
3338 if (partially)
3339 target_size = _cm->partial_mark_stack_size_target();
3340 else
3341 target_size = 0;
3343 if (_cm->mark_stack_size() > target_size) {
3344 if (_cm->verbose_low())
3345 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3346 _task_id, target_size);
3348 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3349 get_entries_from_global_stack();
3350 drain_local_queue(partially);
3351 }
3353 if (_cm->verbose_low())
3354 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3355 _task_id, _cm->mark_stack_size());
3356 }
3357 }
3359 // SATB Queue has several assumptions on whether to call the par or
3360 // non-par versions of the methods. This is why some of the code is
3361 // replicated. We should really get rid of the single-threaded version
3362 // of the code to simplify things.
3363 void CMTask::drain_satb_buffers() {
3364 if (has_aborted())
3365 return;
3367 // We set this so that the regular clock knows that we're in the
3368 // middle of draining buffers and doesn't set the abort flag when it
3369 // notices that SATB buffers are available for draining. It'd be
3370 // very counterproductive if it did that. :-)
3371 _draining_satb_buffers = true;
3373 CMObjectClosure oc(this);
3374 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3375 if (ParallelGCThreads > 0)
3376 satb_mq_set.set_par_closure(_task_id, &oc);
3377 else
3378 satb_mq_set.set_closure(&oc);
3380 // This keeps claiming and applying the closure to completed buffers
3381 // until we run out of buffers or we need to abort.
3382 if (ParallelGCThreads > 0) {
3383 while (!has_aborted() &&
3384 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3385 if (_cm->verbose_medium())
3386 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3387 statsOnly( ++_satb_buffers_processed );
3388 regular_clock_call();
3389 }
3390 } else {
3391 while (!has_aborted() &&
3392 satb_mq_set.apply_closure_to_completed_buffer()) {
3393 if (_cm->verbose_medium())
3394 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3395 statsOnly( ++_satb_buffers_processed );
3396 regular_clock_call();
3397 }
3398 }
3400 if (!concurrent() && !has_aborted()) {
3401 // We should only do this during remark.
3402 if (ParallelGCThreads > 0)
3403 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3404 else
3405 satb_mq_set.iterate_closure_all_threads();
3406 }
3408 _draining_satb_buffers = false;
3410 assert(has_aborted() ||
3411 concurrent() ||
3412 satb_mq_set.completed_buffers_num() == 0, "invariant");
3414 if (ParallelGCThreads > 0)
3415 satb_mq_set.set_par_closure(_task_id, NULL);
3416 else
3417 satb_mq_set.set_closure(NULL);
3419 // Again, this was a potentially expensive operation; decrease the
3420 // limits to get the regular clock call early
3421 decrease_limits();
3422 }
3424 void CMTask::drain_region_stack(BitMapClosure* bc) {
3425 if (has_aborted())
3426 return;
3428 assert(_region_finger == NULL,
3429 "it should be NULL when we're not scanning a region");
3431 if (!_cm->region_stack_empty()) {
3432 if (_cm->verbose_low())
3433 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3434 _task_id, _cm->region_stack_size());
3436 MemRegion mr = _cm->region_stack_pop_with_lock();
3437 // it returns MemRegion() if the pop fails
3438 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3440 while (mr.start() != NULL) {
3441 if (_cm->verbose_medium())
3442 gclog_or_tty->print_cr("[%d] we are scanning region "
3443 "["PTR_FORMAT", "PTR_FORMAT")",
3444 _task_id, mr.start(), mr.end());
3445 assert(mr.end() <= _cm->finger(),
3446 "otherwise the region shouldn't be on the stack");
3447 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3448 if (_nextMarkBitMap->iterate(bc, mr)) {
3449 assert(!has_aborted(),
3450 "cannot abort the task without aborting the bitmap iteration");
3452 // We finished iterating over the region without aborting.
3453 regular_clock_call();
3454 if (has_aborted())
3455 mr = MemRegion();
3456 else {
3457 mr = _cm->region_stack_pop_with_lock();
3458 // it returns MemRegion() if the pop fails
3459 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3460 }
3461 } else {
3462 assert(has_aborted(), "currently the only way to do so");
3464 // The only way to abort the bitmap iteration is to return
3465 // false from the do_bit() method. However, inside the
3466 // do_bit() method we move the _region_finger to point to the
3467 // object currently being looked at. So, if we bail out, we
3468 // have definitely set _region_finger to something non-null.
3469 assert(_region_finger != NULL, "invariant");
3471 // The iteration was actually aborted. So now _region_finger
3472 // points to the address of the object we last scanned. If we
3473 // leave it there, when we restart this task, we will rescan
3474 // the object. It is easy to avoid this. We move the finger by
3475 // enough to point to the next possible object header (the
3476 // bitmap knows by how much we need to move it as it knows its
3477 // granularity).
3478 MemRegion newRegion =
3479 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3481 if (!newRegion.is_empty()) {
3482 if (_cm->verbose_low()) {
3483 gclog_or_tty->print_cr("[%d] pushing unscanned region"
3484 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3485 _task_id,
3486 newRegion.start(), newRegion.end());
3487 }
3488 // Now push the part of the region we didn't scan on the
3489 // region stack to make sure a task scans it later.
3490 _cm->region_stack_push_with_lock(newRegion);
3491 }
3492 // break from while
3493 mr = MemRegion();
3494 }
3495 _region_finger = NULL;
3496 }
3498 if (_cm->verbose_low())
3499 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3500 _task_id, _cm->region_stack_size());
3501 }
3502 }
3504 void CMTask::print_stats() {
3505 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3506 _task_id, _calls);
3507 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3508 _elapsed_time_ms, _termination_time_ms);
3509 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3510 _step_times_ms.num(), _step_times_ms.avg(),
3511 _step_times_ms.sd());
3512 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3513 _step_times_ms.maximum(), _step_times_ms.sum());
3515 #if _MARKING_STATS_
3516 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3517 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3518 _all_clock_intervals_ms.sd());
3519 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3520 _all_clock_intervals_ms.maximum(),
3521 _all_clock_intervals_ms.sum());
3522 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3523 _clock_due_to_scanning, _clock_due_to_marking);
3524 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3525 _objs_scanned, _objs_found_on_bitmap);
3526 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3527 _local_pushes, _local_pops, _local_max_size);
3528 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3529 _global_pushes, _global_pops, _global_max_size);
3530 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3531 _global_transfers_to, _global_transfers_from);
3532 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3533 _regions_claimed, _region_stack_pops);
3534 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3535 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3536 _steal_attempts, _steals);
3537 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3538 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3539 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3540 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3541 _aborted_timed_out, _aborted_satb, _aborted_termination);
3542 #endif // _MARKING_STATS_
3543 }
3545 /*****************************************************************************
3547 The do_marking_step(time_target_ms) method is the building block
3548 of the parallel marking framework. It can be called in parallel
3549 with other invocations of do_marking_step() on different tasks
3550 (but only one per task, obviously) and concurrently with the
3551 mutator threads, or during remark, hence it eliminates the need
3552 for two versions of the code. When called during remark, it will
3553 pick up from where the task left off during the concurrent marking
3554 phase. Interestingly, tasks are also claimable during evacuation
3555 pauses, since do_marking_step() ensures that it aborts before
3556 it needs to yield.
3558 The data structures that it uses to do marking work are the
3559 following:
3561 (1) Marking Bitmap. If there are gray objects that appear only
3562 on the bitmap (this happens either when dealing with an overflow
3563 or when the initial marking phase has simply marked the roots
3564 and didn't push them on the stack), then tasks claim heap
3565 regions whose bitmap they then scan to find gray objects. A
3566 global finger indicates where the end of the last claimed region
3567 is. A local finger indicates how far into the region a task has
3568 scanned. The two fingers are used to determine how to gray an
3569 object (i.e. whether simply marking it is OK, as it will be
3570 visited by a task in the future, or whether it needs to be also
3571 pushed on a stack).
3573 (2) Local Queue. The local queue of the task which is accessed
3574 reasonably efficiently by the task. Other tasks can steal from
3575 it when they run out of work. Throughout the marking phase, a
3576 task attempts to keep its local queue short but not totally
3577 empty, so that entries are available for stealing by other
3578 tasks. Only when there is no more work will a task totally
3579 drain its local queue.
3581 (3) Global Mark Stack. This handles local queue overflow. During
3582 marking only sets of entries are moved between it and the local
3583 queues, as access to it requires a mutex, and finer-grain
3584 interaction with it might cause contention. If it
3585 overflows, then the marking phase should restart and iterate
3586 over the bitmap to identify gray objects. Throughout the marking
3587 phase, tasks attempt to keep the global mark stack at a small
3588 length but not totally empty, so that entries are available for
3589 popping by other tasks. Only when there is no more work will tasks
3590 totally drain the global mark stack.
3592 (4) Global Region Stack. Entries on it correspond to areas of
3593 the bitmap that need to be scanned since they contain gray
3594 objects. Pushes on the region stack only happen during
3595 evacuation pauses and typically correspond to areas covered by
3596 GC LABs. If it overflows, then the marking phase should restart
3597 and iterate over the bitmap to identify gray objects. Tasks will
3598 try to totally drain the region stack as soon as possible.
3600 (5) SATB Buffer Queue. This is where completed SATB buffers are
3601 made available. Buffers are regularly removed from this queue
3602 and scanned for roots, so that the queue doesn't get too
3603 long. During remark, all completed buffers are processed, as
3604 well as the filled-in parts of any uncompleted buffers.
3606 The do_marking_step() method tries to abort when the time target
3607 has been reached. There are a few other cases when the
3608 do_marking_step() method also aborts:
3610 (1) When the marking phase has been aborted (after a Full GC).
3612 (2) When a global overflow (either on the global stack or the
3613 region stack) has been triggered. Before the task aborts, it
3614 will actually sync up with the other tasks to ensure that all
3615 the marking data structures (local queues, stacks, fingers etc.)
3616 are re-initialised so that when do_marking_step() completes,
3617 the marking phase can immediately restart.
3619 (3) When enough completed SATB buffers are available. The
3620 do_marking_step() method only tries to drain SATB buffers right
3621 at the beginning. So, if enough buffers are available, the
3622 marking step aborts and the SATB buffers are processed at
3623 the beginning of the next invocation.
3625 (4) To yield. When we have to yield, we abort and yield
3626 right at the end of do_marking_step(). This saves us a lot
3627 of hassle as, by yielding, we might allow a Full GC. If this
3628 happens then objects will be compacted underneath our feet, the
3629 heap might shrink, etc. We save checking for this by just
3630 aborting and doing the yield right at the end.
3632 From the above it follows that do_marking_step() should be called in a
3633 loop, or otherwise regularly, until it completes (see sketch below).
3635 If a marking step completes without its has_aborted() flag being
3636 true, it means it has completed the current marking phase (and
3637 also all other marking tasks have done so and have all synced up).
3639 A method called regular_clock_call() is invoked "regularly" (in
3640 sub-ms intervals) throughout marking. It is this clock method that
3641 checks all the abort conditions which were mentioned above and
3642 decides when the task should abort. A work-based scheme is used to
3643 trigger this clock method: when the number of object words the
3644 marking phase has scanned or the number of references the marking
3645 phase has visited reaches a given limit. Additional invocations
3646 of the clock method have been planted in a few other strategic places
3647 too. The initial reason for the clock method was to avoid calling
3648 vtime (os::elapsedVTime()) too regularly, as it is quite expensive. So, once it was in
3649 place, it was natural to piggy-back all the other conditions on it
3650 too and not constantly check them throughout the code.
3652 *****************************************************************************/
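// A minimal sketch of the calling pattern described above (illustrative
// only: `task`, `the_cm` and `time_slice_ms` are hypothetical names; the
// real drivers also interleave yielding and overflow handling):
//
//   double time_slice_ms = 10.0;            // time target per step
//   do {
//     task->do_marking_step(time_slice_ms);
//     // yield to a pending safepoint / handle an overflow restart here
//   } while (task->has_aborted() && !the_cm->has_aborted());
//
// Conceptually, the work-based clock trigger mentioned above boils down
// to checking the counters reset in do_marking_step() against limits
// (the *_limit names are assumed here for illustration):
//
//   if (_words_scanned >= _words_scanned_limit ||
//       _refs_reached >= _refs_reached_limit) {
//     regular_clock_call();
//   }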
3654 void CMTask::do_marking_step(double time_target_ms) {
3655 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3656 assert(concurrent() == _cm->concurrent(), "they should be the same");
3658 assert(concurrent() || _cm->region_stack_empty(),
3659 "the region stack should have been cleared before remark");
3660 assert(_region_finger == NULL,
3661 "this should be non-null only when a region is being scanned");
3663 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3664 assert(_task_queues != NULL, "invariant");
3665 assert(_task_queue != NULL, "invariant");
3666 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3668 assert(!_claimed,
3669 "only one thread should claim this task at any one time");
3671 // OK, this doesn't safeguard against all possible scenarios, as it is
3672 // possible for two threads to set the _claimed flag at the same
3673 // time. But it is only for debugging purposes anyway and it will
3674 // catch most problems.
3675 _claimed = true;
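// os::elapsedVTime() returns this thread's consumed (virtual) CPU time
// in seconds, hence the * 1000.0 scaling to ms below.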
3677 _start_time_ms = os::elapsedVTime() * 1000.0;
3678 statsOnly( _interval_start_time_ms = _start_time_ms );
3680 double diff_prediction_ms =
3681 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3682 _time_target_ms = time_target_ms - diff_prediction_ms;
3684 // set up the variables that are used in the work-based scheme to
3685 // call the regular clock method
3686 _words_scanned = 0;
3687 _refs_reached = 0;
3688 recalculate_limits();
3690 // clear all flags
3691 clear_has_aborted();
3692 _has_aborted_timed_out = false;
3693 _draining_satb_buffers = false;
3695 ++_calls;
3697 if (_cm->verbose_low())
3698 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3699 "target = %1.2lfms >>>>>>>>>>",
3700 _task_id, _calls, _time_target_ms);
3702 // Set up the bitmap and oop closures. Anything that uses them is
3703 // eventually called from this method, so it is OK to allocate these
3704 // locally on the stack of this frame.
3705 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3706 CMOopClosure oop_closure(_g1h, _cm, this);
3707 set_oop_closure(&oop_closure);
3709 if (_cm->has_overflown()) {
3710 // This can happen if the region stack or the mark stack overflows
3711 // during a GC pause and this task, after a yield point,
3712 // restarts. We have to abort as we need to get into the overflow
3713 // protocol which happens right at the end of this task.
3714 set_has_aborted();
3715 }
3717 // First drain any available SATB buffers. After this, we will not
3718 // look at SATB buffers before the next invocation of this method.
3719 // If enough completed SATB buffers are queued up, the regular clock
3720 // will abort this task so that it restarts.
3721 drain_satb_buffers();
3722 // ...then partially drain the local queue and the global stack
3723 drain_local_queue(true);
3724 drain_global_stack(true);
3726 // Then totally drain the region stack. We will not look at
3727 // it again before the next invocation of this method. Entries on
3728 // the region stack are only added during evacuation pauses, for
3729 // which we have to yield. When we do, we abort the task anyway so
3730 // it will look at the region stack again when it restarts.
3731 bitmap_closure.set_scanning_heap_region(false);
3732 drain_region_stack(&bitmap_closure);
3733 // ...then partially drain the local queue and the global stack
3734 drain_local_queue(true);
3735 drain_global_stack(true);
3737 do {
3738 if (!has_aborted() && _curr_region != NULL) {
3739 // This means that we're already holding on to a region.
3740 assert(_finger != NULL, "if region is not NULL, then the finger "
3741 "should not be NULL either");
3743 // We might have restarted this task after an evacuation pause
3744 // which might have evacuated the region we're holding on to
3745 // underneath our feet. Let's read its limit again to make sure
3746 // that we do not iterate over a region of the heap that
3747 // contains garbage (update_region_limit() will also move
3748 // _finger to the start of the region if it is found empty).
3749 update_region_limit();
3750 // We will start from _finger not from the start of the region,
3751 // as we might be restarting this task after aborting half-way
3752 // through scanning this region. In this case, _finger points to
3753 // the address where we last found a marked object. If this is a
3754 // fresh region, _finger points to start().
3755 MemRegion mr = MemRegion(_finger, _region_limit);
3757 if (_cm->verbose_low())
3758 gclog_or_tty->print_cr("[%d] we're scanning part "
3759 "["PTR_FORMAT", "PTR_FORMAT") "
3760 "of region "PTR_FORMAT,
3761 _task_id, _finger, _region_limit, _curr_region);
3763 // Let's iterate over the bitmap of the part of the
3764 // region that is left.
3765 bitmap_closure.set_scanning_heap_region(true);
3766 if (mr.is_empty() ||
3767 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3768 // We successfully completed iterating over the region. Now,
3769 // let's give up the region.
3770 giveup_current_region();
3771 regular_clock_call();
3772 } else {
3773 assert(has_aborted(), "currently the only way to do so");
3774 // The only way to abort the bitmap iteration is to return
3775 // false from the do_bit() method. However, inside the
3776 // do_bit() method we move the _finger to point to the
3777 // object currently being looked at. So, if we bail out, we
3778 // have definitely set _finger to something non-null.
3779 assert(_finger != NULL, "invariant");
3781 // Region iteration was actually aborted. So now _finger
3782 // points to the address of the object we last scanned. If we
3783 // leave it there, when we restart this task, we will rescan
3784 // the object. It is easy to avoid this. We move the finger by
3785 // enough to point to the next possible object header (the
3786 // bitmap knows by how much we need to move it as it knows its
3787 // granularity).
3788 assert(_finger < _region_limit, "invariant");
3789 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3790 // Check if bitmap iteration was aborted while scanning the last object
3791 if (new_finger >= _region_limit) {
3792 giveup_current_region();
3793 } else {
3794 move_finger_to(new_finger);
3795 }
3796 }
3797 }
3798 // At this point we have either completed iterating over the
3799 // region we were holding on to, or we have aborted.
3801 // We then partially drain the local queue and the global stack.
3802 // (Do we really need this?)
3803 drain_local_queue(true);
3804 drain_global_stack(true);
3806 // Read the note on the claim_region() method on why it might
3807 // return NULL with potentially more regions available for
3808 // claiming and why we have to check out_of_regions() to determine
3809 // whether we're done or not.
3810 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3811 // We are going to try to claim a new region. We should have
3812 // given up on the previous one.
3813 // Separated the asserts so that we know which one fires.
3814 assert(_curr_region == NULL, "invariant");
3815 assert(_finger == NULL, "invariant");
3816 assert(_region_limit == NULL, "invariant");
3817 if (_cm->verbose_low())
3818 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3819 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3820 if (claimed_region != NULL) {
3821 // Yes, we managed to claim one
3822 statsOnly( ++_regions_claimed );
3824 if (_cm->verbose_low())
3825 gclog_or_tty->print_cr("[%d] we successfully claimed "
3826 "region "PTR_FORMAT,
3827 _task_id, claimed_region);
3829 setup_for_region(claimed_region);
3830 assert(_curr_region == claimed_region, "invariant");
3831 }
3832 // It is important to call the regular clock here. It might take
3833 // a while to claim a region if, for example, we hit a large
3834 // block of empty regions. So we need to call the regular clock
3835 // method once round the loop to make sure it's called
3836 // frequently enough.
3837 regular_clock_call();
3838 }
3840 if (!has_aborted() && _curr_region == NULL) {
3841 assert(_cm->out_of_regions(),
3842 "at this point we should be out of regions");
3843 }
3844 } while (_curr_region != NULL && !has_aborted());
3846 if (!has_aborted()) {
3847 // We cannot check whether the global stack is empty, since other
3848 // tasks might be pushing objects to it concurrently. We also cannot
3849 // check if the region stack is empty because if a thread is aborting
3850 // it can push a partially done region back.
3851 assert(_cm->out_of_regions(),
3852 "at this point we should be out of regions");
3854 if (_cm->verbose_low())
3855 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3857 // Try to reduce the number of available SATB buffers so that
3858 // remark has less work to do.
3859 drain_satb_buffers();
3860 }
3862 // Since we've done everything else, we can now totally drain the
3863 // local queue and global stack.
3864 drain_local_queue(false);
3865 drain_global_stack(false);
3867 // Attempt at work stealing from other tasks' queues.
3868 if (!has_aborted()) {
3869 // We have not aborted. This means that we have finished all that
3870 // we could. Let's try to do some stealing...
3872 // We cannot check whether the global stack is empty, since other
3873 // tasks might be pushing objects to it concurrently. We also cannot
3874 // check if the region stack is empty because if a thread is aborting
3875 // it can push a partially done region back.
3876 assert(_cm->out_of_regions() && _task_queue->size() == 0,
3877 "only way to reach here");
3879 if (_cm->verbose_low())
3880 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3882 while (!has_aborted()) {
3883 oop obj;
3884 statsOnly( ++_steal_attempts );
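// Victim selection in try_stealing() is randomized; _hash_seed is this
// task's private seed for it.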
3886 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3887 if (_cm->verbose_medium())
3888 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3889 _task_id, (void*) obj);
3891 statsOnly( ++_steals );
3893 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3894 "any stolen object should be marked");
3895 scan_object(obj);
3897 // And since we're towards the end, let's totally drain the
3898 // local queue and global stack.
3899 drain_local_queue(false);
3900 drain_global_stack(false);
3901 } else {
3902 break;
3903 }
3904 }
3905 }
3907 // We still haven't aborted. Now, let's try to get into the
3908 // termination protocol.
3909 if (!has_aborted()) {
3910 // We cannot check whether the global stack is empty, since other
3911 // tasks might be concurrently pushing objects on it. We also cannot
3912 // check if the region stack is empty because if a thread is aborting
3913 // it can push a partially done region back.
3914 // Separated the asserts so that we know which one fires.
3915 assert(_cm->out_of_regions(), "only way to reach here");
3916 assert(_task_queue->size() == 0, "only way to reach here");
3918 if (_cm->verbose_low())
3919 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3921 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3922 // The CMTask class also extends the TerminatorTerminator class,
3923 // hence its should_exit_termination() method will also decide
3924 // whether to exit the termination protocol or not.
3925 bool finished = _cm->terminator()->offer_termination(this);
3926 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3927 _termination_time_ms +=
3928 termination_end_time_ms - _termination_start_time_ms;
3930 if (finished) {
3931 // We're all done.
3933 if (_task_id == 0) {
3934 // let's allow task 0 to do this
3935 if (concurrent()) {
3936 assert(_cm->concurrent_marking_in_progress(), "invariant");
3937 // we need to set this to false before the next
3938 // safepoint. This way we ensure that the marking phase
3939 // doesn't observe any more heap expansions.
3940 _cm->clear_concurrent_marking_in_progress();
3941 }
3942 }
3944 // We can now guarantee that the global stack is empty, since
3945 // all other tasks have finished. We separated the guarantees so
3946 // that, if a condition is false, we can immediately find out
3947 // which one.
3948 guarantee(_cm->out_of_regions(), "only way to reach here");
3949 guarantee(_cm->region_stack_empty(), "only way to reach here");
3950 guarantee(_cm->mark_stack_empty(), "only way to reach here");
3951 guarantee(_task_queue->size() == 0, "only way to reach here");
3952 guarantee(!_cm->has_overflown(), "only way to reach here");
3953 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3954 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
3956 if (_cm->verbose_low())
3957 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3958 } else {
3959 // Apparently there's more work to do. Let's abort this task; it
3960 // will be restarted and we can hopefully find more things to do.
3962 if (_cm->verbose_low())
3963 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3965 set_has_aborted();
3966 statsOnly( ++_aborted_termination );
3967 }
3968 }
3970 // Mainly for debugging purposes to make sure that a pointer to the
3971 // closure which was stack-allocated in this frame doesn't
3972 // escape it by accident.
3973 set_oop_closure(NULL);
3974 double end_time_ms = os::elapsedVTime() * 1000.0;
3975 double elapsed_time_ms = end_time_ms - _start_time_ms;
3976 // Update the step history.
3977 _step_times_ms.add(elapsed_time_ms);
3979 if (has_aborted()) {
3980 // The task was aborted for some reason.
3982 statsOnly( ++_aborted );
3984 if (_has_aborted_timed_out) {
3985 double diff_ms = elapsed_time_ms - _time_target_ms;
3986 // Keep statistics of how well we did with respect to hitting
3987 // our target only if we actually timed out (if we aborted for
3988 // other reasons, then the results might get skewed).
3989 _marking_step_diffs_ms.add(diff_ms);
3990 }
3992 if (_cm->has_overflown()) {
3993 // This is the interesting one. We aborted because a global
3994 // overflow was raised. This means we have to restart the
3995 // marking phase and start iterating over regions. However, in
3996 // order to do this we have to make sure that all tasks stop
3997 // what they are doing and re-initialise in a safe manner. We
3998 // will achieve this with the use of two barrier sync points.
4000 if (_cm->verbose_low())
4001 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4003 _cm->enter_first_sync_barrier(_task_id);
4004 // When we exit this sync barrier we know that all tasks have
4005 // stopped doing marking work. So, it's now safe to
4006 // re-initialise our data structures. At the end of this method,
4007 // task 0 will clear the global data structures.
4009 statsOnly( ++_aborted_overflow );
4011 // We clear the local state of this task...
4012 clear_region_fields();
4014 // ...and enter the second barrier.
4015 _cm->enter_second_sync_barrier(_task_id);
4016 // At this point everything has been re-initialised and we're
4017 // ready to restart.
4018 }
4020 if (_cm->verbose_low()) {
4021 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4022 "elapsed = %1.2lfms <<<<<<<<<<",
4023 _task_id, _time_target_ms, elapsed_time_ms);
4024 if (_cm->has_aborted())
4025 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4026 _task_id);
4027 }
4028 } else {
4029 if (_cm->verbose_low())
4030 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4031 "elapsed = %1.2lfms <<<<<<<<<<",
4032 _task_id, _time_target_ms, elapsed_time_ms);
4033 }
4035 _claimed = false;
4036 }
4038 CMTask::CMTask(int task_id,
4039 ConcurrentMark* cm,
4040 CMTaskQueue* task_queue,
4041 CMTaskQueueSet* task_queues)
4042 : _g1h(G1CollectedHeap::heap()),
4043 _task_id(task_id), _cm(cm),
4044 _claimed(false),
4045 _nextMarkBitMap(NULL), _hash_seed(17),
4046 _task_queue(task_queue),
4047 _task_queues(task_queues),
4048 _oop_closure(NULL) {
4049 guarantee(task_queue != NULL, "invariant");
4050 guarantee(task_queues != NULL, "invariant");
4052 statsOnly( _clock_due_to_scanning = 0;
4053 _clock_due_to_marking = 0 );
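// Seed the step-time diff predictor so that get_new_prediction() has a
// non-empty sample before the first do_marking_step() call (0.5ms is
// presumably an empirical starting value).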
4055 _marking_step_diffs_ms.add(0.5);
4056 }