Thu, 22 Apr 2010 10:02:38 -0700
6819061: G1: eliminate serial Other times that are proportional to the collection set length
6871109: G1: remove the concept of the scan only prefix
Summary: Removed scan only regions and associated code. The young portion of the collection set is now constructed incrementally - when a young region is retired as the current allocation region it is added to the collection set.
Reviewed-by: apetrusenko, iveresov, tonyp
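
A minimal sketch of the incremental construction described in the summary
(illustrative only; the helper and policy method names below are hypothetical,
not necessarily the ones this changeset introduces):

    // Retiring the current young allocation region now also appends it to
    // the collection set, a constant-time step per region, so the pause no
    // longer walks the whole young list (or a scan-only prefix) to build it.
    void retire_current_alloc_region(HeapRegion* hr) {  // hypothetical helper
      g1_policy()->add_region_to_incremental_cset(hr);  // hypothetical name
    }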
/*
 * Copyright 2001-2009 Sun Microsystems, Inc.  All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// CM Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                    (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CM bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CM bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}
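
// Note on the lock-free push below: a slot is first claimed by CAS'ing
// _index forward, and only then is the oop stored into _base[index].
// Bulk operations (par_push_arr / par_pop_arr further down) are instead
// serialized by ParGCRareEvent_lock.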

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}
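
// Note: unlike the single-oop par_push above, bulk pops (like bulk pushes)
// take ParGCRareEvent_lock, so two bulk operations never interleave; the
// oops are copied out into ptr_arr before _index is updated.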

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Currently we do not call this at all. Normally we would call it
// during the concurrent marking / remark phases but we now call
// the lock-based version instead. But we might want to resurrect this
// code in the future. So, we'll leave it here commented out.
#if 0
MemRegion CMRegionStack::pop() {
  while (true) {
    // Otherwise...
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}
#endif // 0

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}
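
// Note on drain() below: this is the standard tri-colour draining loop.
// Every oop popped off the stack is grey (marked on the bitmap but not yet
// scanned); applying cl to its fields is expected to mark-and-push any
// unmarked (white) objects it finds, so an empty stack means no grey
// objects remain.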

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in the G1 heap.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;
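
      // Worked example with illustrative values: G1MarkingOverheadPercent=5,
      // MaxGCPauseMillis=200, GCPauseIntervalMillis=1000, 8 processors:
      //   marking_overhead      = 0.05
      //   overall_cm_overhead   = 200 * 0.05 / 1000   = 0.01
      //   cpu_ratio             = 1 / 8               = 0.125
      //   marking_thread_num    = ceil(0.01 / 0.125)  = 1
      //   marking_task_overhead = 0.01 / 1 * 8        = 0.08
      //   sleep_factor          = (1 - 0.08) / 0.08   = 11.5
      // i.e. one marking thread that sleeps ~11.5x as long as it works.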

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CM bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    true /* use_prev_marking */, true /* all */);
  }

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(true,    // activate StrongRootsScope
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            NULL,     // do not visit active blobs
                            &older    // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one thread
   could be in the barrier sync code, waiting for the other thread to
   also sync up, whereas another one could be trying to yield, while
   also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync barrier
   is one of the last things do_marking_step() does, and it doesn't
   manipulate any data structures afterwards.
*/

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures, and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative, but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    // This is the end of the marking cycle, so we expect all
    // threads to have SATB queues with active set to true.
    JavaThread::satb_mark_queue_set().set_active_all_threads(
                                                  false, /* new active value */
                                                  true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */      true,
                               /* silent */           false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;
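
    // Worked example (illustrative, assuming 512-byte cards, card_shift == 9):
    // an object whose first word is at 0x1ff0 has obj_card_num
    // 0x1ff0 >> 9 = 0xf; if its last word is at 0x2008, obj_last_card_num is
    // 0x2008 >> 9 = 0x10, so the object contributes two consecutive cards
    // to the current run of live cards.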

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded.  It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set.  We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};

void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  size_t _cleared_h_regions;
  size_t _freed_regions;
  UncleanRegionList* _unclean_region_list;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (ParallelGCThreads > 0) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           UncleanRegionList* list,
                           int worker_num)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _unclean_region_list(list)
{}

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  r->reset_gc_time_stamp();
  if (!r->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    r->note_end_of_marking();
    _max_live_bytes += r->max_live_bytes();
    _g1->free_region_if_totally_empty_work(r,
                                           _freed_bytes,
                                           _cleared_h_regions,
                                           _freed_regions,
                                           _unclean_region_list,
                                           true /*par*/);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}
1619 void ConcurrentMark::cleanup() {
1620 // world is stopped at this checkpoint
1621 assert(SafepointSynchronize::is_at_safepoint(),
1622 "world should be stopped");
1623 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1625 // If a full collection has happened, we shouldn't do this.
1626 if (has_aborted()) {
1627 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1628 return;
1629 }
1631 if (VerifyDuringGC) {
1632 HandleMark hm; // handle scope
1633 gclog_or_tty->print(" VerifyDuringGC:(before)");
1634 Universe::heap()->prepare_for_verify();
1635 Universe::verify(/* allow dirty */ true,
1636 /* silent */ false,
1637 /* prev marking */ true);
1638 }
1640 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1641 g1p->record_concurrent_mark_cleanup_start();
1643 double start = os::elapsedTime();
1645 // Do counting once more with the world stopped for good measure.
1646 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1647 &_region_bm, &_card_bm);
1648 if (ParallelGCThreads > 0) {
1649 assert(g1h->check_heap_region_claim_values(
1650 HeapRegion::InitialClaimValue),
1651 "sanity check");
1653 int n_workers = g1h->workers()->total_workers();
1654 g1h->set_par_threads(n_workers);
1655 g1h->workers()->run_task(&g1_par_count_task);
1656 g1h->set_par_threads(0);
1658 assert(g1h->check_heap_region_claim_values(
1659 HeapRegion::FinalCountClaimValue),
1660 "sanity check");
1661 } else {
1662 g1_par_count_task.work(0);
1663 }
1665 size_t known_garbage_bytes =
1666 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1667 #if 0
1668 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1669 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1670 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1671 (double) known_garbage_bytes / (double) (1024 * 1024));
1672 #endif // 0
1673 g1p->set_known_garbage_bytes(known_garbage_bytes);
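// Worked example of the arithmetic above: if final counting reports
// used = 512M and live = 384M, known_garbage_bytes comes out at 128M.
// The policy tracks this figure (see decrease_known_garbage_bytes()
// further down) as garbage is reclaimed.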
1675 size_t start_used_bytes = g1h->used();
1676 _at_least_one_mark_complete = true;
1677 g1h->set_marking_complete();
1679 double count_end = os::elapsedTime();
1680 double this_final_counting_time = (count_end - start);
1681 if (G1PrintParCleanupStats) {
1682 gclog_or_tty->print_cr("Cleanup:");
1683 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1684 this_final_counting_time*1000.0);
1685 }
1686 _total_counting_time += this_final_counting_time;
1688 // Install newly created mark bitMap as "prev".
1689 swapMarkBitMaps();
1691 g1h->reset_gc_time_stamp();
1693 // Note end of marking in all heap regions.
1694 double note_end_start = os::elapsedTime();
1695 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1696 if (ParallelGCThreads > 0) {
1697 int n_workers = g1h->workers()->total_workers();
1698 g1h->set_par_threads(n_workers);
1699 g1h->workers()->run_task(&g1_par_note_end_task);
1700 g1h->set_par_threads(0);
1702 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1703 "sanity check");
1704 } else {
1705 g1_par_note_end_task.work(0);
1706 }
1707 // Tell the mutators that there might be unclean regions coming...
1708 g1h->set_unclean_regions_coming(true);
1709 double note_end_end = os::elapsedTime();
1710 if (G1PrintParCleanupStats) {
1711 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1712 (note_end_end - note_end_start)*1000.0);
1713 }
1715 // Note: if enabled, the rem set scrubbing must be done before the
1716 // record_concurrent_mark_cleanup_end() call below, since it affects
1717 // the metric by which we sort the heap regions.
1718 if (G1ScrubRemSets) {
1719 double rs_scrub_start = os::elapsedTime();
1720 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1721 if (ParallelGCThreads > 0) {
1722 int n_workers = g1h->workers()->total_workers();
1723 g1h->set_par_threads(n_workers);
1724 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1725 g1h->set_par_threads(0);
1727 assert(g1h->check_heap_region_claim_values(
1728 HeapRegion::ScrubRemSetClaimValue),
1729 "sanity check");
1730 } else {
1731 g1_par_scrub_rs_task.work(0);
1732 }
1734 double rs_scrub_end = os::elapsedTime();
1735 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1736 _total_rs_scrub_time += this_rs_scrub_time;
1737 }
1739 // this will also free any regions totally full of garbage objects,
1740 // and sort the regions.
1741 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1742 g1_par_note_end_task.freed_bytes(),
1743 g1_par_note_end_task.max_live_bytes());
1745 // Statistics.
1746 double end = os::elapsedTime();
1747 _cleanup_times.add((end - start) * 1000.0);
1749 // G1CollectedHeap::heap()->print();
1750 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1751 // G1CollectedHeap::heap()->get_gc_time_stamp());
1753 if (PrintGC || PrintGCDetails) {
1754 g1h->print_size_transition(gclog_or_tty,
1755 start_used_bytes,
1756 g1h->used(),
1757 g1h->capacity());
1758 }
1760 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1761 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1763 // We need to make this be a "collection" so any collection pause that
1764 // races with it goes around and waits for completeCleanup to finish.
1765 g1h->increment_total_collections();
1767 if (VerifyDuringGC) {
1768 HandleMark hm; // handle scope
1769 gclog_or_tty->print(" VerifyDuringGC:(after)");
1770 Universe::heap()->prepare_for_verify();
1771 Universe::verify(/* allow dirty */ true,
1772 /* silent */ false,
1773 /* prev marking */ true);
1774 }
1775 }
1777 void ConcurrentMark::completeCleanup() {
1778 // A full collection intervened.
1779 if (has_aborted()) return;
1781 int first = 0;
1782 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1783 for (int t = 0; t < last; t++) {
1784 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1785 assert(list->well_formed(), "Inv");
1786 HeapRegion* hd = list->hd();
1787 while (hd != NULL) {
1788 // Now finish up: clear the remembered set before the region is recycled.
1789 hd->rem_set()->clear();
1790 HeapRegion* next_hd = hd->next_from_unclean_list();
1791 (void)list->pop();
1792 assert(list->hd() == next_hd, "how not?");
1793 _g1h->put_region_on_unclean_list(hd);
1794 if (!hd->isHumongous()) {
1795 // Increment the _free_regions count by 1.
1796 _g1h->finish_free_region_work(0, 0, 1, NULL);
1797 }
1798 hd = list->hd();
1799 assert(hd == next_hd, "how not?");
1800 }
1801 }
1802 }
1805 class G1CMIsAliveClosure: public BoolObjectClosure {
1806 G1CollectedHeap* _g1;
1807 public:
1808 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1809 _g1(g1)
1810 {}
1812 void do_object(oop obj) {
1813 assert(false, "not to be invoked");
1814 }
1815 bool do_object_b(oop obj) {
1816 HeapWord* addr = (HeapWord*)obj;
1817 return addr != NULL &&
1818 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1819 }
1820 };
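// The liveness predicate above, spelled out:
//   addr == NULL                  -> not alive
//   outside the G1 reserved area  -> treated as alive
//   inside G1, !is_obj_ill(obj)   -> alive (marked on the next bitmap,
//                                    or allocated since marking started)
//   inside G1, is_obj_ill(obj)    -> not alive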
1822 class G1CMKeepAliveClosure: public OopClosure {
1823 G1CollectedHeap* _g1;
1824 ConcurrentMark* _cm;
1825 CMBitMap* _bitMap;
1826 public:
1827 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1828 CMBitMap* bitMap) :
1829 _g1(g1), _cm(cm),
1830 _bitMap(bitMap) {}
1832 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1833 virtual void do_oop( oop* p) { do_oop_work(p); }
1835 template <class T> void do_oop_work(T* p) {
1836 oop thisOop = oopDesc::load_decode_heap_oop(p);
1837 HeapWord* addr = (HeapWord*)thisOop;
1838 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1839 _bitMap->mark(addr);
1840 _cm->mark_stack_push(thisOop);
1841 }
1842 }
1843 };
1845 class G1CMDrainMarkingStackClosure: public VoidClosure {
1846 CMMarkStack* _markStack;
1847 CMBitMap* _bitMap;
1848 G1CMKeepAliveClosure* _oopClosure;
1849 public:
1850 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1851 G1CMKeepAliveClosure* oopClosure) :
1852 _bitMap(bitMap),
1853 _markStack(markStack),
1854 _oopClosure(oopClosure)
1855 {}
1857 void do_void() {
1858 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1859 }
1860 };
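// The three closures above are handed to the ReferenceProcessor below:
// the is-alive closure decides whether a referent is still reachable,
// the keep-alive closure marks and pushes referents that must survive,
// and the drain closure empties the mark stack so that keep-alive work
// is completed transitively between the reference-processing phases.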
1862 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1863 ResourceMark rm;
1864 HandleMark hm;
1865 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1866 ReferenceProcessor* rp = g1h->ref_processor();
1868 // Process weak references.
1869 rp->setup_policy(clear_all_soft_refs);
1870 assert(_markStack.isEmpty(), "mark stack should be empty");
1872 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1873 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1874 G1CMDrainMarkingStackClosure
1875 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1876 &g1KeepAliveClosure);
1878 // XXXYYY Also: copy the parallel ref processing code from CMS.
1879 rp->process_discovered_references(&g1IsAliveClosure,
1880 &g1KeepAliveClosure,
1881 &g1DrainMarkingStackClosure,
1882 NULL);
1883 assert(_markStack.overflow() || _markStack.isEmpty(),
1884 "mark stack should be empty (unless it overflowed)");
1885 if (_markStack.overflow()) {
1886 set_has_overflown();
1887 }
1889 rp->enqueue_discovered_references();
1890 rp->verify_no_references_recorded();
1891 assert(!rp->discovery_enabled(), "should have been disabled");
1893 // Now clean up stale oops in SymbolTable and StringTable
1894 SymbolTable::unlink(&g1IsAliveClosure);
1895 StringTable::unlink(&g1IsAliveClosure);
1896 }
1898 void ConcurrentMark::swapMarkBitMaps() {
1899 CMBitMapRO* temp = _prevMarkBitMap;
1900 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1901 _nextMarkBitMap = (CMBitMap*) temp;
1902 }
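// Lifecycle note for the swap above: a completed marking cycle leaves the
// full marking information in the "next" bitmap, so it is installed as the
// read-only "prev" bitmap; the old "prev" storage is recycled as the new
// "next" bitmap and is expected to be cleared before the next cycle starts.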
1904 class CMRemarkTask: public AbstractGangTask {
1905 private:
1906 ConcurrentMark *_cm;
1908 public:
1909 void work(int worker_i) {
1910 // Since all available tasks are actually started, we should
1911 // only proceed if we're supposed to be active.
1912 if ((size_t)worker_i < _cm->active_tasks()) {
1913 CMTask* task = _cm->task(worker_i);
1914 task->record_start_time();
1915 do {
1916 task->do_marking_step(1000000000.0 /* something very large */);
1917 } while (task->has_aborted() && !_cm->has_overflown());
1918 // If we overflow, then we do not want to restart. We instead
1919 // want to abort remark and do concurrent marking again.
1920 task->record_end_time();
1921 }
1922 }
1924 CMRemarkTask(ConcurrentMark* cm) :
1925 AbstractGangTask("Par Remark"), _cm(cm) { }
1926 };
1928 void ConcurrentMark::checkpointRootsFinalWork() {
1929 ResourceMark rm;
1930 HandleMark hm;
1931 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1933 g1h->ensure_parsability(false);
1935 if (ParallelGCThreads > 0) {
1936 G1CollectedHeap::StrongRootsScope srs(g1h);
1937 // this is remark, so we'll use up all available threads
1938 int active_workers = ParallelGCThreads;
1939 set_phase(active_workers, false);
1941 CMRemarkTask remarkTask(this);
1942 // We will start all available threads, even if we decide that the
1943 // active_workers will be fewer. The extra ones will just bail out
1944 // immediately.
1945 int n_workers = g1h->workers()->total_workers();
1946 g1h->set_par_threads(n_workers);
1947 g1h->workers()->run_task(&remarkTask);
1948 g1h->set_par_threads(0);
1949 } else {
1950 G1CollectedHeap::StrongRootsScope srs(g1h);
1951 // this is remark, so we'll use up all available threads
1952 int active_workers = 1;
1953 set_phase(active_workers, false);
1955 CMRemarkTask remarkTask(this);
1956 // In the serial case we simply run the task once on the calling thread.
1959 remarkTask.work(0);
1960 }
1961 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1962 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1964 print_stats();
1966 if (!restart_for_overflow())
1967 set_non_marking_state();
1969 #if VERIFY_OBJS_PROCESSED
1970 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1971 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1972 _scan_obj_cl.objs_processed,
1973 ThreadLocalObjQueue::objs_enqueued);
1974 guarantee(_scan_obj_cl.objs_processed ==
1975 ThreadLocalObjQueue::objs_enqueued,
1976 "Different number of objs processed and enqueued.");
1977 }
1978 #endif
1979 }
1981 #ifndef PRODUCT
1983 class PrintReachableOopClosure: public OopClosure {
1984 private:
1985 G1CollectedHeap* _g1h;
1986 CMBitMapRO* _bitmap;
1987 outputStream* _out;
1988 bool _use_prev_marking;
1989 bool _all;
1991 public:
1992 PrintReachableOopClosure(CMBitMapRO* bitmap,
1993 outputStream* out,
1994 bool use_prev_marking,
1995 bool all) :
1996 _g1h(G1CollectedHeap::heap()),
1997 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
1999 void do_oop(narrowOop* p) { do_oop_work(p); }
2000 void do_oop( oop* p) { do_oop_work(p); }
2002 template <class T> void do_oop_work(T* p) {
2003 oop obj = oopDesc::load_decode_heap_oop(p);
2004 const char* str = NULL;
2005 const char* str2 = "";
2007 if (obj == NULL) {
2008 str = "";
2009 } else if (!_g1h->is_in_g1_reserved(obj)) {
2010 str = " O";
2011 } else {
2012 HeapRegion* hr = _g1h->heap_region_containing(obj);
2013 guarantee(hr != NULL, "invariant");
2014 bool over_tams = false;
2015 if (_use_prev_marking) {
2016 over_tams = hr->obj_allocated_since_prev_marking(obj);
2017 } else {
2018 over_tams = hr->obj_allocated_since_next_marking(obj);
2019 }
2020 bool marked = _bitmap->isMarked((HeapWord*) obj);
2022 if (over_tams) {
2023 str = " >";
2024 if (marked) {
2025 str2 = " AND MARKED";
2026 }
2027 } else if (marked) {
2028 str = " M";
2029 } else {
2030 str = " NOT";
2031 }
2032 }
2034 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2035 p, (void*) obj, str, str2);
2036 }
2037 };
2039 class PrintReachableObjectClosure : public ObjectClosure {
2040 private:
2041 CMBitMapRO* _bitmap;
2042 outputStream* _out;
2043 bool _use_prev_marking;
2044 bool _all;
2045 HeapRegion* _hr;
2047 public:
2048 PrintReachableObjectClosure(CMBitMapRO* bitmap,
2049 outputStream* out,
2050 bool use_prev_marking,
2051 bool all,
2052 HeapRegion* hr) :
2053 _bitmap(bitmap), _out(out),
2054 _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2056 void do_object(oop o) {
2057 bool over_tams;
2058 if (_use_prev_marking) {
2059 over_tams = _hr->obj_allocated_since_prev_marking(o);
2060 } else {
2061 over_tams = _hr->obj_allocated_since_next_marking(o);
2062 }
2063 bool marked = _bitmap->isMarked((HeapWord*) o);
2064 bool print_it = _all || over_tams || marked;
2066 if (print_it) {
2067 _out->print_cr(" "PTR_FORMAT"%s",
2068 o, (over_tams) ? " >" : (marked) ? " M" : "");
2069 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2070 o->oop_iterate(&oopCl);
2071 }
2072 }
2073 };
2075 class PrintReachableRegionClosure : public HeapRegionClosure {
2076 private:
2077 CMBitMapRO* _bitmap;
2078 outputStream* _out;
2079 bool _use_prev_marking;
2080 bool _all;
2082 public:
2083 bool doHeapRegion(HeapRegion* hr) {
2084 HeapWord* b = hr->bottom();
2085 HeapWord* e = hr->end();
2086 HeapWord* t = hr->top();
2087 HeapWord* p = NULL;
2088 if (_use_prev_marking) {
2089 p = hr->prev_top_at_mark_start();
2090 } else {
2091 p = hr->next_top_at_mark_start();
2092 }
2093 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2094 "TAMS: "PTR_FORMAT, b, e, t, p);
2095 _out->cr();
2097 HeapWord* from = b;
2098 HeapWord* to = t;
2100 if (to > from) {
2101 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2102 _out->cr();
2103 PrintReachableObjectClosure ocl(_bitmap, _out,
2104 _use_prev_marking, _all, hr);
2105 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2106 _out->cr();
2107 }
2109 return false;
2110 }
2112 PrintReachableRegionClosure(CMBitMapRO* bitmap,
2113 outputStream* out,
2114 bool use_prev_marking,
2115 bool all) :
2116 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2117 };
2119 void ConcurrentMark::print_reachable(const char* str,
2120 bool use_prev_marking,
2121 bool all) {
2122 gclog_or_tty->cr();
2123 gclog_or_tty->print_cr("== Doing heap dump... ");
2125 if (G1PrintReachableBaseFile == NULL) {
2126 gclog_or_tty->print_cr(" #### error: no base file defined");
2127 return;
2128 }
2130 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2131 (JVM_MAXPATHLEN - 1)) {
2132 gclog_or_tty->print_cr(" #### error: file name too long");
2133 return;
2134 }
2136 char file_name[JVM_MAXPATHLEN];
2137 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2138 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2140 fileStream fout(file_name);
2141 if (!fout.is_open()) {
2142 gclog_or_tty->print_cr(" #### error: could not open file");
2143 return;
2144 }
2146 outputStream* out = &fout;
2148 CMBitMapRO* bitmap = NULL;
2149 if (use_prev_marking) {
2150 bitmap = _prevMarkBitMap;
2151 } else {
2152 bitmap = _nextMarkBitMap;
2153 }
2155 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2156 out->cr();
2158 out->print_cr("--- ITERATING OVER REGIONS");
2159 out->cr();
2160 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2161 _g1h->heap_region_iterate(&rcl);
2162 out->cr();
2164 gclog_or_tty->print_cr(" done");
2165 gclog_or_tty->flush();
2166 }
2168 #endif // PRODUCT
2170 // This note is for drainAllSATBBuffers and the code in between.
2171 // In the future we could reuse a task to do this work during an
2172 // evacuation pause (since now tasks are not active and can be claimed
2173 // during an evacuation pause). This was a late change to the code and
2174 // is currently not being taken advantage of.
2176 class CMGlobalObjectClosure : public ObjectClosure {
2177 private:
2178 ConcurrentMark* _cm;
2180 public:
2181 void do_object(oop obj) {
2182 _cm->deal_with_reference(obj);
2183 }
2185 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2186 };
2188 void ConcurrentMark::deal_with_reference(oop obj) {
2189 if (verbose_high())
2190 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2191 (void*) obj);
2194 HeapWord* objAddr = (HeapWord*) obj;
2195 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2196 if (_g1h->is_in_g1_reserved(objAddr)) {
2197 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2198 HeapRegion* hr = _g1h->heap_region_containing(obj);
2199 if (_g1h->is_obj_ill(obj, hr)) {
2200 if (verbose_high())
2201 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2202 "marked", (void*) obj);
2204 // we need to mark it first
2205 if (_nextMarkBitMap->parMark(objAddr)) {
2206 // No OrderAccess:store_load() is needed. It is implicit in the
2207 // CAS done in parMark(objAddr) above
2208 HeapWord* finger = _finger;
2209 if (objAddr < finger) {
2210 if (verbose_high())
2211 gclog_or_tty->print_cr("[global] below the global finger "
2212 "("PTR_FORMAT"), pushing it", finger);
2213 if (!mark_stack_push(obj)) {
2214 if (verbose_low())
2215 gclog_or_tty->print_cr("[global] global stack overflow during "
2216 "deal_with_reference");
2217 }
2218 }
2219 }
2220 }
2221 }
2222 }
2224 void ConcurrentMark::drainAllSATBBuffers() {
2225 CMGlobalObjectClosure oc(this);
2226 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2227 satb_mq_set.set_closure(&oc);
2229 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2230 if (verbose_medium())
2231 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2232 }
2234 // no need to check whether we should do this, as this is only
2235 // called during an evacuation pause
2236 satb_mq_set.iterate_closure_all_threads();
2238 satb_mq_set.set_closure(NULL);
2239 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2240 }
2242 void ConcurrentMark::markPrev(oop p) {
2243 // Note we are overriding the read-only view of the prev map here, via
2244 // the cast.
2245 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2246 }
2248 void ConcurrentMark::clear(oop p) {
2249 assert(p != NULL && p->is_oop(), "expected an oop");
2250 HeapWord* addr = (HeapWord*)p;
2251 assert(addr >= _nextMarkBitMap->startWord() &&
2252 addr < _nextMarkBitMap->endWord(), "in a region");
2254 _nextMarkBitMap->clear(addr);
2255 }
2257 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2258 // Note we are overriding the read-only view of the prev map here, via
2259 // the cast.
2260 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2261 _nextMarkBitMap->clearRange(mr);
2262 }
2264 HeapRegion*
2265 ConcurrentMark::claim_region(int task_num) {
2266 // "checkpoint" the finger
2267 HeapWord* finger = _finger;
2269 // _heap_end will not change underneath our feet; it only changes at
2270 // yield points.
2271 while (finger < _heap_end) {
2272 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2274 // is the gap between reading the finger and doing the CAS too long?
2276 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2277 HeapWord* bottom = curr_region->bottom();
2278 HeapWord* end = curr_region->end();
2279 HeapWord* limit = curr_region->next_top_at_mark_start();
2281 if (verbose_low())
2282 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2283 "["PTR_FORMAT", "PTR_FORMAT"), "
2284 "limit = "PTR_FORMAT,
2285 task_num, curr_region, bottom, end, limit);
2287 HeapWord* res =
2288 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2289 if (res == finger) {
2290 // we succeeded
2292 // notice that _finger == end cannot be guaranteed here, since
2293 // someone else might have moved the finger even further
2294 assert(_finger >= end, "the finger should have moved forward");
2296 if (verbose_low())
2297 gclog_or_tty->print_cr("[%d] we were successful with region = "
2298 PTR_FORMAT, task_num, curr_region);
2300 if (limit > bottom) {
2301 if (verbose_low())
2302 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2303 "returning it ", task_num, curr_region);
2304 return curr_region;
2305 } else {
2306 assert(limit == bottom,
2307 "the region limit should be at bottom");
2308 if (verbose_low())
2309 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2310 "returning NULL", task_num, curr_region);
2311 // we return NULL and the caller should try calling
2312 // claim_region() again.
2313 return NULL;
2314 }
2315 } else {
2316 assert(_finger > finger, "the finger should have moved forward");
2317 if (verbose_low())
2318 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2319 "global finger = "PTR_FORMAT", "
2320 "our finger = "PTR_FORMAT,
2321 task_num, _finger, finger);
2323 // read it again
2324 finger = _finger;
2325 }
2326 }
2328 return NULL;
2329 }
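// The claim protocol above, reduced to its essence: snapshot the global
// finger, compute the end of the region containing it, and CAS that end
// into the finger. A minimal sketch (illustration only; region_containing
// stands in for _g1h->heap_region_containing):
#if 0
  HeapWord* finger = _finger;                              // snapshot
  HeapWord* end    = region_containing(finger)->end();
  HeapWord* res    = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
  if (res == finger) {
    // we own [bottom, end); note that _finger may already be beyond end
    // if other claimants raced past us in the meantime
  } else {
    finger = _finger;                                      // lost; retry
  }
#endif // 0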
2331 void ConcurrentMark::oops_do(OopClosure* cl) {
2332 if (_markStack.size() > 0 && verbose_low())
2333 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2334 "size = %d", _markStack.size());
2335 // we first iterate over the contents of the mark stack...
2336 _markStack.oops_do(cl);
2338 for (int i = 0; i < (int)_max_task_num; ++i) {
2339 OopTaskQueue* queue = _task_queues->queue((int)i);
2341 if (queue->size() > 0 && verbose_low())
2342 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2343 "size = %d", i, queue->size());
2345 // ...then over the contents of all the task queues.
2346 queue->oops_do(cl);
2347 }
2349 // finally, invalidate any entries in the region stack that
2350 // point into the collection set
2351 if (_regionStack.invalidate_entries_into_cset()) {
2352 // otherwise, any gray objects copied during the evacuation pause
2353 // might not be visited.
2354 assert(_should_gray_objects, "invariant");
2355 }
2356 }
2358 void ConcurrentMark::clear_marking_state() {
2359 _markStack.setEmpty();
2360 _markStack.clear_overflow();
2361 _regionStack.setEmpty();
2362 _regionStack.clear_overflow();
2363 clear_has_overflown();
2364 _finger = _heap_start;
2366 for (int i = 0; i < (int)_max_task_num; ++i) {
2367 OopTaskQueue* queue = _task_queues->queue(i);
2368 queue->set_empty();
2369 }
2370 }
2372 void ConcurrentMark::print_stats() {
2373 if (verbose_stats()) {
2374 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2375 for (size_t i = 0; i < _active_tasks; ++i) {
2376 _tasks[i]->print_stats();
2377 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2378 }
2379 }
2380 }
2382 class CSMarkOopClosure: public OopClosure {
2383 friend class CSMarkBitMapClosure;
2385 G1CollectedHeap* _g1h;
2386 CMBitMap* _bm;
2387 ConcurrentMark* _cm;
2388 oop* _ms;
2389 jint* _array_ind_stack;
2390 int _ms_size;
2391 int _ms_ind;
2392 int _array_increment;
2394 bool push(oop obj, int arr_ind = 0) {
2395 if (_ms_ind == _ms_size) {
2396 gclog_or_tty->print_cr("Mark stack is full.");
2397 return false;
2398 }
2399 _ms[_ms_ind] = obj;
2400 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2401 _ms_ind++;
2402 return true;
2403 }
2405 oop pop() {
2406 if (_ms_ind == 0) return NULL;
2407 else {
2408 _ms_ind--;
2409 return _ms[_ms_ind];
2410 }
2411 }
2413 template <class T> bool drain() {
2414 while (_ms_ind > 0) {
2415 oop obj = pop();
2416 assert(obj != NULL, "Since index was non-zero.");
2417 if (obj->is_objArray()) {
2418 jint arr_ind = _array_ind_stack[_ms_ind];
2419 objArrayOop aobj = objArrayOop(obj);
2420 jint len = aobj->length();
2421 jint next_arr_ind = arr_ind + _array_increment;
2422 if (next_arr_ind < len) {
2423 push(obj, next_arr_ind);
2424 }
2425 // Now process this portion of this one.
2426 int lim = MIN2(next_arr_ind, len);
2427 for (int j = arr_ind; j < lim; j++) {
2428 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2429 }
2431 } else {
2432 obj->oop_iterate(this);
2433 }
2434 if (abort()) return false;
2435 }
2436 return true;
2437 }
2439 public:
2440 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2441 _g1h(G1CollectedHeap::heap()),
2442 _cm(cm),
2443 _bm(cm->nextMarkBitMap()),
2444 _ms_size(ms_size), _ms_ind(0),
2445 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2446 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2447 _array_increment(MAX2(ms_size/8, 16))
2448 {}
2450 ~CSMarkOopClosure() {
2451 FREE_C_HEAP_ARRAY(oop, _ms);
2452 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2453 }
2455 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2456 virtual void do_oop( oop* p) { do_oop_work(p); }
2458 template <class T> void do_oop_work(T* p) {
2459 T heap_oop = oopDesc::load_heap_oop(p);
2460 if (oopDesc::is_null(heap_oop)) return;
2461 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2462 if (obj->is_forwarded()) {
2463 // If the object has already been forwarded, we have to make sure
2464 // that it's marked. So follow the forwarding pointer. Note that
2465 // this does the right thing for self-forwarding pointers in the
2466 // evacuation failure case.
2467 obj = obj->forwardee();
2468 }
2469 HeapRegion* hr = _g1h->heap_region_containing(obj);
2470 if (hr != NULL) {
2471 if (hr->in_collection_set()) {
2472 if (_g1h->is_obj_ill(obj)) {
2473 _bm->mark((HeapWord*)obj);
2474 if (!push(obj)) {
2475 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2476 set_abort();
2477 }
2478 }
2479 } else {
2480 // Outside the collection set; we need to gray it
2481 _cm->deal_with_reference(obj);
2482 }
2483 }
2484 }
2485 };
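// objArray chunking example for the closure above: with ms_size = 1000 the
// increment is MAX2(1000/8, 16) = 125, so an array of length 300 is drained
// as [0,125) with (obj,125) re-pushed, then [125,250) with (obj,250)
// re-pushed, then [250,300). This bounds the stack space a large array
// can consume at any one time.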
2487 class CSMarkBitMapClosure: public BitMapClosure {
2488 G1CollectedHeap* _g1h;
2489 CMBitMap* _bitMap;
2490 ConcurrentMark* _cm;
2491 CSMarkOopClosure _oop_cl;
2492 public:
2493 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2494 _g1h(G1CollectedHeap::heap()),
2495 _bitMap(cm->nextMarkBitMap()),
2496 _oop_cl(cm, ms_size)
2497 {}
2499 ~CSMarkBitMapClosure() {}
2501 bool do_bit(size_t offset) {
2502 // convert offset into a HeapWord*
2503 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2504 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
2505 "address out of range");
2506 assert(_bitMap->isMarked(addr), "tautology");
2507 oop obj = oop(addr);
2508 if (!obj->is_forwarded()) {
2509 if (!_oop_cl.push(obj)) return false;
2510 if (UseCompressedOops) {
2511 if (!_oop_cl.drain<narrowOop>()) return false;
2512 } else {
2513 if (!_oop_cl.drain<oop>()) return false;
2514 }
2515 }
2516 // Otherwise...
2517 return true;
2518 }
2519 };
2522 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2523 CMBitMap* _bm;
2524 CSMarkBitMapClosure _bit_cl;
2525 enum SomePrivateConstants {
2526 MSSize = 1000
2527 };
2528 bool _completed;
2529 public:
2530 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2531 _bm(cm->nextMarkBitMap()),
2532 _bit_cl(cm, MSSize),
2533 _completed(true)
2534 {}
2536 ~CompleteMarkingInCSHRClosure() {}
2538 bool doHeapRegion(HeapRegion* r) {
2539 if (!r->evacuation_failed()) {
2540 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2541 if (!mr.is_empty()) {
2542 if (!_bm->iterate(&_bit_cl, mr)) {
2543 _completed = false;
2544 return true;
2545 }
2546 }
2547 }
2548 return false;
2549 }
2551 bool completed() { return _completed; }
2552 };
2554 class ClearMarksInHRClosure: public HeapRegionClosure {
2555 CMBitMap* _bm;
2556 public:
2557 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2559 bool doHeapRegion(HeapRegion* r) {
2560 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2561 MemRegion usedMR = r->used_region();
2562 _bm->clearRange(usedMR);
2563 }
2564 return false;
2565 }
2566 };
2568 void ConcurrentMark::complete_marking_in_collection_set() {
2569 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2571 if (!g1h->mark_in_progress()) {
2572 g1h->g1_policy()->record_mark_closure_time(0.0);
2573 return;
2574 }
2576 int i = 1; // pass counter; only ever incremented, kept for debugging
2577 double start = os::elapsedTime();
2578 while (true) {
2579 i++;
2580 CompleteMarkingInCSHRClosure cmplt(this);
2581 g1h->collection_set_iterate(&cmplt);
2582 if (cmplt.completed()) break;
2583 }
2584 double end_time = os::elapsedTime();
2585 double elapsed_time_ms = (end_time - start) * 1000.0;
2586 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2587 if (PrintGCDetails) {
2588 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2589 }
2591 ClearMarksInHRClosure clr(nextMarkBitMap());
2592 g1h->collection_set_iterate(&clr);
2593 }
2595 // The next two methods deal with the following optimisation. Some
2596 // objects are gray by being marked and located above the finger. If
2597 // they are copied, during an evacuation pause, below the finger, then
2598 // they need to be pushed on the stack. The observation is that, if
2599 // there are no regions in the collection set located above the
2600 // finger, then the above cannot happen, hence we do not need to
2601 // explicitly gray any objects when copying them to below the
2602 // finger. The global stack will be scanned to ensure that, if it
2603 // points to objects being copied, it will update their
2604 // location. There is a tricky situation with the gray objects in
2605 // the region stack that are being copied, however. See the comment
2606 // in newCSet().
2608 void ConcurrentMark::newCSet() {
2609 if (!concurrent_marking_in_progress())
2610 // nothing to do if marking is not in progress
2611 return;
2613 // find what the lowest finger is among the global and local fingers
2614 _min_finger = _finger;
2615 for (int i = 0; i < (int)_max_task_num; ++i) {
2616 CMTask* task = _tasks[i];
2617 HeapWord* task_finger = task->finger();
2618 if (task_finger != NULL && task_finger < _min_finger)
2619 _min_finger = task_finger;
2620 }
2622 _should_gray_objects = false;
2624 // This fixes a very subtle and frustrating bug. It might be the case
2625 // that, during an evacuation pause, heap regions that contain
2626 // objects that are gray (by being in regions contained in the
2627 // region stack) are included in the collection set. Since such gray
2628 // objects will be moved, and because it's not easy to redirect
2629 // region stack entries to point to a new location (because objects
2630 // in one region might be scattered to multiple regions after they
2631 // are copied), one option is to ensure that all marked objects
2632 // copied during a pause are pushed on the stack. Notice, however,
2633 // that this problem can only happen when the region stack is not
2634 // empty during an evacuation pause. So, we make the fix a bit less
2635 // conservative and ensure that regions are pushed on the stack,
2636 // irrespective of whether all collection set regions are below the
2637 // finger, if the region stack is not empty. This is expected to be
2638 // a rare case, so I don't think it's necessary to be smarter about it.
2639 if (!region_stack_empty())
2640 _should_gray_objects = true;
2641 }
2643 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2644 if (!concurrent_marking_in_progress())
2645 return;
2647 HeapWord* region_end = hr->end();
2648 if (region_end > _min_finger)
2649 _should_gray_objects = true;
2650 }
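// Example of the check above: if the lowest of the global and per-task
// fingers is F and a newly registered CSet region ends above F, marked
// objects in that region may be gray purely by position (marked and above
// a finger). Copies of them made during the pause must therefore be grayed
// explicitly, which is why _should_gray_objects is raised.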
2652 // abandon current marking iteration due to a Full GC
2653 void ConcurrentMark::abort() {
2654 // Clear all marks to force marking thread to do nothing
2655 _nextMarkBitMap->clearAll();
2656 // Empty mark stack
2657 clear_marking_state();
2658 for (int i = 0; i < (int)_max_task_num; ++i)
2659 _tasks[i]->clear_region_fields();
2660 _has_aborted = true;
2662 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2663 satb_mq_set.abandon_partial_marking();
2664 // This can be called either during or outside marking, we'll read
2665 // the expected_active value from the SATB queue set.
2666 satb_mq_set.set_active_all_threads(
2667 false, /* new active value */
2668 satb_mq_set.is_active() /* expected_active */);
2669 }
2671 static void print_ms_time_info(const char* prefix, const char* name,
2672 NumberSeq& ns) {
2673 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2674 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2675 if (ns.num() > 0) {
2676 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2677 prefix, ns.sd(), ns.maximum());
2678 }
2679 }
2681 void ConcurrentMark::print_summary_info() {
2682 gclog_or_tty->print_cr(" Concurrent marking:");
2683 print_ms_time_info(" ", "init marks", _init_times);
2684 print_ms_time_info(" ", "remarks", _remark_times);
2685 {
2686 print_ms_time_info(" ", "final marks", _remark_mark_times);
2687 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2689 }
2690 print_ms_time_info(" ", "cleanups", _cleanup_times);
2691 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2692 _total_counting_time,
2693 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2694 (double)_cleanup_times.num()
2695 : 0.0));
2696 if (G1ScrubRemSets) {
2697 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2698 _total_rs_scrub_time,
2699 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2700 (double)_cleanup_times.num()
2701 : 0.0));
2702 }
2703 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2704 (_init_times.sum() + _remark_times.sum() +
2705 _cleanup_times.sum())/1000.0);
2706 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2707 "(%8.2f s marking, %8.2f s counting).",
2708 cmThread()->vtime_accum(),
2709 cmThread()->vtime_mark_accum(),
2710 cmThread()->vtime_count_accum());
2711 }
2713 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2714 _parallel_workers->print_worker_threads_on(st);
2715 }
2717 // Closures
2718 // XXX: there seems to be a lot of code duplication here;
2719 // should refactor and consolidate the shared code.
2724 // We take a break if someone is trying to stop the world.
2725 bool ConcurrentMark::do_yield_check(int worker_i) {
2726 if (should_yield()) {
2727 if (worker_i == 0)
2728 _g1h->g1_policy()->record_concurrent_pause();
2729 cmThread()->yield();
2730 if (worker_i == 0)
2731 _g1h->g1_policy()->record_concurrent_pause_end();
2732 return true;
2733 } else {
2734 return false;
2735 }
2736 }
2738 bool ConcurrentMark::should_yield() {
2739 return cmThread()->should_yield();
2740 }
2742 bool ConcurrentMark::containing_card_is_marked(void* p) {
2743 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2744 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2745 }
2747 bool ConcurrentMark::containing_cards_are_marked(void* start,
2748 void* last) {
2749 return
2750 containing_card_is_marked(start) &&
2751 containing_card_is_marked(last);
2752 }
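// Card arithmetic used above, worked through: with the usual 512-byte
// cards (CardTableModRefBS::card_shift == 9), a pointer 0x1200 bytes past
// the start of the reserved region maps to card 0x1200 >> 9 == 9, so the
// query consults _card_bm.at(9).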
2754 #ifndef PRODUCT
2755 // for debugging purposes
2756 void ConcurrentMark::print_finger() {
2757 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2758 _heap_start, _heap_end, _finger);
2759 for (int i = 0; i < (int) _max_task_num; ++i) {
2760 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2761 }
2762 gclog_or_tty->print_cr("");
2763 }
2764 #endif
2766 // Closure for iteration over bitmaps
2767 class CMBitMapClosure : public BitMapClosure {
2768 private:
2769 // the bitmap that is being iterated over
2770 CMBitMap* _nextMarkBitMap;
2771 ConcurrentMark* _cm;
2772 CMTask* _task;
2773 // true if we're scanning a heap region claimed by the task (so that
2774 // we move the finger along), false if we're not, i.e. currently when
2775 // scanning a heap region popped from the region stack (so that we
2776 // do not move the task finger along; it'd be a mistake if we did so).
2777 bool _scanning_heap_region;
2779 public:
2780 CMBitMapClosure(CMTask *task,
2781 ConcurrentMark* cm,
2782 CMBitMap* nextMarkBitMap)
2783 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2785 void set_scanning_heap_region(bool scanning_heap_region) {
2786 _scanning_heap_region = scanning_heap_region;
2787 }
2789 bool do_bit(size_t offset) {
2790 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2791 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2792 assert( addr < _cm->finger(), "invariant");
2794 if (_scanning_heap_region) {
2795 statsOnly( _task->increase_objs_found_on_bitmap() );
2796 assert(addr >= _task->finger(), "invariant");
2797 // We move that task's local finger along.
2798 _task->move_finger_to(addr);
2799 } else {
2800 // We move the task's region finger along.
2801 _task->move_region_finger_to(addr);
2802 }
2804 _task->scan_object(oop(addr));
2805 // we only partially drain the local queue and global stack
2806 _task->drain_local_queue(true);
2807 _task->drain_global_stack(true);
2809 // if the has_aborted flag has been raised, we need to bail out of
2810 // the iteration
2811 return !_task->has_aborted();
2812 }
2813 };
2815 // Closure for iterating over objects, currently only used for
2816 // processing SATB buffers.
2817 class CMObjectClosure : public ObjectClosure {
2818 private:
2819 CMTask* _task;
2821 public:
2822 void do_object(oop obj) {
2823 _task->deal_with_reference(obj);
2824 }
2826 CMObjectClosure(CMTask* task) : _task(task) { }
2827 };
2829 // Closure for iterating over object fields
2830 class CMOopClosure : public OopClosure {
2831 private:
2832 G1CollectedHeap* _g1h;
2833 ConcurrentMark* _cm;
2834 CMTask* _task;
2836 public:
2837 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2838 virtual void do_oop( oop* p) { do_oop_work(p); }
2840 template <class T> void do_oop_work(T* p) {
2841 assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2842 assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2843 "invariant");
2845 oop obj = oopDesc::load_decode_heap_oop(p);
2846 if (_cm->verbose_high())
2847 gclog_or_tty->print_cr("[%d] we're looking at location "
2848 "*"PTR_FORMAT" = "PTR_FORMAT,
2849 _task->task_id(), p, (void*) obj);
2850 _task->deal_with_reference(obj);
2851 }
2853 CMOopClosure(G1CollectedHeap* g1h,
2854 ConcurrentMark* cm,
2855 CMTask* task)
2856 : _g1h(g1h), _cm(cm), _task(task) { }
2857 };
2859 void CMTask::setup_for_region(HeapRegion* hr) {
2860 // Separated the asserts so that we know which one fires.
2861 assert(hr != NULL,
2862 "claim_region() should have filtered out continues humongous regions");
2863 assert(!hr->continuesHumongous(),
2864 "claim_region() should have filtered out continues humongous regions");
2866 if (_cm->verbose_low())
2867 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2868 _task_id, hr);
2870 _curr_region = hr;
2871 _finger = hr->bottom();
2872 update_region_limit();
2873 }
2875 void CMTask::update_region_limit() {
2876 HeapRegion* hr = _curr_region;
2877 HeapWord* bottom = hr->bottom();
2878 HeapWord* limit = hr->next_top_at_mark_start();
2880 if (limit == bottom) {
2881 if (_cm->verbose_low())
2882 gclog_or_tty->print_cr("[%d] found an empty region "
2883 "["PTR_FORMAT", "PTR_FORMAT")",
2884 _task_id, bottom, limit);
2885 // The region was collected underneath our feet.
2886 // We set the finger to bottom to ensure that the bitmap
2887 // iteration that will follow this will not do anything.
2888 // (this is not a condition that holds when we set the region up,
2889 // as the region is not supposed to be empty in the first place)
2890 _finger = bottom;
2891 } else if (limit >= _region_limit) {
2892 assert(limit >= _finger, "peace of mind");
2893 } else {
2894 assert(limit < _region_limit, "only way to get here");
2895 // This can happen under some pretty unusual circumstances. An
2896 // evacuation pause empties the region underneath our feet (NTAMS
2897 // at bottom). We then do some allocation in the region (NTAMS
2898 // stays at bottom), followed by the region being used as a GC
2899 // alloc region (NTAMS will move to top() and the objects
2900 // originally below it will be grayed). All objects now marked in
2901 // the region are explicitly grayed, if below the global finger,
2902 // and we do not need in fact to scan anything else. So, we simply
2903 // set _finger to be limit to ensure that the bitmap iteration
2904 // doesn't do anything.
2905 _finger = limit;
2906 }
2908 _region_limit = limit;
2909 }
2911 void CMTask::giveup_current_region() {
2912 assert(_curr_region != NULL, "invariant");
2913 if (_cm->verbose_low())
2914 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2915 _task_id, _curr_region);
2916 clear_region_fields();
2917 }
2919 void CMTask::clear_region_fields() {
2920 // Values for these three fields that indicate that we're not
2921 // holding on to a region.
2922 _curr_region = NULL;
2923 _finger = NULL;
2924 _region_limit = NULL;
2926 _region_finger = NULL;
2927 }
2929 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2930 guarantee(nextMarkBitMap != NULL, "invariant");
2932 if (_cm->verbose_low())
2933 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2935 _nextMarkBitMap = nextMarkBitMap;
2936 clear_region_fields();
2938 _calls = 0;
2939 _elapsed_time_ms = 0.0;
2940 _termination_time_ms = 0.0;
2941 _termination_start_time_ms = 0.0;
2943 #if _MARKING_STATS_
2944 _local_pushes = 0;
2945 _local_pops = 0;
2946 _local_max_size = 0;
2947 _objs_scanned = 0;
2948 _global_pushes = 0;
2949 _global_pops = 0;
2950 _global_max_size = 0;
2951 _global_transfers_to = 0;
2952 _global_transfers_from = 0;
2953 _region_stack_pops = 0;
2954 _regions_claimed = 0;
2955 _objs_found_on_bitmap = 0;
2956 _satb_buffers_processed = 0;
2957 _steal_attempts = 0;
2958 _steals = 0;
2959 _aborted = 0;
2960 _aborted_overflow = 0;
2961 _aborted_cm_aborted = 0;
2962 _aborted_yield = 0;
2963 _aborted_timed_out = 0;
2964 _aborted_satb = 0;
2965 _aborted_termination = 0;
2966 #endif // _MARKING_STATS_
2967 }
2969 bool CMTask::should_exit_termination() {
2970 regular_clock_call();
2971 // This is called when we are in the termination protocol. We should
2972 // quit if, for some reason, this task wants to abort or the global
2973 // stack is not empty (this means that we can get work from it).
2974 return !_cm->mark_stack_empty() || has_aborted();
2975 }
2977 // This determines whether the method below will check both the local
2978 // and global fingers when determining whether to push a gray object
2979 // on the stack (value 1) or whether it will only check the global one
2980 // (value 0). The tradeoff is that the former will be a bit more
2981 // accurate and possibly push fewer entries on the stack, but it might
2982 // also be a little bit slower.
2984 #define _CHECK_BOTH_FINGERS_ 1
2986 void CMTask::deal_with_reference(oop obj) {
2987 if (_cm->verbose_high())
2988 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2989 _task_id, (void*) obj);
2991 ++_refs_reached;
2993 HeapWord* objAddr = (HeapWord*) obj;
2994 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2995 if (_g1h->is_in_g1_reserved(objAddr)) {
2996 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2997 HeapRegion* hr = _g1h->heap_region_containing(obj);
2998 if (_g1h->is_obj_ill(obj, hr)) {
2999 if (_cm->verbose_high())
3000 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3001 _task_id, (void*) obj);
3003 // we need to mark it first
3004 if (_nextMarkBitMap->parMark(objAddr)) {
3005 // No OrderAccess:store_load() is needed. It is implicit in the
3006 // CAS done in parMark(objAddr) above
3007 HeapWord* global_finger = _cm->finger();
3009 #if _CHECK_BOTH_FINGERS_
3010 // we will check both the local and global fingers
3012 if (_finger != NULL && objAddr < _finger) {
3013 if (_cm->verbose_high())
3014 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3015 "pushing it", _task_id, _finger);
3016 push(obj);
3017 } else if (_curr_region != NULL && objAddr < _region_limit) {
3018 // do nothing
3019 } else if (objAddr < global_finger) {
3020 // Notice that the global finger might be moving forward
3021 // concurrently. This is not a problem. In the worst case, we
3022 // mark the object while it is above the global finger and, by
3023 // the time we read the global finger, it has moved forward
3024 // past this object. In this case, the object will probably
3025 // be visited when a task is scanning the region and will also
3026 // be pushed on the stack. So, some duplicate work, but no
3027 // correctness problems.
3029 if (_cm->verbose_high())
3030 gclog_or_tty->print_cr("[%d] below the global finger "
3031 "("PTR_FORMAT"), pushing it",
3032 _task_id, global_finger);
3033 push(obj);
3034 } else {
3035 // do nothing
3036 }
3037 #else // _CHECK_BOTH_FINGERS_
3038 // we will only check the global finger
3040 if (objAddr < global_finger) {
3041 // see long comment above
3043 if (_cm->verbose_high())
3044 gclog_or_tty->print_cr("[%d] below the global finger "
3045 "("PTR_FORMAT"), pushing it",
3046 _task_id, global_finger);
3047 push(obj);
3048 }
3049 #endif // _CHECK_BOTH_FINGERS_
3050 }
3051 }
3052 }
3053 }
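// Push decision of the method above (with _CHECK_BOTH_FINGERS_), in order:
//   objAddr < local finger                 -> push (this task scanned past it)
//   in the current region, below its limit -> skip (we will scan it anyway)
//   objAddr < global finger                -> push (no task will claim it)
//   otherwise                              -> skip (some task will claim the
//                                             region and scan it)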
3055 void CMTask::push(oop obj) {
3056 HeapWord* objAddr = (HeapWord*) obj;
3057 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3058 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3059 "invariant");
3060 assert(!_g1h->is_obj_ill(obj), "invariant");
3061 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3063 if (_cm->verbose_high())
3064 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3066 if (!_task_queue->push(obj)) {
3067 // The local task queue looks full. We need to push some entries
3068 // to the global stack.
3070 if (_cm->verbose_medium())
3071 gclog_or_tty->print_cr("[%d] task queue overflow, "
3072 "moving entries to the global stack",
3073 _task_id);
3074 move_entries_to_global_stack();
3076 // this should succeed since, even if we overflow the global
3077 // stack, we should have definitely removed some entries from the
3078 // local queue. So, there must be space on it.
3079 bool success = _task_queue->push(obj);
3080 assert(success, "invariant");
3081 }
3083 statsOnly( int tmp_size = _task_queue->size();
3084 if (tmp_size > _local_max_size)
3085 _local_max_size = tmp_size;
3086 ++_local_pushes );
3087 }
3089 void CMTask::reached_limit() {
3090 assert(_words_scanned >= _words_scanned_limit ||
3091 _refs_reached >= _refs_reached_limit,
3092 "shouldn't have been called otherwise");
3093 regular_clock_call();
3094 }
3096 void CMTask::regular_clock_call() {
3097 if (has_aborted())
3098 return;
3100 // First, we need to recalculate the words scanned and refs reached
3101 // limits for the next clock call.
3102 recalculate_limits();
3104 // During the regular clock call we do the following
3106 // (1) If an overflow has been flagged, then we abort.
3107 if (_cm->has_overflown()) {
3108 set_has_aborted();
3109 return;
3110 }
3112 // If we are not concurrent (i.e. we're doing remark) we don't need
3113 // to check anything else. The other steps are only needed during
3114 // the concurrent marking phase.
3115 if (!concurrent())
3116 return;
3118 // (2) If marking has been aborted for Full GC, then we also abort.
3119 if (_cm->has_aborted()) {
3120 set_has_aborted();
3121 statsOnly( ++_aborted_cm_aborted );
3122 return;
3123 }
3125 double curr_time_ms = os::elapsedVTime() * 1000.0;
3127 // (3) If marking stats are enabled, then we update the step history.
3128 #if _MARKING_STATS_
3129 if (_words_scanned >= _words_scanned_limit)
3130 ++_clock_due_to_scanning;
3131 if (_refs_reached >= _refs_reached_limit)
3132 ++_clock_due_to_marking;
3134 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3135 _interval_start_time_ms = curr_time_ms;
3136 _all_clock_intervals_ms.add(last_interval_ms);
3138 if (_cm->verbose_medium()) {
3139 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3140 "scanned = %d%s, refs reached = %d%s",
3141 _task_id, last_interval_ms,
3142 _words_scanned,
3143 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3144 _refs_reached,
3145 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3146 }
3147 #endif // _MARKING_STATS_
3149 // (4) We check whether we should yield. If we have to, then we abort.
3150 if (_cm->should_yield()) {
3151 // We should yield. To do this we abort the task. The caller is
3152 // responsible for yielding.
3153 set_has_aborted();
3154 statsOnly( ++_aborted_yield );
3155 return;
3156 }
3158 // (5) We check whether we've reached our time quota. If we have,
3159 // then we abort.
3160 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3161 if (elapsed_time_ms > _time_target_ms) {
3162 set_has_aborted();
3163 _has_aborted_timed_out = true;
3164 statsOnly( ++_aborted_timed_out );
3165 return;
3166 }
3168 // (6) Finally, we check whether there are enough completed SATB
3169 // buffers available for processing. If there are, we abort.
3170 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3171 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3172 if (_cm->verbose_low())
3173 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3174 _task_id);
3175 // we do need to process SATB buffers, we'll abort and restart
3176 // the marking task to do so
3177 set_has_aborted();
3178 statsOnly( ++_aborted_satb );
3179 return;
3180 }
3181 }
3183 void CMTask::recalculate_limits() {
3184 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3185 _words_scanned_limit = _real_words_scanned_limit;
3187 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3188 _refs_reached_limit = _real_refs_reached_limit;
3189 }
3191 void CMTask::decrease_limits() {
3192 // This is called when we believe that we're going to do an infrequent
3193 // operation which will increase the per-byte scanned cost (i.e. move
3194 // entries to/from the global stack). It basically tries to decrease the
3195 // scanning limit so that the clock is called earlier.
3197 if (_cm->verbose_medium())
3198 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3200 _words_scanned_limit = _real_words_scanned_limit -
3201 3 * words_scanned_period / 4;
3202 _refs_reached_limit = _real_refs_reached_limit -
3203 3 * refs_reached_period / 4;
3204 }
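// Effect of the adjustment above: recalculate_limits() places the next
// clock call a full period away; subtracting 3/4 of a period moves the
// limit to just a quarter period past the last recalculation point, so
// the clock fires much sooner after an expensive operation.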
3206 void CMTask::move_entries_to_global_stack() {
3207 // local array where we'll store the entries that will be popped
3208 // from the local queue
3209 oop buffer[global_stack_transfer_size];
3211 int n = 0;
3212 oop obj;
3213 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3214 buffer[n] = obj;
3215 ++n;
3216 }
3218 if (n > 0) {
3219 // we popped at least one entry from the local queue
3221 statsOnly( ++_global_transfers_to; _local_pops += n );
3223 if (!_cm->mark_stack_push(buffer, n)) {
3224 if (_cm->verbose_low())
3225 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3226 set_has_aborted();
3227 } else {
3228 // the transfer was successful
3230 if (_cm->verbose_medium())
3231 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3232 _task_id, n);
3233 statsOnly( int tmp_size = _cm->mark_stack_size();
3234 if (tmp_size > _global_max_size)
3235 _global_max_size = tmp_size;
3236 _global_pushes += n );
3237 }
3238 }
3240 // this operation was quite expensive, so decrease the limits
3241 decrease_limits();
3242 }
3244 void CMTask::get_entries_from_global_stack() {
3245 // local array where we'll store the entries that will be popped
3246 // from the global stack.
3247 oop buffer[global_stack_transfer_size];
3248 int n;
3249 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3250 assert(n <= global_stack_transfer_size,
3251 "we should not pop more than the given limit");
3252 if (n > 0) {
3253 // yes, we did actually pop at least one entry
3255 statsOnly( ++_global_transfers_from; _global_pops += n );
3256 if (_cm->verbose_medium())
3257 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3258 _task_id, n);
3259 for (int i = 0; i < n; ++i) {
3260 bool success = _task_queue->push(buffer[i]);
3261 // We only call this when the local queue is empty or under a
3262 // given target limit. So, we do not expect this push to fail.
3263 assert(success, "invariant");
3264 }
3266 statsOnly( int tmp_size = _task_queue->size();
3267 if (tmp_size > _local_max_size)
3268 _local_max_size = tmp_size;
3269 _local_pushes += n );
3270 }
3272 // this operation was quite expensive, so decrease the limits
3273 decrease_limits();
3274 }
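// Both transfer directions above move up to global_stack_transfer_size
// entries per call, so the shared global stack is touched once per chunk
// rather than once per entry; the cost is still noticeable, which is why
// both methods finish with decrease_limits().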
3276 void CMTask::drain_local_queue(bool partially) {
3277 if (has_aborted())
3278 return;
3280 // Decide what the target size is, depending whether we're going to
3281 // drain it partially (so that other tasks can steal if they run out
3282 // of things to do) or totally (at the very end).
3283 size_t target_size;
3284 if (partially)
3285 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3286 else
3287 target_size = 0;
3289 if (_task_queue->size() > target_size) {
3290 if (_cm->verbose_high())
3291 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3292 _task_id, target_size);
3294 oop obj;
3295 bool ret = _task_queue->pop_local(obj);
3296 while (ret) {
3297 statsOnly( ++_local_pops );
3299 if (_cm->verbose_high())
3300 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3301 (void*) obj);
3303 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3304 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3305 "invariant");
3307 scan_object(obj);
3309 if (_task_queue->size() <= target_size || has_aborted())
3310 ret = false;
3311 else
3312 ret = _task_queue->pop_local(obj);
3313 }
3315 if (_cm->verbose_high())
3316 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3317 _task_id, _task_queue->size());
3318 }
3319 }
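// Note on the partial target above: a partial drain deliberately leaves up
// to MIN2(max_elems/3, GCDrainStackTargetSize) entries in the local queue
// so that other tasks still have entries to steal; only the final drain
// (target size 0) empties it completely.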
3321 void CMTask::drain_global_stack(bool partially) {
3322 if (has_aborted())
3323 return;
3325 // We have a policy to drain the local queue before we attempt to
3326 // drain the global stack.
3327 assert(partially || _task_queue->size() == 0, "invariant");
3329 // Decide what the target size is, depending whether we're going to
3330 // drain it partially (so that other tasks can steal if they run out
3331 // of things to do) or totally (at the very end). Notice that,
3332 // because we move entries from the global stack in chunks or
3333 // because another task might be doing the same, we might in fact
3334 // drop below the target. But, this is not a problem.
3335 size_t target_size;
3336 if (partially)
3337 target_size = _cm->partial_mark_stack_size_target();
3338 else
3339 target_size = 0;
3341 if (_cm->mark_stack_size() > target_size) {
3342 if (_cm->verbose_low())
3343 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3344 _task_id, target_size);
3346 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3347 get_entries_from_global_stack();
3348 drain_local_queue(partially);
3349 }
3351 if (_cm->verbose_low())
3352 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3353 _task_id, _cm->mark_stack_size());
3354 }
3355 }
3357 // SATB Queue has several assumptions on whether to call the par or
3358 // non-par versions of the methods. This is why some of the code is
3359 // replicated. We should really get rid of the single-threaded version
3360 // of the code to simplify things.
3361 void CMTask::drain_satb_buffers() {
3362 if (has_aborted())
3363 return;
3365 // We set this so that the regular clock knows that we're in the
3366 // middle of draining buffers and doesn't set the abort flag when it
3367 // notices that SATB buffers are available for draining. It'd be
3368 // very counterproductive if it did that. :-)
3369 _draining_satb_buffers = true;
3371 CMObjectClosure oc(this);
3372 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3373 if (ParallelGCThreads > 0)
3374 satb_mq_set.set_par_closure(_task_id, &oc);
3375 else
3376 satb_mq_set.set_closure(&oc);
3378 // This keeps claiming and applying the closure to completed buffers
3379 // until we run out of buffers or we need to abort.
3380 if (ParallelGCThreads > 0) {
3381 while (!has_aborted() &&
3382 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3383 if (_cm->verbose_medium())
3384 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3385 statsOnly( ++_satb_buffers_processed );
3386 regular_clock_call();
3387 }
3388 } else {
3389 while (!has_aborted() &&
3390 satb_mq_set.apply_closure_to_completed_buffer()) {
3391 if (_cm->verbose_medium())
3392 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3393 statsOnly( ++_satb_buffers_processed );
3394 regular_clock_call();
3395 }
3396 }
3398 if (!concurrent() && !has_aborted()) {
3399 // We should only do this during remark.
3400 if (ParallelGCThreads > 0)
3401 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3402 else
3403 satb_mq_set.iterate_closure_all_threads();
3404 }
3406 _draining_satb_buffers = false;
3408 assert(has_aborted() ||
3409 concurrent() ||
3410 satb_mq_set.completed_buffers_num() == 0, "invariant");
3412 if (ParallelGCThreads > 0)
3413 satb_mq_set.set_par_closure(_task_id, NULL);
3414 else
3415 satb_mq_set.set_closure(NULL);
3417 // Again, this was a potentially expensive operation; decrease the
3418 // limits so that the next regular clock call comes early.
3419 decrease_limits();
3420 }
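
// For context, a hedged sketch of the producer side that fills the
// buffers drained above. Under SATB, before a mutator overwrites a
// reference field while marking is active, the pre-write barrier
// enqueues the field's old value so the snapshot-at-the-beginning
// invariant is preserved. The names below (pre_write_barrier,
// marking_is_active, satb_queue_for_current_thread) are illustrative,
// not the actual barrier code:
//
//   void pre_write_barrier(oop* field) {
//     if (marking_is_active()) {        // only filter while marking
//       oop old_value = *field;         // the value about to be lost
//       if (old_value != NULL)
//         satb_queue_for_current_thread().enqueue(old_value);
//     }
//   }
//
// When a thread's local SATB queue fills up, it is handed over to the
// set of completed buffers that drain_satb_buffers() consumes.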
3422 void CMTask::drain_region_stack(BitMapClosure* bc) {
3423 if (has_aborted())
3424 return;
3426 assert(_region_finger == NULL,
3427 "it should be NULL when we're not scanning a region");
3429 if (!_cm->region_stack_empty()) {
3430 if (_cm->verbose_low())
3431 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3432 _task_id, _cm->region_stack_size());
3434 MemRegion mr = _cm->region_stack_pop_with_lock();
3435 // it returns MemRegion() if the pop fails
3436 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3438 while (mr.start() != NULL) {
3439 if (_cm->verbose_medium())
3440 gclog_or_tty->print_cr("[%d] we are scanning region "
3441 "["PTR_FORMAT", "PTR_FORMAT")",
3442 _task_id, mr.start(), mr.end());
3443 assert(mr.end() <= _cm->finger(),
3444 "otherwise the region shouldn't be on the stack");
3445 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3446 if (_nextMarkBitMap->iterate(bc, mr)) {
3447 assert(!has_aborted(),
3448 "cannot abort the task without aborting the bitmap iteration");
3450 // We finished iterating over the region without aborting.
3451 regular_clock_call();
3452 if (has_aborted())
3453 mr = MemRegion();
3454 else {
3455 mr = _cm->region_stack_pop_with_lock();
3456 // it returns MemRegion() if the pop fails
3457 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3458 }
3459 } else {
3460 assert(has_aborted(), "currently the only way to do so");
3462 // The only way to abort the bitmap iteration is to return
3463 // false from the do_bit() method. However, inside the
3464 // do_bit() method we move the _region_finger to point to the
3465 // object currently being looked at. So, if we bail out, we
3466 // have definitely set _region_finger to something non-null.
3467 assert(_region_finger != NULL, "invariant");
3469 // The iteration was actually aborted. So now _region_finger
3470 // points to the address of the object we last scanned. If we
3471 // leave it there, when we restart this task, we will rescan
3472 // the object. It is easy to avoid this. We move the finger by
3473 // enough to point to the next possible object header (the
3474 // bitmap knows by how much we need to move it as it knows its
3475 // granularity).
3476 MemRegion newRegion =
3477 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3479 if (!newRegion.is_empty()) {
3480 if (_cm->verbose_low()) {
3481 gclog_or_tty->print_cr("[%d] pushing unscanned region "
3482 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3483 _task_id,
3484 newRegion.start(), newRegion.end());
3485 }
3486 // Now push the part of the region we didn't scan on the
3487 // region stack to make sure a task scans it later.
3488 _cm->region_stack_push_with_lock(newRegion);
3489 }
3490 // break from while
3491 mr = MemRegion();
3492 }
3493 _region_finger = NULL;
3494 }
3496 if (_cm->verbose_low())
3497 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3498 _task_id, _cm->region_stack_size());
3499 }
3500 }
3502 void CMTask::print_stats() {
3503 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3504 _task_id, _calls);
3505 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3506 _elapsed_time_ms, _termination_time_ms);
3507 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3508 _step_times_ms.num(), _step_times_ms.avg(),
3509 _step_times_ms.sd());
3510 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3511 _step_times_ms.maximum(), _step_times_ms.sum());
3513 #if _MARKING_STATS_
3514 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3515 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3516 _all_clock_intervals_ms.sd());
3517 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3518 _all_clock_intervals_ms.maximum(),
3519 _all_clock_intervals_ms.sum());
3520 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3521 _clock_due_to_scanning, _clock_due_to_marking);
3522 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3523 _objs_scanned, _objs_found_on_bitmap);
3524 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3525 _local_pushes, _local_pops, _local_max_size);
3526 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3527 _global_pushes, _global_pops, _global_max_size);
3528 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3529 _global_transfers_to,_global_transfers_from);
3530 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3531 _regions_claimed, _region_stack_pops);
3532 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3533 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3534 _steal_attempts, _steals);
3535 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3536 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3537 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3538 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3539 _aborted_timed_out, _aborted_satb, _aborted_termination);
3540 #endif // _MARKING_STATS_
3541 }
3543 /*****************************************************************************
3545 The do_marking_step(time_target_ms) method is the building block
3546 of the parallel marking framework. It can be called in parallel
3547 with other invocations of do_marking_step() on different tasks
3548 (but only one per task, obviously) and concurrently with the
3549 mutator threads, or during remark, hence it eliminates the need
3550 for two versions of the code. When called during remark, it will
3551 pick up from where the task left off during the concurrent marking
3552 phase. Interestingly, tasks are also claimable during evacuation
3553 pauses, since do_marking_step() ensures that it aborts before
3554 it needs to yield.
3556 The data structures that it uses to do marking work are the
3557 following:
3559 (1) Marking Bitmap. If there are gray objects that appear only
3560 on the bitmap (this happens either when dealing with an overflow
3561 or when the initial marking phase has simply marked the roots
3562 and didn't push them on the stack), then tasks claim heap
3563 regions whose bitmap they then scan to find gray objects. A
3564 global finger indicates where the end of the last claimed region
3565 is. A local finger indicates how far into the region a task has
3566 scanned. The two fingers are used to determine how to gray an
3567 object (i.e. whether simply marking it is OK, as it will be
3568 visited by a task in the future, or whether it also needs to be
3569 pushed on a stack).
3571 (2) Local Queue. The local queue of the task, which is accessed
3572 reasonably efficiently by the task. Other tasks can steal from
3573 it when they run out of work. Throughout the marking phase, a
3574 task attempts to keep its local queue short but not totally
3575 empty, so that entries are available for stealing by other
3576 tasks. Only when there is no more work will a task totally
3577 drain its local queue.
3579 (3) Global Mark Stack. This handles local queue overflow. During
3580 marking, entries are only moved between it and the local queues
3581 in sets, as access to it requires a mutex and more fine-grained
3582 interaction with it might cause contention. If it
3583 overflows, then the marking phase should restart and iterate
3584 over the bitmap to identify gray objects. Throughout the marking
3585 phase, tasks attempt to keep the global mark stack at a small
3586 length but not totally empty, so that entries are available for
3587 popping by other tasks. Only when there is no more work will
3588 tasks totally drain the global mark stack.
3590 (4) Global Region Stack. Entries on it correspond to areas of
3591 the bitmap that need to be scanned since they contain gray
3592 objects. Pushes on the region stack only happen during
3593 evacuation pauses and typically correspond to areas covered by
3594 GC LABs. If it overflows, then the marking phase should restart
3595 and iterate over the bitmap to identify gray objects. Tasks will
3596 try to totally drain the region stack as soon as possible.
3598 (5) SATB Buffer Queue. This is where completed SATB buffers are
3599 made available. Buffers are regularly removed from this queue
3600 and scanned for roots, so that the queue doesn't get too
3601 long. During remark, all completed buffers are processed, as
3602 well as the filled-in parts of any uncompleted buffers.
3604 The do_marking_step() method tries to abort when the time target
3605 has been reached. There are a few other cases when the
3606 do_marking_step() method also aborts:
3608 (1) When the marking phase has been aborted (after a Full GC).
3610 (2) When a global overflow (either on the global stack or the
3611 region stack) has been triggered. Before the task aborts, it
3612 will actually sync up with the other tasks to ensure that all
3613 the marking data structures (local queues, stacks, fingers etc.)
3614 are re-initialised so that when do_marking_step() completes,
3615 the marking phase can immediately restart.
3617 (3) When enough completed SATB buffers are available. The
3618 do_marking_step() method only tries to drain SATB buffers right
3619 at the beginning. So, if enough buffers are available, the
3620 marking step aborts and the SATB buffers are processed at
3621 the beginning of the next invocation.
3623 (4) To yield. When we have to yield, we abort and yield
3624 right at the end of do_marking_step(). This saves us a lot
3625 of hassle as, by yielding, we might allow a Full GC. If this
3626 happens, then objects will be compacted underneath our feet, the
3627 heap might shrink, etc. We avoid checking for this throughout
3628 the code by just aborting and doing the yield right at the end.
3630 From the above it follows that the do_marking_step() method should
3631 be called in a loop (or, otherwise, regularly) until it completes;
3632 a sketch of such a driver loop follows this comment block.
3633 If a marking step completes without its has_aborted() flag being
3634 true, it means it has completed the current marking phase (and
3635 also all other marking tasks have done so and have all synced up).
3637 A method called regular_clock_call() is invoked "regularly" (in
3638 sub-ms intervals) throughout marking. It is this clock method that
3639 checks all the abort conditions which were mentioned above and
3640 decides when the task should abort. A work-based scheme is used to
3641 trigger this clock method: when the number of object words the
3642 marking phase has scanned or the number of references the marking
3643 phase has visited reaches a given limit. Additional invocations of
3644 the clock method have been planted in a few other strategic places
3645 too. The initial reason for the clock method was to avoid calling
3646 vtime too regularly, as it is quite expensive. So, once it was in
3647 place, it was natural to piggy-back all the other conditions on it
3648 too and not constantly check them throughout the code.
3650 *****************************************************************************/
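
// A minimal sketch of the driver loop described in the comment above,
// assuming a task pointer and a marking_aborted() query; the names and
// the time target value are illustrative, not the actual caller (which
// also has to handle yielding between steps):
//
//   do {
//     task->do_marking_step(10.0 /* ms, must be >= 1.0 */);
//     if (task->has_aborted()) {
//       // Aborting is how a step honours its time target, reacts to
//       // overflow, yields, etc. We may pause here (e.g. for an
//       // evacuation pause) before calling do_marking_step() again.
//     }
//   } while (task->has_aborted() && !marking_aborted());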
3652 void CMTask::do_marking_step(double time_target_ms) {
3653 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3654 assert(concurrent() == _cm->concurrent(), "they should be the same");
3656 assert(concurrent() || _cm->region_stack_empty(),
3657 "the region stack should have been cleared before remark");
3658 assert(_region_finger == NULL,
3659 "this should be non-null only when a region is being scanned");
3661 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3662 assert(_task_queues != NULL, "invariant");
3663 assert(_task_queue != NULL, "invariant");
3664 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3666 assert(!_claimed,
3667 "only one thread should claim this task at any one time");
3669 // OK, this doesn't safeguard against all possible scenarios, as it is
3670 // possible for two threads to set the _claimed flag at the same
3671 // time. But it is only for debugging purposes anyway and it will
3672 // catch most problems.
3673 _claimed = true;
3675 _start_time_ms = os::elapsedVTime() * 1000.0;
3676 statsOnly( _interval_start_time_ms = _start_time_ms );
3678 double diff_prediction_ms =
3679 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3680 _time_target_ms = time_target_ms - diff_prediction_ms;
3682 // set up the variables that are used in the work-based scheme to
3683 // call the regular clock method
3684 _words_scanned = 0;
3685 _refs_reached = 0;
3686 recalculate_limits();
3688 // clear all flags
3689 clear_has_aborted();
3690 _has_aborted_timed_out = false;
3691 _draining_satb_buffers = false;
3693 ++_calls;
3695 if (_cm->verbose_low())
3696 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3697 "target = %1.2lfms >>>>>>>>>>",
3698 _task_id, _calls, _time_target_ms);
3700 // Set up the bitmap and oop closures. Anything that uses them is
3701 // eventually called from this method, so it is OK to allocate these
3702 // statically.
3703 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3704 CMOopClosure oop_closure(_g1h, _cm, this);
3705 set_oop_closure(&oop_closure);
3707 if (_cm->has_overflown()) {
3708 // This can happen if the region stack or the mark stack overflows
3709 // during a GC pause and this task, after a yield point,
3710 // restarts. We have to abort as we need to get into the overflow
3711 // protocol which happens right at the end of this task.
3712 set_has_aborted();
3713 }
3715 // First drain any available SATB buffers. After this, we will not
3716 // look at SATB buffers before the next invocation of this method.
3717 // If enough completed SATB buffers are queued up, the regular clock
3718 // will abort this task so that it restarts.
3719 drain_satb_buffers();
3720 // ...then partially drain the local queue and the global stack
3721 drain_local_queue(true);
3722 drain_global_stack(true);
3724 // Then totally drain the region stack. We will not look at
3725 // it again before the next invocation of this method. Entries on
3726 // the region stack are only added during evacuation pauses, for
3727 // which we have to yield. When we do, we abort the task anyway so
3728 // it will look at the region stack again when it restarts.
3729 bitmap_closure.set_scanning_heap_region(false);
3730 drain_region_stack(&bitmap_closure);
3731 // ...then partially drain the local queue and the global stack
3732 drain_local_queue(true);
3733 drain_global_stack(true);
3735 do {
3736 if (!has_aborted() && _curr_region != NULL) {
3737 // This means that we're already holding on to a region.
3738 assert(_finger != NULL, "if region is not NULL, then the finger "
3739 "should not be NULL either");
3741 // We might have restarted this task after an evacuation pause
3742 // which might have evacuated the region we're holding on to
3743 // underneath our feet. Let's read its limit again to make sure
3744 // that we do not iterate over a region of the heap that
3745 // contains garbage (update_region_limit() will also move
3746 // _finger to the start of the region if it is found empty).
3747 update_region_limit();
3748 // We will start from _finger not from the start of the region,
3749 // as we might be restarting this task after aborting half-way
3750 // through scanning this region. In this case, _finger points to
3751 // the address where we last found a marked object. If this is a
3752 // fresh region, _finger points to start().
3753 MemRegion mr = MemRegion(_finger, _region_limit);
3755 if (_cm->verbose_low())
3756 gclog_or_tty->print_cr("[%d] we're scanning part "
3757 "["PTR_FORMAT", "PTR_FORMAT") "
3758 "of region "PTR_FORMAT,
3759 _task_id, _finger, _region_limit, _curr_region);
3761 // Let's iterate over the bitmap of the part of the
3762 // region that is left.
3763 bitmap_closure.set_scanning_heap_region(true);
3764 if (mr.is_empty() ||
3765 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3766 // We successfully completed iterating over the region. Now,
3767 // let's give up the region.
3768 giveup_current_region();
3769 regular_clock_call();
3770 } else {
3771 assert(has_aborted(), "currently the only way to do so");
3772 // The only way to abort the bitmap iteration is to return
3773 // false from the do_bit() method. However, inside the
3774 // do_bit() method we move the _finger to point to the
3775 // object currently being looked at. So, if we bail out, we
3776 // have definitely set _finger to something non-null.
3777 assert(_finger != NULL, "invariant");
3779 // Region iteration was actually aborted. So now _finger
3780 // points to the address of the object we last scanned. If we
3781 // leave it there, when we restart this task, we will rescan
3782 // the object. It is easy to avoid this. We move the finger by
3783 // enough to point to the next possible object header (the
3784 // bitmap knows by how much we need to move it as it knows its
3785 // granularity).
3786 assert(_finger < _region_limit, "invariant");
3787 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3788 // Check if bitmap iteration was aborted while scanning the last object
3789 if (new_finger >= _region_limit) {
3790 giveup_current_region();
3791 } else {
3792 move_finger_to(new_finger);
3793 }
3794 }
3795 }
3796 // At this point we have either completed iterating over the
3797 // region we were holding on to, or we have aborted.
3799 // We then partially drain the local queue and the global stack.
3800 // (Do we really need this?)
3801 drain_local_queue(true);
3802 drain_global_stack(true);
3804 // Read the note on the claim_region() method on why it might
3805 // return NULL with potentially more regions available for
3806 // claiming and why we have to check out_of_regions() to determine
3807 // whether we're done or not.
3808 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3809 // We are going to try to claim a new region. We should have
3810 // given up on the previous one.
3811 // Separated the asserts so that we know which one fires.
3812 assert(_curr_region == NULL, "invariant");
3813 assert(_finger == NULL, "invariant");
3814 assert(_region_limit == NULL, "invariant");
3815 if (_cm->verbose_low())
3816 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3817 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3818 if (claimed_region != NULL) {
3819 // Yes, we managed to claim one
3820 statsOnly( ++_regions_claimed );
3822 if (_cm->verbose_low())
3823 gclog_or_tty->print_cr("[%d] we successfully claimed "
3824 "region "PTR_FORMAT,
3825 _task_id, claimed_region);
3827 setup_for_region(claimed_region);
3828 assert(_curr_region == claimed_region, "invariant");
3829 }
3830 // It is important to call the regular clock here. It might take
3831 // a while to claim a region if, for example, we hit a large
3832 // block of empty regions. So we need to call the regular clock
3833 // method once round the loop to make sure it's called
3834 // frequently enough.
3835 regular_clock_call();
3836 }
3838 if (!has_aborted() && _curr_region == NULL) {
3839 assert(_cm->out_of_regions(),
3840 "at this point we should be out of regions");
3841 }
3842 } while (_curr_region != NULL && !has_aborted());
3844 if (!has_aborted()) {
3845 // We cannot check whether the global stack is empty, since other
3846 // tasks might be pushing objects to it concurrently. We also cannot
3847 // check if the region stack is empty because if a thread is aborting
3848 // it can push a partially done region back.
3849 assert(_cm->out_of_regions(),
3850 "at this point we should be out of regions");
3852 if (_cm->verbose_low())
3853 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3855 // Try to reduce the number of available SATB buffers so that
3856 // remark has less work to do.
3857 drain_satb_buffers();
3858 }
3860 // Since we've done everything else, we can now totally drain the
3861 // local queue and global stack.
3862 drain_local_queue(false);
3863 drain_global_stack(false);
3865 // Attempt at work stealing from other tasks' queues.
3866 if (!has_aborted()) {
3867 // We have not aborted. This means that we have finished all that
3868 // we could. Let's try to do some stealing...
3870 // We cannot check whether the global stack is empty, since other
3871 // tasks might be pushing objects to it concurrently. We also cannot
3872 // check if the region stack is empty because if a thread is aborting
3873 // it can push a partially done region back.
3874 assert(_cm->out_of_regions() && _task_queue->size() == 0,
3875 "only way to reach here");
3877 if (_cm->verbose_low())
3878 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3880 while (!has_aborted()) {
3881 oop obj;
3882 statsOnly( ++_steal_attempts );
3884 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3885 if (_cm->verbose_medium())
3886 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3887 _task_id, (void*) obj);
3889 statsOnly( ++_steals );
3891 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3892 "any stolen object should be marked");
3893 scan_object(obj);
3895 // And since we're towards the end, let's totally drain the
3896 // local queue and global stack.
3897 drain_local_queue(false);
3898 drain_global_stack(false);
3899 } else {
3900 break;
3901 }
3902 }
3903 }
3905 // We still haven't aborted. Now, let's try to get into the
3906 // termination protocol.
3907 if (!has_aborted()) {
3908 // We cannot check whether the global stack is empty, since other
3909 // tasks might be concurrently pushing objects on it. We also cannot
3910 // check if the region stack is empty because if a thread is aborting
3911 // it can push a partially done region back.
3912 // Separated the asserts so that we know which one fires.
3913 assert(_cm->out_of_regions(), "only way to reach here");
3914 assert(_task_queue->size() == 0, "only way to reach here");
3916 if (_cm->verbose_low())
3917 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3919 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3920 // The CMTask class also extends the TerminatorTerminator class,
3921 // hence its should_exit_termination() method will also decide
3922 // whether to exit the termination protocol or not.
3923 bool finished = _cm->terminator()->offer_termination(this);
3924 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3925 _termination_time_ms +=
3926 termination_end_time_ms - _termination_start_time_ms;
3928 if (finished) {
3929 // We're all done.
3931 if (_task_id == 0) {
3932 // let's allow task 0 to do this
3933 if (concurrent()) {
3934 assert(_cm->concurrent_marking_in_progress(), "invariant");
3935 // we need to set this to false before the next
3936 // safepoint. This way we ensure that the marking phase
3937 // doesn't observe any more heap expansions.
3938 _cm->clear_concurrent_marking_in_progress();
3939 }
3940 }
3942 // We can now guarantee that the global stack is empty, since
3943 // all other tasks have finished. We separated the guarantees so
3944 // that, if a condition is false, we can immediately find out
3945 // which one.
3946 guarantee(_cm->out_of_regions(), "only way to reach here");
3947 guarantee(_cm->region_stack_empty(), "only way to reach here");
3948 guarantee(_cm->mark_stack_empty(), "only way to reach here");
3949 guarantee(_task_queue->size() == 0, "only way to reach here");
3950 guarantee(!_cm->has_overflown(), "only way to reach here");
3951 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3952 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
3954 if (_cm->verbose_low())
3955 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3956 } else {
3957 // Apparently there's more work to do. Let's abort this task. The
3958 // caller will restart it and we can hopefully find more things to do.
3960 if (_cm->verbose_low())
3961 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3963 set_has_aborted();
3964 statsOnly( ++_aborted_termination );
3965 }
3966 }
3968 // Mainly for debugging purposes to make sure that a pointer to the
3969 // closure which was statically allocated in this frame doesn't
3970 // escape it by accident.
3971 set_oop_closure(NULL);
3972 double end_time_ms = os::elapsedVTime() * 1000.0;
3973 double elapsed_time_ms = end_time_ms - _start_time_ms;
3974 // Update the step history.
3975 _step_times_ms.add(elapsed_time_ms);
3977 if (has_aborted()) {
3978 // The task was aborted for some reason.
3980 statsOnly( ++_aborted );
3982 if (_has_aborted_timed_out) {
3983 double diff_ms = elapsed_time_ms - _time_target_ms;
3984 // Keep statistics of how well we did with respect to hitting
3985 // our target only if we actually timed out (if we aborted for
3986 // other reasons, then the results might get skewed).
3987 _marking_step_diffs_ms.add(diff_ms);
3988 }
3990 if (_cm->has_overflown()) {
3991 // This is the interesting one. We aborted because a global
3992 // overflow was raised. This means we have to restart the
3993 // marking phase and start iterating over regions. However, in
3994 // order to do this we have to make sure that all tasks stop
3995 // what they are doing and re-initialise in a safe manner. We
3996 // will achieve this with the use of two barrier sync points.
3998 if (_cm->verbose_low())
3999 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4001 _cm->enter_first_sync_barrier(_task_id);
4002 // When we exit this sync barrier we know that all tasks have
4003 // stopped doing marking work. So, it's now safe to
4004 // re-initialise our data structures. At the end of this method,
4005 // task 0 will clear the global data structures.
4007 statsOnly( ++_aborted_overflow );
4009 // We clear the local state of this task...
4010 clear_region_fields();
4012 // ...and enter the second barrier.
4013 _cm->enter_second_sync_barrier(_task_id);
4014 // At this point everything has been re-initialised and we're
4015 // ready to restart.
4016 }
4018 if (_cm->verbose_low()) {
4019 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4020 "elapsed = %1.2lfms <<<<<<<<<<",
4021 _task_id, _time_target_ms, elapsed_time_ms);
4022 if (_cm->has_aborted())
4023 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4024 _task_id);
4025 }
4026 } else {
4027 if (_cm->verbose_low())
4028 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4029 "elapsed = %1.2lfms <<<<<<<<<<",
4030 _task_id, _time_target_ms, elapsed_time_ms);
4031 }
4033 _claimed = false;
4034 }
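
// The overflow path above is an instance of a generic two-barrier reset
// pattern. A hedged sketch, with illustrative names (barrier_1/barrier_2
// stand in for the sync points entered via enter_first_sync_barrier()
// and enter_second_sync_barrier()):
//
//   barrier_1.wait();          // now no task is doing any marking work
//   clear_local_state();       // each task resets its own structures;
//   if (task_id == 0)          // one designated task resets the
//     clear_global_state();    // shared marking structures
//   barrier_2.wait();          // everyone observes the reset state
//   // ...all tasks can now safely restart the marking phase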
4036 CMTask::CMTask(int task_id,
4037 ConcurrentMark* cm,
4038 CMTaskQueue* task_queue,
4039 CMTaskQueueSet* task_queues)
4040 : _g1h(G1CollectedHeap::heap()),
4041 _task_id(task_id), _cm(cm),
4042 _claimed(false),
4043 _nextMarkBitMap(NULL), _hash_seed(17),
4044 _task_queue(task_queue),
4045 _task_queues(task_queues),
4046 _oop_closure(NULL) {
4047 guarantee(task_queue != NULL, "invariant");
4048 guarantee(task_queues != NULL, "invariant");
4050 statsOnly( _clock_due_to_scanning = 0;
4051 _clock_due_to_marking = 0 );
4053 _marking_step_diffs_ms.add(0.5);
4054 }