Tue, 12 Oct 2010 09:36:48 -0700
6971296: G1: simplify G1RemSet class hierarchy
Summary: Remove G1RemSet base class and StupidG1RemSet class; rename HRInto_G1RemSet to just G1RemSet.
Reviewed-by: ysr, tonyp
1 /*
2 * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentMark.cpp.incl"
28 //
29 // CMS Bit Map Wrapper
31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
32 _bm((uintptr_t*)NULL,0),
33 _shifter(shifter) {
34 _bmStartWord = (HeapWord*)(rs.base());
35 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
36 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
37 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
39 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
40 // For now we'll just commit all of the bit map up fromt.
41 // Later on we'll try to be more parsimonious with swap.
42 guarantee(_virtual_space.initialize(brs, brs.size()),
43 "couldn't reseve backing store for CMS bit map");
44 assert(_virtual_space.committed_size() == brs.size(),
45 "didn't reserve backing store for all of CMS bit map?");
46 _bm.set_map((uintptr_t*)_virtual_space.low());
47 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
48 _bmWordSize, "inconsistency in bit map sizing");
49 _bm.set_size(_bmWordSize >> _shifter);
50 }
52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
53 HeapWord* limit) const {
54 // First we must round addr *up* to a possible object boundary.
55 addr = (HeapWord*)align_size_up((intptr_t)addr,
56 HeapWordSize << _shifter);
57 size_t addrOffset = heapWordToOffset(addr);
58 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
59 size_t limitOffset = heapWordToOffset(limit);
60 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
61 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
62 assert(nextAddr >= addr, "get_next_one postcondition");
63 assert(nextAddr == limit || isMarked(nextAddr),
64 "get_next_one postcondition");
65 return nextAddr;
66 }
68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
69 HeapWord* limit) const {
70 size_t addrOffset = heapWordToOffset(addr);
71 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
72 size_t limitOffset = heapWordToOffset(limit);
73 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
74 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 assert(nextAddr >= addr, "get_next_one postcondition");
76 assert(nextAddr == limit || !isMarked(nextAddr),
77 "get_next_one postcondition");
78 return nextAddr;
79 }
81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
82 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
83 return (int) (diff >> _shifter);
84 }
86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
87 HeapWord* left = MAX2(_bmStartWord, mr.start());
88 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
89 if (right > left) {
90 // Right-open interval [leftOffset, rightOffset).
91 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
92 } else {
93 return true;
94 }
95 }
97 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
98 size_t from_start_index,
99 HeapWord* to_start_word,
100 size_t word_num) {
101 _bm.mostly_disjoint_range_union(from_bitmap,
102 from_start_index,
103 heapWordToOffset(to_start_word),
104 word_num);
105 }
107 #ifndef PRODUCT
108 bool CMBitMapRO::covers(ReservedSpace rs) const {
109 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
110 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
111 "size inconsistency");
112 return _bmStartWord == (HeapWord*)(rs.base()) &&
113 _bmWordSize == rs.size()>>LogHeapWordSize;
114 }
115 #endif
117 void CMBitMap::clearAll() {
118 _bm.clear();
119 return;
120 }
122 void CMBitMap::markRange(MemRegion mr) {
123 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
124 assert(!mr.is_empty(), "unexpected empty region");
125 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
126 ((HeapWord *) mr.end())),
127 "markRange memory region end is not card aligned");
128 // convert address range into offset range
129 _bm.at_put_range(heapWordToOffset(mr.start()),
130 heapWordToOffset(mr.end()), true);
131 }
133 void CMBitMap::clearRange(MemRegion mr) {
134 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
135 assert(!mr.is_empty(), "unexpected empty region");
136 // convert address range into offset range
137 _bm.at_put_range(heapWordToOffset(mr.start()),
138 heapWordToOffset(mr.end()), false);
139 }
141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
142 HeapWord* end_addr) {
143 HeapWord* start = getNextMarkedWordAddress(addr);
144 start = MIN2(start, end_addr);
145 HeapWord* end = getNextUnmarkedWordAddress(start);
146 end = MIN2(end, end_addr);
147 assert(start <= end, "Consistency check");
148 MemRegion mr(start, end);
149 if (!mr.is_empty()) {
150 clearRange(mr);
151 }
152 return mr;
153 }
// Constructs an empty mark stack; the backing storage is obtained
// later via allocate(). The debug-only drain-tracking flags start
// cleared.
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}
163 void CMMarkStack::allocate(size_t size) {
164 _base = NEW_C_HEAP_ARRAY(oop, size);
165 if (_base == NULL)
166 vm_exit_during_initialization("Failed to allocate "
167 "CM region mark stack");
168 _index = 0;
169 // QQQQ cast ...
170 _capacity = (jint) size;
171 _oops_do_bound = -1;
172 NOT_PRODUCT(_max_depth = 0);
173 }
175 CMMarkStack::~CMMarkStack() {
176 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
177 }
// Lock-free push of a single entry. When the stack is full the entry
// is dropped and _overflow is set instead; callers must check for
// overflow and recover. CAS loop: claim a slot by advancing _index,
// then fill the claimed slot.
// NOTE(review): the slot is written after _index is advanced, so a
// concurrent reader could observe the new index before the store --
// presumably pops are serialized against pushes elsewhere; confirm.
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      // CAS succeeded: slot 'index' is ours.
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
// Lock-free push of n entries from ptr_arr. Either all n entries are
// pushed or none are; on insufficient space _overflow is set and
// nothing is pushed. CAS loop claims n consecutive slots at once.
void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      // Not enough room for all n entries.
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      // CAS succeeded: slots [index, index + n) are ours.
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
// Pushes n entries from ptr_arr while holding ParGCRareEvent_lock.
// All-or-nothing: on insufficient space _overflow is set and nothing
// is pushed.
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}
// Pops up to 'max' entries into ptr_arr while holding
// ParGCRareEvent_lock; *n receives the number actually popped.
// Returns false iff the stack was empty.
bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    // Copy out the k topmost entries, lowest index first.
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
// Constructs an empty region stack; storage comes later via allocate().
CMRegionStack::CMRegionStack() : _base(NULL) {}
267 void CMRegionStack::allocate(size_t size) {
268 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
269 if (_base == NULL)
270 vm_exit_during_initialization("Failed to allocate "
271 "CM region mark stack");
272 _index = 0;
273 // QQQQ cast ...
274 _capacity = (jint) size;
275 }
277 CMRegionStack::~CMRegionStack() {
278 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
279 }
// Lock-free push of mr. On a full stack the push is dropped and
// _overflow is set; callers must check and recover. Should only be
// used in tandem with the other lock-free operations.
void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      // CAS succeeded: slot 'index' is ours.
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}
// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops. Returns an empty MemRegion when the stack
// is exhausted; blanked (invalidated) entries are skipped.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      // Empty stack.
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        // (invalidate_entries_into_cset() blanks entries whose region
        // moved into the collection set)
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}
#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

// Locked push: sets _overflow instead of pushing when full.
void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

// Locked pop: skips invalidated (blanked) entries; returns an empty
// MemRegion when the stack is exhausted.
MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif
// Blanks out (invalidates) every stack entry up to _oops_do_bound
// whose region is now in the collection set: such regions are about
// to be evacuated, so the cached MemRegion would go stale. Pops skip
// blanked entries. Returns true iff at least one entry was
// invalidated.
bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();   // blank the entry so pops skip it
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}
// Drains the stack, applying cl to the fields of each popped (grey)
// object. If yield_after is true, offers to yield after each object
// and returns false when the drain was cut short by a yield request;
// returns true when the stack was completely drained. The assert
// guards against unsafe recursive drains that could yield.
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
// Applies f to the stack entries below _oops_do_bound (a previously
// recorded snapshot bound), then resets the bound so it must be
// re-established before the next iteration.
void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}
427 bool ConcurrentMark::not_yet_marked(oop obj) const {
428 return (_g1h->is_obj_ill(obj)
429 || (_g1h->is_in_permanent(obj)
430 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
431 }
433 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
434 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
435 #endif // _MSC_VER
// Constructs the global concurrent-marking state: the two marking
// bitmaps (prev/next), region and card bitmaps, the mark and region
// stacks, per-task structures, and the marking thread/work gang.
// 'rs' is the heap's reserved space; 'max_regions' sizes _region_bm.
ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL)
{
  // Clamp the verbosity flag into the supported range.
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  // NOTE(review): _heap_start/_heap_end are only assigned at the very
  // end of this constructor (and in reset()), so this trace may print
  // uninitialized values -- confirm.
  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  // One cleanup-thread-state slot per (parallel) GC thread.
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set
      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      // Default: roughly a quarter of the parallel GC threads, at
      // least one.
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         (int) _parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}
// Refreshes the cached heap end (_heap_end) from the currently
// committed region; 'force' performs the update even when marking is
// not in progress.
void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, then we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}
// Re-initializes the global marking state for a new cycle; must be
// called in a STW phase.
void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}
// Configures the number of active tasks for the upcoming phase and
// updates the structures that depend on it; 'concurrent' says whether
// the phase runs concurrently with mutators or at a STW pause.
void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}
// We set the global marking state to some default values when we're
// not doing marking: no active tasks, cleared marking data
// structures, and the in-progress flag off.
void ConcurrentMark::set_non_marking_state() {
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}
699 ConcurrentMark::~ConcurrentMark() {
700 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
701 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
702 FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
703 _par_cleanup_thread_state);
705 for (int i = 0; i < (int) _max_task_num; ++i) {
706 delete _task_queues->queue(i);
707 delete _tasks[i];
708 }
709 delete _task_queues;
710 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
711 }
// The closure used to mark refs into the G1 heap from external roots
// (CMMarkRootsClosure) is defined further below; it is used at the
// first checkpoint. The next method clears the bitmap that will be
// used for the upcoming cycle.
//
// Clears the next marking bitmap in 1M chunks, offering to yield
// between chunks, to get it ready for the next cycle. Runs while the
// current cycle is finishing, when no new cycle can start.
void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}
761 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
762 public:
763 bool doHeapRegion(HeapRegion* r) {
764 if (!r->continuesHumongous()) {
765 r->note_start_of_marking(true);
766 }
767 return false;
768 }
769 };
771 void ConcurrentMark::checkpointRootsInitialPre() {
772 G1CollectedHeap* g1h = G1CollectedHeap::heap();
773 G1CollectorPolicy* g1p = g1h->g1_policy();
775 _has_aborted = false;
777 #ifndef PRODUCT
778 if (G1PrintReachableAtInitialMark) {
779 print_reachable("at-cycle-start",
780 true /* use_prev_marking */, true /* all */);
781 }
782 #endif
784 // Initialise marking structures. This has to be done in a STW phase.
785 reset();
786 }
// Closure applied to root locations at the initial-mark checkpoint:
// objects that lie inside the G1 reserved area are greyed on the next
// marking bitmap. When _do_barrier is set (used for perm-gen roots),
// the barrier is additionally applied to the root location, which
// must itself be outside the G1 reserved area.
class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (!oopDesc::is_null(heap_oop)) {
      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
      // The mark word may be displaced by a lock; tolerate that here.
      assert(obj->is_oop() || obj->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)obj;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(obj);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};
// STW wrap-up after the initial-mark root scan: notifies regions that
// marking has started, enables weak-reference discovery, and
// activates the SATB queues.
void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all threads to
  // have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}
847 // Checkpoint the roots into this generation from outside
848 // this generation. [Note this initial checkpoint need only
849 // be approximate -- we'll do a catch up phase subsequently.]
850 void ConcurrentMark::checkpointRootsInitial() {
851 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
852 G1CollectedHeap* g1h = G1CollectedHeap::heap();
854 double start = os::elapsedTime();
856 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
857 g1p->record_concurrent_mark_init_start();
858 checkpointRootsInitialPre();
860 // YSR: when concurrent precleaning is in place, we'll
861 // need to clear the cached card table here
863 ResourceMark rm;
864 HandleMark hm;
866 g1h->ensure_parsability(false);
867 g1h->perm_gen()->save_marks();
869 CMMarkRootsClosure notOlder(this, g1h, false);
870 CMMarkRootsClosure older(this, g1h, true);
872 g1h->set_marking_started();
873 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
875 g1h->process_strong_roots(true, // activate StrongRootsScope
876 false, // fake perm gen collection
877 SharedHeap::SO_AllClasses,
878 ¬Older, // Regular roots
879 NULL, // do not visit active blobs
880 &older // Perm Gen Roots
881 );
882 checkpointRootsInitialPost();
884 // Statistics.
885 double end = os::elapsedTime();
886 _init_times.add((end - start) * 1000.0);
888 g1p->record_concurrent_mark_init_end();
889 }
/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one
   thread could be in the barrier sync code, waiting for the other
   thread to also sync up, whereas another one could be trying to
   yield, while also waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended for a Full GC, or for an
   evacuation pause to occur. This is actually safe, since entering
   the sync barrier is one of the last things do_marking_step() does,
   and it doesn't manipulate any data structures afterwards.
*/
// First rendezvous after a mark-stack overflow: all active tasks sync
// up here, then task 0 clears the global marking state while everyone
// is stopped. (See the comment above on leaving the STS around the
// barrier.)
void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}
// Second rendezvous after a mark-stack overflow: tasks wait here
// until every task has reset its own data structures, then marking
// can restart. (See the comment above on leaving the STS around the
// barrier.)
void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}
948 void ConcurrentMark::grayRoot(oop p) {
949 HeapWord* addr = (HeapWord*) p;
950 // We can't really check against _heap_start and _heap_end, since it
951 // is possible during an evacuation pause with piggy-backed
952 // initial-mark that the committed space is expanded during the
953 // pause without CM observing this change. So the assertions below
954 // is a bit conservative; but better than nothing.
955 assert(_g1h->g1_committed().contains(addr),
956 "address should be within the heap bounds");
958 if (!_nextMarkBitMap->isMarked(addr))
959 _nextMarkBitMap->parMark(addr);
960 }
// Decides whether a region whose objects have already been marked "in
// bulk" needs to be pushed on the region stack so its objects are
// later scanned.
void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  // Snapshot of the global finger (it may be updated concurrently by
  // marking tasks).
  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  // Only regions entirely below the finger need pushing: anything at
  // or above it will still be reached by the normal marking sweep.
  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    // A failed push means the region stack overflowed; marking will
    // notice and deal with the overflow, so we just log it here.
    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}
// Marks the given (not-yet-marked) object on the next bitmap and, if
// it lies below the global finger while marking is still in progress,
// pushes it on the global mark stack so it gets scanned.
void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push in on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective whether we bail out
    // because we're done with marking.
    // parMark() returns true only for the thread that won the race to
    // set the bit, so at most one thread pushes the object.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess:store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      // Objects at or above the finger will be visited by the normal
      // sweep; only those below it need an explicit push.
      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}
// Gang task that drives one concurrent marking worker. Each worker
// repeatedly runs its CMTask in bounded steps, yielding to safepoints
// and sleeping between steps to limit the marking overhead imposed on
// the mutators.
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark* _cm;        // global concurrent marking state
  ConcurrentMarkThread* _cmt; // the concurrent mark thread driving us

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    double start_vtime = os::elapsedVTime();

    // Join the suspendible thread set so safepoints can suspend us
    // while we mark.
    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        // Do a bounded chunk of marking work (10.0 is the step's time
        // budget -- presumably in ms; confirm against do_marking_step).
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        // The yield check may yield internally; its result is unused
        // here.
        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          // The task aborted voluntarily (marking itself continues):
          // sleep for a time proportional to the step's vtime, scaled
          // by the CM sleep factor, leaving the STS while asleep.
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
        // Keep stepping while only the task (not global marking)
        // aborted -- i.e. the task ran out of its time budget.
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
    AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};
1099 void ConcurrentMark::markFromRoots() {
1100 // we might be tempted to assert that:
1101 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1102 // "inconsistent argument?");
1103 // However that wouldn't be right, because it's possible that
1104 // a safepoint is indeed in progress as a younger generation
1105 // stop-the-world GC happens even as we mark in this generation.
1107 _restart_for_overflow = false;
1109 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1111 CMConcurrentMarkingTask markingTask(this, cmThread());
1112 if (parallel_marking_threads() > 0)
1113 _parallel_workers->run_task(&markingTask);
1114 else
1115 markingTask.work(0);
1116 print_stats();
1117 }
// The remark pause: finishes marking work, processes weak references,
// and either completes the marking cycle or flags a restart if the
// mark stack overflowed.
void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    // NOTE(review): positional arguments here vs the named-parameter
    // call in the (after) branch below -- presumably (allow_dirty,
    // silent, use_prev_marking); confirm against Universe::verify.
    Universe::verify(true, false, true);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    JavaThread::satb_mark_queue_set().set_active_all_threads(
                                                  false, /* new active value */
                                                  true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm; // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::heap()->verify(/* allow_dirty */ true,
                               /* silent */ false,
                               /* use_prev_marking */ false);
    }
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}
1189 #define CARD_BM_TEST_MODE 0
// Heap region closure that computes liveness information from a
// marking bitmap: per-region marked byte counts, plus a "live region"
// bitmap and a "live card" bitmap. Used both concurrently (non-final,
// with yielding) and at a pause (final, no yielding).
class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;            // the marking bitmap we read liveness from
  ConcurrentMark* _cm;
  bool _changed;              // did the last pass change anything?
  bool _yield;                // whether to yield-check while iterating
  size_t _words_done;         // words examined in the last region
  size_t _tot_live;           // accumulated live bytes
  size_t _tot_used;           // accumulated used bytes
  size_t _regions_done;       // regions processed (non-final pacing)
  double _start_vtime_sec;    // vtime baseline for non-final pacing

  BitMap* _region_bm;         // out: bit set per region with live data
  BitMap* _card_bm;           // out: bit set per card with live data
  intptr_t _bottom_card_num;  // card num of the heap bottom (bias)
  bool _final;                // final (pause-time) vs concurrent count

  // Sets the card bitmap bits for the inclusive card-number range
  // [start_card_num, last_card_num] (biased by _bottom_card_num).
  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),_final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) {
          break;
        }
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  // Counts the live data in one region by walking its marked objects,
  // recording live cards as runs of card numbers. Returns true only to
  // abort the iteration (after a full-GC abort while yielding).
  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      // Extend the current run of live cards, or flush it and start a
      // new one if this object's cards are not contiguous with it.
      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      // Concurrent (non-final) counting: every 10 regions, sleep for a
      // while proportional to the vtime spent, to limit the overhead
      // this imposes while the mutators are running.
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  // Resets the per-pass state (not the accumulated totals or bitmaps).
  void reset() { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};
1414 void ConcurrentMark::calcDesiredRegions() {
1415 _region_bm.clear();
1416 _card_bm.clear();
1417 CalcLiveObjectsClosure calccl(false /*final*/,
1418 nextMarkBitMap(), this,
1419 &_region_bm, &_card_bm);
1420 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1421 g1h->heap_region_iterate(&calccl);
1423 do {
1424 calccl.reset();
1425 g1h->heap_region_iterate(&calccl);
1426 } while (calccl.changed());
1427 }
1429 class G1ParFinalCountTask: public AbstractGangTask {
1430 protected:
1431 G1CollectedHeap* _g1h;
1432 CMBitMap* _bm;
1433 size_t _n_workers;
1434 size_t *_live_bytes;
1435 size_t *_used_bytes;
1436 BitMap* _region_bm;
1437 BitMap* _card_bm;
1438 public:
1439 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1440 BitMap* region_bm, BitMap* card_bm) :
1441 AbstractGangTask("G1 final counting"), _g1h(g1h),
1442 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1443 {
1444 if (ParallelGCThreads > 0)
1445 _n_workers = _g1h->workers()->total_workers();
1446 else
1447 _n_workers = 1;
1448 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1449 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1450 }
1452 ~G1ParFinalCountTask() {
1453 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1454 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1455 }
1457 void work(int i) {
1458 CalcLiveObjectsClosure calccl(true /*final*/,
1459 _bm, _g1h->concurrent_mark(),
1460 _region_bm, _card_bm);
1461 calccl.no_yield();
1462 if (G1CollectedHeap::use_parallel_gc_threads()) {
1463 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1464 HeapRegion::FinalCountClaimValue);
1465 } else {
1466 _g1h->heap_region_iterate(&calccl);
1467 }
1468 assert(calccl.complete(), "Shouldn't have yielded!");
1470 assert((size_t) i < _n_workers, "invariant");
1471 _live_bytes[i] = calccl.tot_live();
1472 _used_bytes[i] = calccl.tot_used();
1473 }
1474 size_t live_bytes() {
1475 size_t live_bytes = 0;
1476 for (size_t i = 0; i < _n_workers; ++i)
1477 live_bytes += _live_bytes[i];
1478 return live_bytes;
1479 }
1480 size_t used_bytes() {
1481 size_t used_bytes = 0;
1482 for (size_t i = 0; i < _n_workers; ++i)
1483 used_bytes += _used_bytes[i];
1484 return used_bytes;
1485 }
1486 };
1488 class G1ParNoteEndTask;
// Heap region closure used at the end of marking: for each region it
// records end-of-marking info, frees the region if it is completely
// empty, and accumulates per-worker statistics for later reporting.
class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;             // id of the worker running this closure
  size_t _max_live_bytes;      // sum of regions' max_live_bytes()
  size_t _regions_claimed;     // regions processed by this closure
  size_t _freed_bytes;         // updated by free_region_if_totally_empty_work
  size_t _cleared_h_regions;   // updated by free_region_if_totally_empty_work
  size_t _freed_regions;       // updated by free_region_if_totally_empty_work
  UncleanRegionList* _unclean_region_list;  // where freed regions are queued
  double _claimed_region_time; // total seconds spent on claimed regions
  double _max_region_time;     // max seconds spent on a single region

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};
// Parallel task that runs G1NoteEndOfConcMarkClosure over the heap at
// the end of marking and aggregates the per-worker results.
class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;  // shared total, updated under ParGCRareEvent_lock
  size_t _freed_bytes;     // shared total, updated under ParGCRareEvent_lock
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      // Serialize updates of the shared totals.
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      // NOTE(review): regions_claimed() returns size_t but is printed
      // with %d -- may truncate on LP64; consider SIZE_FORMAT.
      gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};
1573 class G1ParScrubRemSetTask: public AbstractGangTask {
1574 protected:
1575 G1RemSet* _g1rs;
1576 BitMap* _region_bm;
1577 BitMap* _card_bm;
1578 public:
1579 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1580 BitMap* region_bm, BitMap* card_bm) :
1581 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1582 _region_bm(region_bm), _card_bm(card_bm)
1583 {}
1585 void work(int i) {
1586 if (G1CollectedHeap::use_parallel_gc_threads()) {
1587 _g1rs->scrub_par(_region_bm, _card_bm, i,
1588 HeapRegion::ScrubRemSetClaimValue);
1589 } else {
1590 _g1rs->scrub(_region_bm, _card_bm);
1591 }
1592 }
1594 };
1596 G1NoteEndOfConcMarkClosure::
1597 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1598 UncleanRegionList* list,
1599 int worker_num)
1600 : _g1(g1), _worker_num(worker_num),
1601 _max_live_bytes(0), _regions_claimed(0),
1602 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1603 _claimed_region_time(0.0), _max_region_time(0.0),
1604 _unclean_region_list(list)
1605 {}
// Processes one region at the end of marking: records its marking
// info, and frees it (via the heap) if marking found it to be
// completely empty. Returns false so the iteration always continues.
bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  r->reset_gc_time_stamp();
  // "Continues humongous" regions are handled via their "starts
  // humongous" region and are skipped here.
  if (!r->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    r->note_end_of_marking();
    _max_live_bytes += r->max_live_bytes();
    // Updates _freed_bytes, _cleared_h_regions and _freed_regions in
    // place if the region is freed.
    _g1->free_region_if_totally_empty_work(r,
                                           _freed_bytes,
                                           _cleared_h_regions,
                                           _freed_regions,
                                           _unclean_region_list,
                                           true /*par*/);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}
// The cleanup pause: does a final liveness count, notes the end of
// marking in every region (freeing empty ones), optionally scrubs the
// remembered sets, and updates the collector policy's statistics.
void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent */ false,
                     /* prev marking */ true);
  }

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::InitialClaimValue),
           "sanity check");

    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_count_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(
                                              HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    g1_par_count_task.work(0);
  }

  // Garbage is whatever is used but not live according to the count.
  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
#if 0
  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
                         (double) known_garbage_bytes / (double) (1024 * 1024));
#endif // 0
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
                           this_final_counting_time*1000.0);
  }
  _total_counting_time += this_final_counting_time;

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  // Tell the mutators that there might be unclean regions coming...
  g1h->set_unclean_regions_coming(true);
  double note_end_end = os::elapsedTime();
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start)*1000.0);
  }

  // Scrub the remembered sets. This must happen before the
  // record_concurrent_mark_cleanup_end()
  // call below, since it affects the metric by which we sort the heap
  // regions.
  if (G1ScrubRemSets) {
    double rs_scrub_start = os::elapsedTime();
    G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      int n_workers = g1h->workers()->total_workers();
      g1h->set_par_threads(n_workers);
      g1h->workers()->run_task(&g1_par_scrub_rs_task);
      g1h->set_par_threads(0);

      assert(g1h->check_heap_region_claim_values(
                                            HeapRegion::ScrubRemSetClaimValue),
             "sanity check");
    } else {
      g1_par_scrub_rs_task.work(0);
    }

    double rs_scrub_end = os::elapsedTime();
    double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
    _total_rs_scrub_time += this_rs_scrub_time;
  }

  // this will also free any regions totally full of garbage objects,
  // and sort the regions.
  g1h->g1_policy()->record_concurrent_mark_cleanup_end(
                        g1_par_note_end_task.freed_bytes(),
                        g1_par_note_end_task.max_live_bytes());

  // Statistics.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  // G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

  if (VerifyDuringGC) {
    HandleMark hm; // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(after)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent */ false,
                     /* prev marking */ true);
  }
}
// Finishes the cleanup work started at the cleanup pause: drains each
// per-worker unclean region list, clearing the regions' remembered
// sets and handing the regions back to the heap's unclean list.
void ConcurrentMark::completeCleanup() {
  // A full collection intervened.
  if (has_aborted()) return;

  // One list per (potential) parallel GC worker; 'first' is unused.
  int first = 0;
  int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  for (int t = 0; t < last; t++) {
    UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
    assert(list->well_formed(), "Inv");
    HeapRegion* hd = list->hd();
    while (hd != NULL) {
      // Now finish up the other stuff.
      hd->rem_set()->clear();
      HeapRegion* next_hd = hd->next_from_unclean_list();
      (void)list->pop();
      assert(list->hd() == next_hd, "how not?");
      _g1h->put_region_on_unclean_list(hd);
      if (!hd->isHumongous()) {
        // Add this to the _free_regions count by 1.
        _g1h->finish_free_region_work(0, 0, 1, NULL);
      }
      hd = list->hd();
      assert(hd == next_hd, "how not?");
    }
  }
}
1815 class G1CMIsAliveClosure: public BoolObjectClosure {
1816 G1CollectedHeap* _g1;
1817 public:
1818 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1819 _g1(g1)
1820 {}
1822 void do_object(oop obj) {
1823 assert(false, "not to be invoked");
1824 }
1825 bool do_object_b(oop obj) {
1826 HeapWord* addr = (HeapWord*)obj;
1827 return addr != NULL &&
1828 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1829 }
1830 };
1832 class G1CMKeepAliveClosure: public OopClosure {
1833 G1CollectedHeap* _g1;
1834 ConcurrentMark* _cm;
1835 CMBitMap* _bitMap;
1836 public:
1837 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1838 CMBitMap* bitMap) :
1839 _g1(g1), _cm(cm),
1840 _bitMap(bitMap) {}
1842 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1843 virtual void do_oop( oop* p) { do_oop_work(p); }
1845 template <class T> void do_oop_work(T* p) {
1846 oop thisOop = oopDesc::load_decode_heap_oop(p);
1847 HeapWord* addr = (HeapWord*)thisOop;
1848 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1849 _bitMap->mark(addr);
1850 _cm->mark_stack_push(thisOop);
1851 }
1852 }
1853 };
1855 class G1CMDrainMarkingStackClosure: public VoidClosure {
1856 CMMarkStack* _markStack;
1857 CMBitMap* _bitMap;
1858 G1CMKeepAliveClosure* _oopClosure;
1859 public:
1860 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1861 G1CMKeepAliveClosure* oopClosure) :
1862 _bitMap(bitMap),
1863 _markStack(markStack),
1864 _oopClosure(oopClosure)
1865 {}
1867 void do_void() {
1868 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1869 }
1870 };
// Performs weak reference processing at the remark pause using the
// marking-based is-alive/keep-alive/drain closures defined above, then
// unlinks stale entries from the symbol and string tables.
void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ReferenceProcessor* rp = g1h->ref_processor();

  // Process weak references.
  rp->setup_policy(clear_all_soft_refs);
  assert(_markStack.isEmpty(), "mark stack should be empty");

  G1CMIsAliveClosure g1IsAliveClosure (g1h);
  G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
  G1CMDrainMarkingStackClosure
    g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
                               &g1KeepAliveClosure);

  // XXXYYY Also: copy the parallel ref processing code from CMS.
  rp->process_discovered_references(&g1IsAliveClosure,
                                    &g1KeepAliveClosure,
                                    &g1DrainMarkingStackClosure,
                                    NULL);
  assert(_markStack.overflow() || _markStack.isEmpty(),
         "mark stack should be empty (unless it overflowed)");
  if (_markStack.overflow()) {
    // Keeping references alive may have overflowed the global mark
    // stack; record it so the caller restarts marking.
    set_has_overflown();
  }

  rp->enqueue_discovered_references();
  rp->verify_no_references_recorded();
  assert(!rp->discovery_enabled(), "should have been disabled");

  // Now clean up stale oops in SymbolTable and StringTable
  SymbolTable::unlink(&g1IsAliveClosure);
  StringTable::unlink(&g1IsAliveClosure);
}
1908 void ConcurrentMark::swapMarkBitMaps() {
1909 CMBitMapRO* temp = _prevMarkBitMap;
1910 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1911 _nextMarkBitMap = (CMBitMap*) temp;
1912 }
1914 class CMRemarkTask: public AbstractGangTask {
1915 private:
1916 ConcurrentMark *_cm;
1918 public:
1919 void work(int worker_i) {
1920 // Since all available tasks are actually started, we should
1921 // only proceed if we're supposed to be actived.
1922 if ((size_t)worker_i < _cm->active_tasks()) {
1923 CMTask* task = _cm->task(worker_i);
1924 task->record_start_time();
1925 do {
1926 task->do_marking_step(1000000000.0 /* something very large */);
1927 } while (task->has_aborted() && !_cm->has_overflown());
1928 // If we overflow, then we do not want to restart. We instead
1929 // want to abort remark and do concurrent marking again.
1930 task->record_end_time();
1931 }
1932 }
1934 CMRemarkTask(ConcurrentMark* cm) :
1935 AbstractGangTask("Par Remark"), _cm(cm) { }
1936 };
// The work of the remark pause proper: runs the remark task(s)
// (parallel or serial), verifies all SATB buffers were consumed, and
// either restarts marking (on overflow) or finishes it.
void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark   hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure the heap (TLABs etc.) is parsable before iterating.
  g1h->ensure_parsability(false);

  if (G1CollectedHeap::use_parallel_gc_threads()) {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = ParallelGCThreads;
    set_phase(active_workers, false);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);
  } else {
    G1CollectedHeap::StrongRootsScope srs(g1h);
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false);

    CMRemarkTask remarkTask(this);
    // Serial case: run the remark task directly on this thread as
    // worker 0.
    remarkTask.work(0);
  }
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");

  print_stats();

  // On overflow remark is restarted; otherwise marking is done.
  if (!restart_for_overflow())
    set_non_marking_state();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}
1991 #ifndef PRODUCT
// Debug-only (see ConcurrentMark::print_reachable()).  For each oop
// location visited, prints the referenced object together with a tag
// describing whether it is above the relevant TAMS and/or marked in
// the given bitmap.
class PrintReachableOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  CMBitMapRO*      _bitmap;
  outputStream*    _out;
  bool             _use_prev_marking;  // prev vs. next marking view
  bool             _all;               // currently unused here

public:
  PrintReachableOopClosure(CMBitMapRO* bitmap,
                           outputStream* out,
                           bool use_prev_marking,
                           bool all) :
    _g1h(G1CollectedHeap::heap()),
    _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }

  void do_oop(narrowOop* p) { do_oop_work(p); }
  void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    oop         obj = oopDesc::load_decode_heap_oop(p);
    const char* str = NULL;
    const char* str2 = "";

    if (obj == NULL) {
      str = "";
    } else if (!_g1h->is_in_g1_reserved(obj)) {
      // outside the G1 reserved heap
      str = " O";
    } else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee(hr != NULL, "invariant");
      bool over_tams = false;
      if (_use_prev_marking) {
        over_tams = hr->obj_allocated_since_prev_marking(obj);
      } else {
        over_tams = hr->obj_allocated_since_next_marking(obj);
      }
      bool marked = _bitmap->isMarked((HeapWord*) obj);

      if (over_tams) {
        // allocated since the relevant TAMS (implicitly live)
        str = " >";
        if (marked) {
          str2 = " AND MARKED";
        }
      } else if (marked) {
        str = " M";
      } else {
        str = " NOT";
      }
    }

    _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
                   p, (void*) obj, str, str2);
  }
};
// Debug-only.  Prints each object in a region (when it is above TAMS,
// marked, or _all is set) and its outgoing references via
// PrintReachableOopClosure.
class PrintReachableObjectClosure : public ObjectClosure {
private:
  CMBitMapRO*   _bitmap;
  outputStream* _out;
  bool          _use_prev_marking;  // prev vs. next marking view
  bool          _all;               // print everything, not just live
  HeapRegion*   _hr;                // region currently being printed

public:
  PrintReachableObjectClosure(CMBitMapRO* bitmap,
                              outputStream* out,
                              bool use_prev_marking,
                              bool all,
                              HeapRegion* hr) :
    _bitmap(bitmap), _out(out),
    _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }

  void do_object(oop o) {
    bool over_tams;
    if (_use_prev_marking) {
      over_tams = _hr->obj_allocated_since_prev_marking(o);
    } else {
      over_tams = _hr->obj_allocated_since_next_marking(o);
    }
    bool marked = _bitmap->isMarked((HeapWord*) o);
    bool print_it = _all || over_tams || marked;

    if (print_it) {
      _out->print_cr(" "PTR_FORMAT"%s",
                     o, (over_tams) ? " >" : (marked) ? " M" : "");
      // Also print the object's fields with their liveness tags.
      PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
      o->oop_iterate(&oopCl);
    }
  }
};
// Debug-only.  Prints a header for each heap region (bounds, top and
// TAMS) and then every relevant object in it via
// PrintReachableObjectClosure.
class PrintReachableRegionClosure : public HeapRegionClosure {
private:
  CMBitMapRO*   _bitmap;
  outputStream* _out;
  bool          _use_prev_marking;  // prev vs. next marking view
  bool          _all;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    // TAMS of the chosen marking view
    HeapWord* p = NULL;
    if (_use_prev_marking) {
      p = hr->prev_top_at_mark_start();
    } else {
      p = hr->next_top_at_mark_start();
    }
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "TAMS: "PTR_FORMAT, b, e, t, p);
    _out->cr();

    HeapWord* from = b;
    HeapWord* to   = t;

    if (to > from) {
      _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
      _out->cr();
      PrintReachableObjectClosure ocl(_bitmap, _out,
                                      _use_prev_marking, _all, hr);
      hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
      _out->cr();
    }

    // returning false keeps the region iteration going
    return false;
  }

  PrintReachableRegionClosure(CMBitMapRO* bitmap,
                              outputStream* out,
                              bool use_prev_marking,
                              bool all) :
    _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
};
// Debug-only.  Dumps per-region marking/liveness information to the
// file "<G1PrintReachableBaseFile>.<str>", using either the previous
// (PTAMS) or next (NTAMS) marking view.
void ConcurrentMark::print_reachable(const char* str,
                                     bool use_prev_marking,
                                     bool all) {
  gclog_or_tty->cr();
  gclog_or_tty->print_cr("== Doing heap dump... ");

  if (G1PrintReachableBaseFile == NULL) {
    gclog_or_tty->print_cr(" #### error: no base file defined");
    return;
  }

  if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
      (JVM_MAXPATHLEN - 1)) {
    gclog_or_tty->print_cr(" #### error: file name too long");
    return;
  }

  char file_name[JVM_MAXPATHLEN];
  // sprintf is safe here: the length check above bounds the result
  // to fit in file_name.
  sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
  gclog_or_tty->print_cr(" dumping to file %s", file_name);

  fileStream fout(file_name);
  if (!fout.is_open()) {
    gclog_or_tty->print_cr(" #### error: could not open file");
    return;
  }

  outputStream* out = &fout;

  // Pick the bitmap matching the requested marking view.
  CMBitMapRO* bitmap = NULL;
  if (use_prev_marking) {
    bitmap = _prevMarkBitMap;
  } else {
    bitmap = _nextMarkBitMap;
  }

  out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
  out->cr();

  out->print_cr("--- ITERATING OVER REGIONS");
  out->cr();
  PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
  _g1h->heap_region_iterate(&rcl);
  out->cr();

  gclog_or_tty->print_cr(" done");
  gclog_or_tty->flush();
}
2178 #endif // PRODUCT
2180 // This note is for drainAllSATBBuffers and the code in between.
2181 // In the future we could reuse a task to do this work during an
2182 // evacuation pause (since now tasks are not active and can be claimed
2183 // during an evacuation pause). This was a late change to the code and
2184 // is currently not being taken advantage of.
2186 class CMGlobalObjectClosure : public ObjectClosure {
2187 private:
2188 ConcurrentMark* _cm;
2190 public:
2191 void do_object(oop obj) {
2192 _cm->deal_with_reference(obj);
2193 }
2195 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2196 };
// Grays an object encountered outside normal marking (e.g. from an
// SATB buffer during an evacuation pause): marks it in the next
// bitmap and, if it lies below the global finger, pushes it on the
// global mark stack so it will be scanned.
void ConcurrentMark::deal_with_reference(oop obj) {
  if (verbose_high())
    gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
                           (void*) obj);

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (verbose_high())
        gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
                               "marked", (void*) obj);

      // we need to mark it first
      if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess:store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above
        HeapWord* finger = _finger;
        if (objAddr < finger) {
          if (verbose_high())
            gclog_or_tty->print_cr("[global] below the global finger "
                                   "("PTR_FORMAT"), pushing it", finger);
          if (!mark_stack_push(obj)) {
            if (verbose_low())
              gclog_or_tty->print_cr("[global] global stack overflow during "
                                     "deal_with_reference");
            // NOTE(review): presumably mark_stack_push() records the
            // overflow condition itself — confirm against CMMarkStack.
          }
        }
      }
    }
  }
}
// Processes all completed SATB buffers, and then the partially-filled
// per-thread buffers, graying each entry via deal_with_reference().
// Only called during an evacuation pause; see the note above.
void ConcurrentMark::drainAllSATBBuffers() {
  CMGlobalObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_closure(&oc);

  while (satb_mq_set.apply_closure_to_completed_buffer()) {
    if (verbose_medium())
      gclog_or_tty->print_cr("[global] processed an SATB buffer");
  }

  // no need to check whether we should do this, as this is only
  // called during an evacuation pause
  satb_mq_set.iterate_closure_all_threads();

  satb_mq_set.set_closure(NULL);
  assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
}
2252 void ConcurrentMark::markPrev(oop p) {
2253 // Note we are overriding the read-only view of the prev map here, via
2254 // the cast.
2255 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2256 }
2258 void ConcurrentMark::clear(oop p) {
2259 assert(p != NULL && p->is_oop(), "expected an oop");
2260 HeapWord* addr = (HeapWord*)p;
2261 assert(addr >= _nextMarkBitMap->startWord() ||
2262 addr < _nextMarkBitMap->endWord(), "in a region");
2264 _nextMarkBitMap->clear(addr);
2265 }
2267 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2268 // Note we are overriding the read-only view of the prev map here, via
2269 // the cast.
2270 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2271 _nextMarkBitMap->clearRange(mr);
2272 }
// Claims the next region to scan for task task_num by atomically
// advancing the global finger to the end of the region containing it.
// Returns the claimed region if non-empty, or NULL if the CAS lost or
// the region was empty (caller should retry).
HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    assert(_g1h->is_in_g1_reserved(finger), "invariant");

    // is the gap between reading the finger and doing the CAS too long?

    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
    HeapWord* bottom        = curr_region->bottom();
    HeapWord* end           = curr_region->end();
    HeapWord* limit         = curr_region->next_top_at_mark_start();

    if (verbose_low())
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);

    // Attempt to claim the region by bumping the global finger from
    // 'finger' to the end of this region.
    HeapWord* res =
      (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
      assert(_finger >= end, "the finger should have moved forward");

      if (verbose_low())
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);

      if (limit > bottom) {
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        return curr_region;
      } else {
        assert(limit == bottom,
               "the region limit should be at bottom");
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      // The CAS lost: some other task claimed the region first.
      assert(_finger > finger, "the finger should have moved forward");
      if (verbose_low())
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}
2341 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2342 bool result = false;
2343 for (int i = 0; i < (int)_max_task_num; ++i) {
2344 CMTask* the_task = _tasks[i];
2345 MemRegion mr = the_task->aborted_region();
2346 if (mr.start() != NULL) {
2347 assert(mr.end() != NULL, "invariant");
2348 assert(mr.word_size() > 0, "invariant");
2349 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2350 assert(hr != NULL, "invariant");
2351 if (hr->in_collection_set()) {
2352 // The region points into the collection set
2353 the_task->set_aborted_region(MemRegion());
2354 result = true;
2355 }
2356 }
2357 }
2358 return result;
2359 }
2361 bool ConcurrentMark::has_aborted_regions() {
2362 for (int i = 0; i < (int)_max_task_num; ++i) {
2363 CMTask* the_task = _tasks[i];
2364 MemRegion mr = the_task->aborted_region();
2365 if (mr.start() != NULL) {
2366 assert(mr.end() != NULL, "invariant");
2367 assert(mr.word_size() > 0, "invariant");
2368 return true;
2369 }
2370 }
2371 return false;
2372 }
// Applies cl to every oop held in the global marking data structures
// (the global mark stack and each task's queue) and invalidates
// region-stack entries and per-task aborted regions that point into
// the collection set.
void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low())
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
                           "size = %d", _markStack.size());
  // we first iterate over the contents of the mark stack...
  _markStack.oops_do(cl);

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue((int)i);

    if (queue->size() > 0 && verbose_low())
      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
                             "size = %d", i, queue->size());

    // ...then over the contents of the all the task queues.
    queue->oops_do(cl);
  }

  // Invalidate any entries, that are in the region stack, that
  // point into the collection set
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

  // Invalidate any aborted regions, recorded in the individual CM
  // tasks, that point into the collection set.
  if (invalidate_aborted_regions_in_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    assert(_should_gray_objects, "invariant");
  }

}
2410 void ConcurrentMark::clear_marking_state() {
2411 _markStack.setEmpty();
2412 _markStack.clear_overflow();
2413 _regionStack.setEmpty();
2414 _regionStack.clear_overflow();
2415 clear_has_overflown();
2416 _finger = _heap_start;
2418 for (int i = 0; i < (int)_max_task_num; ++i) {
2419 OopTaskQueue* queue = _task_queues->queue(i);
2420 queue->set_empty();
2421 }
2422 }
2424 void ConcurrentMark::print_stats() {
2425 if (verbose_stats()) {
2426 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2427 for (size_t i = 0; i < _active_tasks; ++i) {
2428 _tasks[i]->print_stats();
2429 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2430 }
2431 }
2432 }
// Marks, in the next marking bitmap, objects reachable from
// collection-set regions, using an explicit bounded mark stack with
// incremental (chunked) scanning of object arrays.  On mark-stack
// overflow the abort flag is set and the caller must retry.
class CSMarkOopClosure: public OopClosure {
  friend class CSMarkBitMapClosure;

  G1CollectedHeap* _g1h;
  CMBitMap*        _bm;
  ConcurrentMark*  _cm;
  oop*             _ms;              // local mark stack
  jint*            _array_ind_stack; // per-entry objArray resume index
  int              _ms_size;
  int              _ms_ind;          // next free slot in _ms
  int              _array_increment; // objArray elements per drain step

  // Pushes obj (and, for object arrays, the index to resume scanning
  // at).  Returns false if the local mark stack is full.
  bool push(oop obj, int arr_ind = 0) {
    if (_ms_ind == _ms_size) {
      gclog_or_tty->print_cr("Mark stack is full.");
      return false;
    }
    _ms[_ms_ind] = obj;
    if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
    _ms_ind++;
    return true;
  }

  oop pop() {
    if (_ms_ind == 0) return NULL;
    else {
      _ms_ind--;
      return _ms[_ms_ind];
    }
  }

  // Processes the local mark stack until empty.  Object arrays are
  // scanned _array_increment elements at a time, with the remainder
  // re-pushed, so one huge array cannot monopolize the stack.
  // Returns false if processing aborted (stack overflow).
  template <class T> bool drain() {
    while (_ms_ind > 0) {
      oop obj = pop();
      assert(obj != NULL, "Since index was non-zero.");
      if (obj->is_objArray()) {
        // _ms_ind was just decremented by pop(), so this reads the
        // resume index saved alongside the popped entry.
        jint arr_ind = _array_ind_stack[_ms_ind];
        objArrayOop aobj = objArrayOop(obj);
        jint len = aobj->length();
        jint next_arr_ind = arr_ind + _array_increment;
        if (next_arr_ind < len) {
          push(obj, next_arr_ind);
        }
        // Now process this portion of this one.
        int lim = MIN2(next_arr_ind, len);
        for (int j = arr_ind; j < lim; j++) {
          do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
        }

      } else {
        obj->oop_iterate(this);
      }
      if (abort()) return false;
    }
    return true;
  }

 public:
  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
    _g1h(G1CollectedHeap::heap()),
    _cm(cm),
    _bm(cm->nextMarkBitMap()),
    _ms_size(ms_size), _ms_ind(0),
    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
    _array_increment(MAX2(ms_size/8, 16))
  {}

  ~CSMarkOopClosure() {
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    T heap_oop = oopDesc::load_heap_oop(p);
    if (oopDesc::is_null(heap_oop)) return;
    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked. So follow the forwarding pointer. Note that
      // this does the right thing for self-forwarding pointers in the
      // evacuation failure case.
      obj = obj->forwardee();
    }
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (hr != NULL) {
      if (hr->in_collection_set()) {
        if (_g1h->is_obj_ill(obj)) {
          _bm->mark((HeapWord*)obj);
          if (!push(obj)) {
            gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
            set_abort();
          }
        }
      } else {
        // Outside the collection set; we need to gray it
        _cm->deal_with_reference(obj);
      }
    }
  }
};
2539 class CSMarkBitMapClosure: public BitMapClosure {
2540 G1CollectedHeap* _g1h;
2541 CMBitMap* _bitMap;
2542 ConcurrentMark* _cm;
2543 CSMarkOopClosure _oop_cl;
2544 public:
2545 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2546 _g1h(G1CollectedHeap::heap()),
2547 _bitMap(cm->nextMarkBitMap()),
2548 _oop_cl(cm, ms_size)
2549 {}
2551 ~CSMarkBitMapClosure() {}
2553 bool do_bit(size_t offset) {
2554 // convert offset into a HeapWord*
2555 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2556 assert(_bitMap->endWord() && addr < _bitMap->endWord(),
2557 "address out of range");
2558 assert(_bitMap->isMarked(addr), "tautology");
2559 oop obj = oop(addr);
2560 if (!obj->is_forwarded()) {
2561 if (!_oop_cl.push(obj)) return false;
2562 if (UseCompressedOops) {
2563 if (!_oop_cl.drain<narrowOop>()) return false;
2564 } else {
2565 if (!_oop_cl.drain<oop>()) return false;
2566 }
2567 }
2568 // Otherwise...
2569 return true;
2570 }
2571 };
// Iterates over collection-set regions, completing marking (via
// CSMarkBitMapClosure) for each region that did not fail evacuation.
// If any region's bitmap iteration aborts, records that the whole
// pass needs to be retried.
class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
  CMBitMap* _bm;
  CSMarkBitMapClosure _bit_cl;
  enum SomePrivateConstants {
    MSSize = 1000  // size of _bit_cl's local mark stack
  };
  bool _completed;  // false iff some region's iteration was aborted
public:
  CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
    _bm(cm->nextMarkBitMap()),
    _bit_cl(cm, MSSize),
    _completed(true)
  {}

  ~CompleteMarkingInCSHRClosure() {}

  bool doHeapRegion(HeapRegion* r) {
    if (!r->evacuation_failed()) {
      // Only the part of the region below NTAMS can carry marks.
      MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
      if (!mr.is_empty()) {
        if (!_bm->iterate(&_bit_cl, mr)) {
          // Aborted (mark-stack overflow): remember it and stop the
          // region walk (returning true terminates the iteration).
          _completed = false;
          return true;
        }
      }
    }
    return false;
  }

  bool completed() { return _completed; }
};
2606 class ClearMarksInHRClosure: public HeapRegionClosure {
2607 CMBitMap* _bm;
2608 public:
2609 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2611 bool doHeapRegion(HeapRegion* r) {
2612 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2613 MemRegion usedMR = r->used_region();
2614 _bm->clearRange(r->used_region());
2615 }
2616 return false;
2617 }
2618 };
2620 void ConcurrentMark::complete_marking_in_collection_set() {
2621 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2623 if (!g1h->mark_in_progress()) {
2624 g1h->g1_policy()->record_mark_closure_time(0.0);
2625 return;
2626 }
2628 int i = 1;
2629 double start = os::elapsedTime();
2630 while (true) {
2631 i++;
2632 CompleteMarkingInCSHRClosure cmplt(this);
2633 g1h->collection_set_iterate(&cmplt);
2634 if (cmplt.completed()) break;
2635 }
2636 double end_time = os::elapsedTime();
2637 double elapsed_time_ms = (end_time - start) * 1000.0;
2638 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2640 ClearMarksInHRClosure clr(nextMarkBitMap());
2641 g1h->collection_set_iterate(&clr);
2642 }
2644 // The next two methods deal with the following optimisation. Some
2645 // objects are gray by being marked and located above the finger. If
2646 // they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
2648 // there are no regions in the collection set located above the
2649 // finger, then the above cannot happen, hence we do not need to
2650 // explicitly gray any objects when copying them to below the
2651 // finger. The global stack will be scanned to ensure that, if it
2652 // points to objects being copied, it will update their
2653 // location. There is a tricky situation with the gray objects in
// region stack that are being copied, however. See the comment in
2655 // newCSet().
// Called when a new collection set has been chosen: computes the
// minimum of the global and per-task fingers and decides whether
// evacuated objects will need to be explicitly grayed (see the
// comment preceding this method).
void ConcurrentMark::newCSet() {
  if (!concurrent_marking_in_progress())
    // nothing to do if marking is not in progress
    return;

  // find what the lowest finger is among the global and local fingers
  _min_finger = _finger;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _min_finger)
      _min_finger = task_finger;
  }

  _should_gray_objects = false;

  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set. Since such gray
  // objects will be moved, and because it's not easy to redirect
  // region stack entries to point to a new location (because objects
  // in one region might be scattered to multiple regions after they
  // are copied), one option is to ensure that all marked objects
  // copied during a pause are pushed on the stack. Notice, however,
  // that this problem can only happen when the region stack is not
  // empty during an evacuation pause. So, we make the fix a bit less
  // conservative and ensure that regions are pushed on the stack,
  // irrespective whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smart about it.
  if (!region_stack_empty() || has_aborted_regions())
    _should_gray_objects = true;
}
2692 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2693 if (!concurrent_marking_in_progress())
2694 return;
2696 HeapWord* region_end = hr->end();
2697 if (region_end > _min_finger)
2698 _should_gray_objects = true;
2699 }
// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->clear_region_fields();
    _tasks[i]->clear_aborted_region();
  }
  _has_aborted = true;

  // Discard any partially-filled SATB buffers; their contents are of
  // no use once marking has been abandoned.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(
                                 false, /* new active value */
                                 satb_mq_set.is_active() /* expected_active */);
}
2722 static void print_ms_time_info(const char* prefix, const char* name,
2723 NumberSeq& ns) {
2724 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2725 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2726 if (ns.num() > 0) {
2727 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2728 prefix, ns.sd(), ns.maximum());
2729 }
2730 }
// Prints a summary of concurrent-marking time statistics: per-phase
// series, counting/RS-scrub totals and overall STW/concurrent times.
void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info("  ", "init marks", _init_times);
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    // Breakdown of the remark pause into its two sub-phases.
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
                         "(%8.2f s marking, %8.2f s counting).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum(),
                         cmThread()->vtime_count_accum());
}
// Lists the concurrent marking worker threads on the given stream.
void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
  _parallel_workers->print_worker_threads_on(st);
}
2768 // Closures
2769 // XXX: there seems to be a lot of code duplication here;
2770 // should refactor and consolidate the shared code.
// NOTE(review): the following comment appears to be carried over from
// CMS ("mark refs into the CMS generation in the CMS bit map. Called
// at the first checkpoint.") — no such closure follows here, so it is
// likely stale; confirm and remove.
2775 // We take a break if someone is trying to stop the world.
2776 bool ConcurrentMark::do_yield_check(int worker_i) {
2777 if (should_yield()) {
2778 if (worker_i == 0)
2779 _g1h->g1_policy()->record_concurrent_pause();
2780 cmThread()->yield();
2781 if (worker_i == 0)
2782 _g1h->g1_policy()->record_concurrent_pause_end();
2783 return true;
2784 } else {
2785 return false;
2786 }
2787 }
// Delegates the yield decision to the concurrent mark thread.
bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}
2793 bool ConcurrentMark::containing_card_is_marked(void* p) {
2794 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2795 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2796 }
2798 bool ConcurrentMark::containing_cards_are_marked(void* start,
2799 void* last) {
2800 return
2801 containing_card_is_marked(start) &&
2802 containing_card_is_marked(last);
2803 }
2805 #ifndef PRODUCT
2806 // for debugging purposes
void ConcurrentMark::print_finger() {
  // Prints the heap bounds, the global finger and each task's finger.
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
                         _heap_start, _heap_end, _finger);
  for (int i = 0; i < (int) _max_task_num; ++i) {
    gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
  }
  gclog_or_tty->print_cr("");
}
2815 #endif
// Closure for iteration over bitmaps: for each set bit, moves the
// appropriate finger along, scans the corresponding object, and
// partially drains the task's local queue and the global stack.
class CMBitMapClosure : public BitMapClosure {
private:
  // the bitmap that is being iterated over
  CMBitMap*                   _nextMarkBitMap;
  ConcurrentMark*             _cm;
  CMTask*                     _task;
  // true if we're scanning a heap region claimed by the task (so that
  // we move the finger along), false if we're not, i.e. currently when
  // scanning a heap region popped from the region stack (so that we
  // do not move the task finger along; it'd be a mistake if we did so).
  bool                        _scanning_heap_region;

public:
  CMBitMapClosure(CMTask *task,
                  ConcurrentMark* cm,
                  CMBitMap* nextMarkBitMap)
    :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }

  void set_scanning_heap_region(bool scanning_heap_region) {
    _scanning_heap_region = scanning_heap_region;
  }

  // Returns false (terminating the iteration) once the task's
  // has_aborted flag has been raised.
  bool do_bit(size_t offset) {
    HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
    assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert( addr < _cm->finger(), "invariant");

    if (_scanning_heap_region) {
      statsOnly( _task->increase_objs_found_on_bitmap() );
      assert(addr >= _task->finger(), "invariant");
      // We move that task's local finger along.
      _task->move_finger_to(addr);
    } else {
      // We move the task's region finger along.
      _task->move_region_finger_to(addr);
    }

    _task->scan_object(oop(addr));
    // we only partially drain the local queue and global stack
    _task->drain_local_queue(true);
    _task->drain_global_stack(true);

    // if the has_aborted flag has been raised, we need to bail out of
    // the iteration
    return !_task->has_aborted();
  }
};
2866 // Closure for iterating over objects, currently only used for
2867 // processing SATB buffers.
2868 class CMObjectClosure : public ObjectClosure {
2869 private:
2870 CMTask* _task;
2872 public:
2873 void do_object(oop obj) {
2874 _task->deal_with_reference(obj);
2875 }
2877 CMObjectClosure(CMTask* task) : _task(task) { }
2878 };
// Closure for iterating over object fields: hands each field's
// referent to the owning task for graying.
class CMOopClosure : public OopClosure {
private:
  G1CollectedHeap*   _g1h;
  ConcurrentMark*    _cm;
  CMTask*            _task;

public:
  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // The location itself must be inside the heap and in a region
    // that is not on the free list.
    assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
    assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
           "invariant");

    oop obj = oopDesc::load_decode_heap_oop(p);
    if (_cm->verbose_high())
      gclog_or_tty->print_cr("[%d] we're looking at location "
                             "*"PTR_FORMAT" = "PTR_FORMAT,
                             _task->task_id(), p, (void*) obj);
    _task->deal_with_reference(obj);
  }

  CMOopClosure(G1CollectedHeap* g1h,
               ConcurrentMark* cm,
               CMTask* task)
    : _g1h(g1h), _cm(cm), _task(task) { }
};
2910 void CMTask::setup_for_region(HeapRegion* hr) {
2911 // Separated the asserts so that we know which one fires.
2912 assert(hr != NULL,
2913 "claim_region() should have filtered out continues humongous regions");
2914 assert(!hr->continuesHumongous(),
2915 "claim_region() should have filtered out continues humongous regions");
2917 if (_cm->verbose_low())
2918 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2919 _task_id, hr);
2921 _curr_region = hr;
2922 _finger = hr->bottom();
2923 update_region_limit();
2924 }
// Recompute _region_limit for the current region and keep _finger
// consistent with it. The limit is the region's NTAMS; the branches
// below handle the cases where NTAMS moved underneath us (region
// collected, or reused as a GC alloc region) since setup.
void CMTask::update_region_limit() {
  HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    // Common case: the limit did not shrink; the finger is still
    // within the range we will iterate over.
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2962 void CMTask::giveup_current_region() {
2963 assert(_curr_region != NULL, "invariant");
2964 if (_cm->verbose_low())
2965 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2966 _task_id, _curr_region);
2967 clear_region_fields();
2968 }
2970 void CMTask::clear_region_fields() {
2971 // Values for these three fields that indicate that we're not
2972 // holding on to a region.
2973 _curr_region = NULL;
2974 _finger = NULL;
2975 _region_limit = NULL;
2977 _region_finger = NULL;
2978 }
// Re-initialise this task for a fresh marking cycle over the given
// "next" mark bitmap: drop any claimed-region state, clear any
// recorded aborted region, and zero the per-cycle counters (plus the
// fine-grained marking statistics, when compiled in).
void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] resetting", _task_id);

  _nextMarkBitMap = nextMarkBitMap;
  clear_region_fields();
  clear_aborted_region();

  // Per-cycle bookkeeping, reported by print_stats().
  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;
  _termination_start_time_ms = 0.0;

#if _MARKING_STATS_
  // Reset all detailed statistics counters for the new cycle.
  _local_pushes = 0;
  _local_pops = 0;
  _local_max_size = 0;
  _objs_scanned = 0;
  _global_pushes = 0;
  _global_pops = 0;
  _global_max_size = 0;
  _global_transfers_to = 0;
  _global_transfers_from = 0;
  _region_stack_pops = 0;
  _regions_claimed = 0;
  _objs_found_on_bitmap = 0;
  _satb_buffers_processed = 0;
  _steal_attempts = 0;
  _steals = 0;
  _aborted = 0;
  _aborted_overflow = 0;
  _aborted_cm_aborted = 0;
  _aborted_yield = 0;
  _aborted_timed_out = 0;
  _aborted_satb = 0;
  _aborted_termination = 0;
#endif // _MARKING_STATS_
}
3021 bool CMTask::should_exit_termination() {
3022 regular_clock_call();
3023 // This is called when we are in the termination protocol. We should
3024 // quit if, for some reason, this task wants to abort or the global
3025 // stack is not empty (this means that we can get work from it).
3026 return !_cm->mark_stack_empty() || has_aborted();
3027 }
3029 // This determines whether the method below will check both the local
3030 // and global fingers when determining whether to push on the stack a
3031 // gray object (value 1) or whether it will only check the global one
3032 // (value 0). The tradeoffs are that the former will be a bit more
3033 // accurate and possibly push less on the stack, but it might also be
3034 // a little bit slower.
3036 #define _CHECK_BOTH_FINGERS_ 1
// Process a single reference discovered during marking. If the object
// lies in the G1 heap and is not yet marked, it is marked on the next
// bitmap; if it is below the local finger / region limit / global
// finger (per _CHECK_BOTH_FINGERS_), no future bitmap scan will visit
// it, so it is additionally pushed on the local queue (i.e. grayed).
void CMTask::deal_with_reference(oop obj) {
  if (_cm->verbose_high())
    gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
                           _task_id, (void*) obj);

  // Work-based clock accounting: one more reference visited.
  ++_refs_reached;

  HeapWord* objAddr = (HeapWord*) obj;
  assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
  if (_g1h->is_in_g1_reserved(objAddr)) {
    assert(obj != NULL, "is_in_g1_reserved should ensure this");
    HeapRegion* hr = _g1h->heap_region_containing(obj);
    if (_g1h->is_obj_ill(obj, hr)) {
      if (_cm->verbose_high())
        gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
                               _task_id, (void*) obj);

      // we need to mark it first
      if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess:store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above
        HeapWord* global_finger = _cm->finger();

#if _CHECK_BOTH_FINGERS_
        // we will check both the local and global fingers

        if (_finger != NULL && objAddr < _finger) {
          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
                                   "pushing it", _task_id, _finger);
          push(obj);
        } else if (_curr_region != NULL && objAddr < _region_limit) {
          // do nothing
        } else if (objAddr < global_finger) {
          // Notice that the global finger might be moving forward
          // concurrently. This is not a problem. In the worst case, we
          // mark the object while it is above the global finger and, by
          // the time we read the global finger, it has moved forward
          // passed this object. In this case, the object will probably
          // be visited when a task is scanning the region and will also
          // be pushed on the stack. So, some duplicate work, but no
          // correctness problems.

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        } else {
          // do nothing
        }
#else // _CHECK_BOTH_FINGERS_
        // we will only check the global finger

        if (objAddr < global_finger) {
          // see long comment above

          if (_cm->verbose_high())
            gclog_or_tty->print_cr("[%d] below the global finger "
                                   "("PTR_FORMAT"), pushing it",
                                   _task_id, global_finger);
          push(obj);
        }
#endif // _CHECK_BOTH_FINGERS_
      }
    }
  }
}
3107 void CMTask::push(oop obj) {
3108 HeapWord* objAddr = (HeapWord*) obj;
3109 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3110 assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3111 "invariant");
3112 assert(!_g1h->is_obj_ill(obj), "invariant");
3113 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3115 if (_cm->verbose_high())
3116 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3118 if (!_task_queue->push(obj)) {
3119 // The local task queue looks full. We need to push some entries
3120 // to the global stack.
3122 if (_cm->verbose_medium())
3123 gclog_or_tty->print_cr("[%d] task queue overflow, "
3124 "moving entries to the global stack",
3125 _task_id);
3126 move_entries_to_global_stack();
3128 // this should succeed since, even if we overflow the global
3129 // stack, we should have definitely removed some entries from the
3130 // local queue. So, there must be space on it.
3131 bool success = _task_queue->push(obj);
3132 assert(success, "invariant");
3133 }
3135 statsOnly( int tmp_size = _task_queue->size();
3136 if (tmp_size > _local_max_size)
3137 _local_max_size = tmp_size;
3138 ++_local_pushes );
3139 }
3141 void CMTask::reached_limit() {
3142 assert(_words_scanned >= _words_scanned_limit ||
3143 _refs_reached >= _refs_reached_limit ,
3144 "shouldn't have been called otherwise");
3145 regular_clock_call();
3146 }
// The "clock" of the work-based scheduling scheme: invoked regularly
// during marking (when the words-scanned or refs-reached counters hit
// their limits, plus a few strategic places). It re-arms the limits
// and then checks, in order, every condition under which the current
// marking step has to abort: global overflow, marking aborted for a
// Full GC, a pending yield request, the time quota being exceeded,
// and completed SATB buffers waiting to be processed.
void CMTask::regular_clock_call() {
  if (has_aborted())
    return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent())
    return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit)
    ++_clock_due_to_scanning;
  if (_refs_reached >= _refs_reached_limit)
    ++_clock_due_to_marking;

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = %d%s, refs reached = %d%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_aborted_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}
3235 void CMTask::recalculate_limits() {
3236 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3237 _words_scanned_limit = _real_words_scanned_limit;
3239 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3240 _refs_reached_limit = _real_refs_reached_limit;
3241 }
3243 void CMTask::decrease_limits() {
3244 // This is called when we believe that we're going to do an infrequent
3245 // operation which will increase the per byte scanned cost (i.e. move
3246 // entries to/from the global stack). It basically tries to decrease the
3247 // scanning limit so that the clock is called earlier.
3249 if (_cm->verbose_medium())
3250 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3252 _words_scanned_limit = _real_words_scanned_limit -
3253 3 * words_scanned_period / 4;
3254 _refs_reached_limit = _real_refs_reached_limit -
3255 3 * refs_reached_period / 4;
3256 }
// Spill up to global_stack_transfer_size entries from the local task
// queue to the global mark stack in one bulk push. If the global
// stack overflows, the task aborts (the overflow protocol runs at the
// end of the marking step). Being relatively expensive, it decreases
// the clock limits before returning.
void CMTask::move_entries_to_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the local queue
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // we popped at least one entry from the local queue

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
      set_has_aborted();
    } else {
      // the transfer was successful

      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size)
                   _global_max_size = tmp_size;
                 _global_pushes += n );
    }
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}
// Pop up to global_stack_transfer_size entries from the global mark
// stack in one bulk operation and push them on the local queue. Only
// called when the local queue is empty or under a target limit, so
// the local pushes are expected to succeed. Being relatively
// expensive, it decreases the clock limits before returning.
void CMTask::get_entries_from_global_stack() {
  // local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // yes, we did actually pop at least one entry

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium())
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size)
                 _local_max_size = tmp_size;
               _local_pushes += n );
  }

  // this operation was quite expensive, so decrease the limits
  decrease_limits();
}
3328 void CMTask::drain_local_queue(bool partially) {
3329 if (has_aborted())
3330 return;
3332 // Decide what the target size is, depending whether we're going to
3333 // drain it partially (so that other tasks can steal if they run out
3334 // of things to do) or totally (at the very end).
3335 size_t target_size;
3336 if (partially)
3337 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3338 else
3339 target_size = 0;
3341 if (_task_queue->size() > target_size) {
3342 if (_cm->verbose_high())
3343 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3344 _task_id, target_size);
3346 oop obj;
3347 bool ret = _task_queue->pop_local(obj);
3348 while (ret) {
3349 statsOnly( ++_local_pops );
3351 if (_cm->verbose_high())
3352 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3353 (void*) obj);
3355 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3356 assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3357 "invariant");
3359 scan_object(obj);
3361 if (_task_queue->size() <= target_size || has_aborted())
3362 ret = false;
3363 else
3364 ret = _task_queue->pop_local(obj);
3365 }
3367 if (_cm->verbose_high())
3368 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3369 _task_id, _task_queue->size());
3370 }
3371 }
3373 void CMTask::drain_global_stack(bool partially) {
3374 if (has_aborted())
3375 return;
3377 // We have a policy to drain the local queue before we attempt to
3378 // drain the global stack.
3379 assert(partially || _task_queue->size() == 0, "invariant");
3381 // Decide what the target size is, depending whether we're going to
3382 // drain it partially (so that other tasks can steal if they run out
3383 // of things to do) or totally (at the very end). Notice that,
3384 // because we move entries from the global stack in chunks or
3385 // because another task might be doing the same, we might in fact
3386 // drop below the target. But, this is not a problem.
3387 size_t target_size;
3388 if (partially)
3389 target_size = _cm->partial_mark_stack_size_target();
3390 else
3391 target_size = 0;
3393 if (_cm->mark_stack_size() > target_size) {
3394 if (_cm->verbose_low())
3395 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3396 _task_id, target_size);
3398 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3399 get_entries_from_global_stack();
3400 drain_local_queue(partially);
3401 }
3403 if (_cm->verbose_low())
3404 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3405 _task_id, _cm->mark_stack_size());
3406 }
3407 }
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
//
// Claims completed SATB buffers and applies this task's object closure
// to each recorded object, graying the referents. During remark (when
// not aborted) it also processes the partially-filled buffers of all
// threads. Checks the regular clock between buffers, so it may abort.
void CMTask::drain_satb_buffers() {
  if (has_aborted())
    return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, &oc);
  else
    satb_mq_set.set_closure(&oc);

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      statsOnly( ++_satb_buffers_processed );
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads())
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    else
      satb_mq_set.iterate_closure_all_threads();
  }

  _draining_satb_buffers = false;

  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // Unregister the closure so later enqueues don't call into a dead
  // stack frame.
  if (G1CollectedHeap::use_parallel_gc_threads())
    satb_mq_set.set_par_closure(_task_id, NULL);
  else
    satb_mq_set.set_closure(NULL);

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
// Drain the global region stack — plus any region this task
// previously aborted out of — by iterating the next mark bitmap over
// each region with the given closure. If the bitmap iteration aborts
// mid-region, the unscanned remainder is recorded in _aborted_region
// so this task can resume it on a later invocation.
void CMTask::drain_region_stack(BitMapClosure* bc) {
  if (has_aborted())
    return;

  assert(_region_finger == NULL,
         "it should be NULL when we're not scanning a region");

  if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
                             _task_id, _cm->region_stack_size());

    MemRegion mr;

    // A previously-aborted region takes priority over the stack.
    if (!_aborted_region.is_empty()) {
      mr = _aborted_region;
      _aborted_region = MemRegion();

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
                               _task_id, mr.start(), mr.end());
    } else {
      mr = _cm->region_stack_pop_lock_free();
      // it returns MemRegion() if the pop fails
      statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
    }

    while (mr.start() != NULL) {
      if (_cm->verbose_medium())
        gclog_or_tty->print_cr("[%d] we are scanning region "
                               "["PTR_FORMAT", "PTR_FORMAT")",
                               _task_id, mr.start(), mr.end());

      assert(mr.end() <= _cm->finger(),
             "otherwise the region shouldn't be on the stack");
      assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
      if (_nextMarkBitMap->iterate(bc, mr)) {
        assert(!has_aborted(),
               "cannot abort the task without aborting the bitmap iteration");

        // We finished iterating over the region without aborting.
        regular_clock_call();
        if (has_aborted())
          mr = MemRegion();
        else {
          mr = _cm->region_stack_pop_lock_free();
          // it returns MemRegion() if the pop fails
          statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
        }
      } else {
        assert(has_aborted(), "currently the only way to do so");

        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _region_finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _region_finger to something non-null.
        assert(_region_finger != NULL, "invariant");

        // Make sure that any previously aborted region has been
        // cleared.
        assert(_aborted_region.is_empty(), "aborted region not cleared");

        // The iteration was actually aborted. So now _region_finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        MemRegion newRegion =
          MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());

        if (!newRegion.is_empty()) {
          if (_cm->verbose_low()) {
            gclog_or_tty->print_cr("[%d] recording unscanned region"
                                   "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
                                   _task_id,
                                   newRegion.start(), newRegion.end());
          }
          // Now record the part of the region we didn't scan to
          // make sure this task scans it later.
          _aborted_region = newRegion;
        }
        // break from while
        mr = MemRegion();
      }
      _region_finger = NULL;
    }

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
  }
}
// Print this task's per-cycle marking statistics: timing and step
// history always, and the detailed counters (queue/stack traffic,
// abort causes, etc.) only when _MARKING_STATS_ is compiled in.
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("           transfers to = %d, transfers from = %d",
                         _global_transfers_to,_global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
                         _regions_claimed, _region_stack_pops);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}
3611 /*****************************************************************************
3613 The do_marking_step(time_target_ms) method is the building block
3614 of the parallel marking framework. It can be called in parallel
3615 with other invocations of do_marking_step() on different tasks
3616 (but only one per task, obviously) and concurrently with the
3617 mutator threads, or during remark, hence it eliminates the need
3618 for two versions of the code. When called during remark, it will
3619 pick up from where the task left off during the concurrent marking
3620 phase. Interestingly, tasks are also claimable during evacuation
3621 pauses too, since do_marking_step() ensures that it aborts before
3622 it needs to yield.
 The data structures that it uses to do marking work are the
 following:
3627 (1) Marking Bitmap. If there are gray objects that appear only
3628 on the bitmap (this happens either when dealing with an overflow
3629 or when the initial marking phase has simply marked the roots
3630 and didn't push them on the stack), then tasks claim heap
3631 regions whose bitmap they then scan to find gray objects. A
3632 global finger indicates where the end of the last claimed region
3633 is. A local finger indicates how far into the region a task has
3634 scanned. The two fingers are used to determine how to gray an
3635 object (i.e. whether simply marking it is OK, as it will be
3636 visited by a task in the future, or whether it needs to be also
3637 pushed on a stack).
3639 (2) Local Queue. The local queue of the task which is accessed
3640 reasonably efficiently by the task. Other tasks can steal from
3641 it when they run out of work. Throughout the marking phase, a
3642 task attempts to keep its local queue short but not totally
3643 empty, so that entries are available for stealing by other
3644 tasks. Only when there is no more work, a task will totally
3645 drain its local queue.
3647 (3) Global Mark Stack. This handles local queue overflow. During
3648 marking only sets of entries are moved between it and the local
3649 queues, as access to it requires a mutex and more fine-grain
3650 interaction with it which might cause contention. If it
3651 overflows, then the marking phase should restart and iterate
3652 over the bitmap to identify gray objects. Throughout the marking
3653 phase, tasks attempt to keep the global mark stack at a small
3654 length but not totally empty, so that entries are available for
3655 popping by other tasks. Only when there is no more work, tasks
3656 will totally drain the global mark stack.
3658 (4) Global Region Stack. Entries on it correspond to areas of
3659 the bitmap that need to be scanned since they contain gray
3660 objects. Pushes on the region stack only happen during
3661 evacuation pauses and typically correspond to areas covered by
3662 GC LABS. If it overflows, then the marking phase should restart
3663 and iterate over the bitmap to identify gray objects. Tasks will
3664 try to totally drain the region stack as soon as possible.
3666 (5) SATB Buffer Queue. This is where completed SATB buffers are
3667 made available. Buffers are regularly removed from this queue
3668 and scanned for roots, so that the queue doesn't get too
3669 long. During remark, all completed buffers are processed, as
3670 well as the filled in parts of any uncompleted buffers.
3672 The do_marking_step() method tries to abort when the time target
3673 has been reached. There are a few other cases when the
3674 do_marking_step() method also aborts:
3676 (1) When the marking phase has been aborted (after a Full GC).
3678 (2) When a global overflow (either on the global stack or the
3679 region stack) has been triggered. Before the task aborts, it
3680 will actually sync up with the other tasks to ensure that all
3681 the marking data structures (local queues, stacks, fingers etc.)
3682 are re-initialised so that when do_marking_step() completes,
3683 the marking phase can immediately restart.
3685 (3) When enough completed SATB buffers are available. The
3686 do_marking_step() method only tries to drain SATB buffers right
3687 at the beginning. So, if enough buffers are available, the
3688 marking step aborts and the SATB buffers are processed at
3689 the beginning of the next invocation.
 (4) To yield. When we have to yield then we abort and yield
 right at the end of do_marking_step(). This saves us from a lot
3693 of hassle as, by yielding we might allow a Full GC. If this
3694 happens then objects will be compacted underneath our feet, the
3695 heap might shrink, etc. We save checking for this by just
3696 aborting and doing the yield right at the end.
3698 From the above it follows that the do_marking_step() method should
3699 be called in a loop (or, otherwise, regularly) until it completes.
3701 If a marking step completes without its has_aborted() flag being
3702 true, it means it has completed the current marking phase (and
3703 also all other marking tasks have done so and have all synced up).
3705 A method called regular_clock_call() is invoked "regularly" (in
3706 sub ms intervals) throughout marking. It is this clock method that
3707 checks all the abort conditions which were mentioned above and
3708 decides when the task should abort. A work-based scheme is used to
3709 trigger this clock method: when the number of object words the
3710 marking phase has scanned or the number of references the marking
3711 phase has visited reach a given limit. Additional invocations to
3712 the method clock have been planted in a few other strategic places
3713 too. The initial reason for the clock method was to avoid calling
3714 vtime too regularly, as it is quite expensive. So, once it was in
3715 place, it was natural to piggy-back all the other conditions on it
3716 too and not constantly check them throughout the code.
3718 *****************************************************************************/
void CMTask::do_marking_step(double time_target_ms) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  assert(concurrent() || _cm->region_stack_empty(),
         "the region stack should have been cleared before remark");
  assert(concurrent() || !_cm->has_aborted_regions(),
         "aborted regions should have been cleared before remark");
  assert(_region_finger == NULL,
         "this should be non-null only when a region is being scanned");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard again all possible scenarios, as it is
  // possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // Shrink our time budget by the predicted overshoot, based on the
  // history of how far past the target previous steps have run.
  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_aborted_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  CMOopClosure oop_closure(_g1h, _cm, this);
  set_oop_closure(&oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack. We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: finish scanning the region we currently hold (if
  // any), then claim and scan further regions, until we either run
  // out of regions or the task aborts.
  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        assert(has_aborted(), "currently the only way to do so");
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        assert(_finger != NULL, "invariant");

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        assert(_finger < _region_limit, "invariant");
        HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
        // Check if bitmap iteration was aborted while scanning the last object
        if (new_finger >= _region_limit) {
          giveup_current_region();
        } else {
          move_finger_to(new_finger);
        }
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      // Separated the asserts so that we know which one fires.
      assert(_curr_region == NULL, "invariant");
      assert(_finger == NULL, "invariant");
      assert(_region_limit == NULL, "invariant");
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low())
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);

        setup_for_region(claimed_region);
        assert(_curr_region == claimed_region, "invariant");
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (!has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium())
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it. We also cannot
    // check if the region stack is empty because if a thread is aborting
    // it can push a partially done region back.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          assert(_cm->concurrent_marking_in_progress(), "invariant");
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished. We separated the guarantees so
      // that, if a condition is false, we can immediately find out
      // which one.
      guarantee(_cm->out_of_regions(), "only way to reach here");
      guarantee(_aborted_region.is_empty(), "only way to reach here");
      guarantee(_cm->region_stack_empty(), "only way to reach here");
      guarantee(_cm->mark_stack_empty(), "only way to reach here");
      guarantee(_task_queue->size() == 0, "only way to reach here");
      guarantee(!_cm->has_overflown(), "only way to reach here");
      guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
      guarantee(!_cm->region_stack_overflow(), "only way to reach here");

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
    } else {
      // Apparently there's more work to do. Let's abort this task. It
      // will restart it and we can hopefully find more things to do.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_aborted_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted())
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
    }
  } else {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
  }

  // Release the (debug-only) claim so the task can be re-entered.
  _claimed = false;
}
// Construct the per-worker marking task state. The task's queue must
// be registered in the given queue set under task_id (asserted at the
// start of do_marking_step()). The next mark bitmap is not known yet
// at construction time and is supplied later (NULL here).
CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    // 17 is just an arbitrary non-zero seed for the work-stealing
    // random sequence (passed by address to try_stealing()).
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _oop_closure(NULL),
    _aborted_region(MemRegion()) {
  guarantee(task_queue != NULL, "invariant");
  guarantee(task_queues != NULL, "invariant");

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking = 0 );

  // Seed the step-time-difference history so that the very first
  // get_new_prediction() call in do_marking_step() has a sample to
  // work with (0.5ms is a guessed initial overshoot).
  _marking_step_diffs_ms.add(0.5);
}