Wed, 25 Mar 2009 13:10:54 -0700
6543938: G1: remove the concept of popularity
Reviewed-by: iveresov, tonyp
/*
 * Copyright 2001-2008 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 */

#include "incls/_precompiled.incl"
#include "incls/_concurrentMark.cpp.incl"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                       (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
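
// Sizing note (illustrative, not part of this change): with a shifter of
// s, one bit in the map covers 2^s heap words, so the map needs
// (_bmWordSize >> s) bits, i.e. (_bmWordSize >> (s + LogBitsPerByte))
// bytes -- which is exactly what the ReservedSpace above is sized to,
// plus one byte for rounding. For example, assuming a 64 MB heap and
// s == 0 (one bit per word on a 64-bit VM), that is 8M bits = 1 MB of
// backing store.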

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}
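
// Illustrative use (a sketch, not part of this change): consume every
// marked run in [area_start, area_end) by calling the method repeatedly
// until it returns an empty region. "process" is a hypothetical consumer.
//
//   MemRegion mr = bm->getAndClearMarkedRegion(area_start, area_end);
//   while (!mr.is_empty()) {
//     process(mr);
//     mr = bm->getAndClearMarkedRegion(mr.end(), area_end);
//   }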

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}
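
// A note on the lock-free push protocol above (descriptive only): a
// pusher first *claims* slot _index by advancing it with cmpxchg, and
// only then stores into _base[index]. If the cmpxchg loses a race, the
// push is simply retried against the new _index. This makes concurrent
// pushes safe with respect to one another; the bulk operations
// (par_push_arr/par_pop_arr below) are instead serialized against each
// other with a lock, as they move many slots at once.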

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    guarantee(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}
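
// Illustrative draining loop (a sketch with hypothetical names, not
// part of this change): pull the stack contents out in chunks of up to
// BUF_SIZE oops until the stack reports empty.
//
//   oop buf[BUF_SIZE];
//   int n;
//   while (stack->par_pop_arr(buf, BUF_SIZE, &n)) {
//     for (int i = 0; i < n; i++) {
//       process(buf[i]);  // hypothetical per-oop work
//     }
//   }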

CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL)
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  _index = 0;
  // QQQQ cast ...
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
}

void CMRegionStack::push(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

MemRegion CMRegionStack::pop() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        tmp_guarantee_CM( mr.end() != NULL, "invariant" );
        tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        tmp_guarantee_CM( mr.end() == NULL, "invariant" );
      }
    }
    // Otherwise, we need to try again.
  }
}
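
// A note on invalidated entries (descriptive only): an empty MemRegion
// (NULL start and end) serves as an in-place "deleted" sentinel. The
// invalidation pass below overwrites stale entries with MemRegion()
// rather than compacting the stack, and pop() above simply skips any
// such entry it encounters.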

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      tmp_guarantee_CM( mr.end() != NULL, "invariant");
      tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      tmp_guarantee_CM( hr != NULL, "invariant" );
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      tmp_guarantee_CM( mr.end() == NULL, "invariant" );
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false; break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
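
// How drain() is meant to be used (a sketch with a hypothetical closure
// type, not part of this change): repeatedly pop grey objects and apply
// the closure to their fields until the stack is empty, or until a
// yield request stops the drain early (in which case it returns false
// and can be resumed later).
//
//   SomeOopClosure cl(...);  // hypothetical OopClosure subclass
//   while (!stack->drain(&cl, bitmap, true /* yield_after */)) {
//     // yielded; cooperate with the safepoint, then resume draining
//   }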

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),
  _cleanup_co_tracker(G1CLGroup)
{
  CMVerboseLevel verbose_level =
    (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose)
    verbose_level = no_verbose;
  if (verbose_level > high_verbose)
    verbose_level = high_verbose;
  _verbose_level = verbose_level;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);

  _markStack.allocate(G1CMStackSize);
  _regionStack.allocate(G1CMRegionStackSize);

  // Create & start a ConcurrentMark thread.
  if (G1ConcMark) {
    _cmThread = new ConcurrentMarkThread(this);
    assert(cmThread() != NULL, "CM Thread should have been created");
    assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  } else {
    _cmThread = NULL;
  }
  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBLogBufferSize);

  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
  for (int i = 0 ; i < size; i++) {
    _par_cleanup_thread_state[i] = new ParCleanupThreadState;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ParallelMarkingThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ParallelMarkingThreads > 0) {
      // notice that ParallelMarkingThreads overrides G1MarkingOverheadPerc
      // if both are set
      _parallel_marking_threads = ParallelMarkingThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPerc > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPerc / 100.0;
      double overall_cm_overhead =
        (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                                (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }
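
    // Worked example of the sizing above (illustrative numbers, not
    // defaults): on an 8-CPU machine, suppose the flags work out to an
    // overall_cm_overhead of 0.10, i.e. marking may consume 10% of one
    // time slice across the whole machine. Then cpu_ratio = 1/8 =
    // 0.125, marking_thread_num = ceil(0.10 / 0.125) = 1,
    // marking_task_overhead = 0.10 / 1 * 8 = 0.8 (the one thread should
    // be active 80% of the time), and sleep_factor = (1 - 0.8) / 0.8 =
    // 0.25 (sleep a quarter of the just-elapsed work time after each
    // marking step).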

    if (parallel_marking_threads() > 1)
      _cleanup_task_overhead = 1.0;
    else
      _cleanup_task_overhead = marking_task_overhead();
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee( parallel_marking_threads() > 0, "peace of mind" );
    _parallel_workers = new WorkGang("Parallel Marking Threads",
                                     (int) parallel_marking_threads(), false, true);
    if (_parallel_workers == NULL)
      vm_exit_during_initialization("Failed necessary allocation.");
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we do
  // not have this problem.
  if (!concurrent_marking_in_progress() && !force)
    return;

  MemRegion committed = _g1h->g1_committed();
  tmp_guarantee_CM( committed.start() == _heap_start,
                    "start shouldn't change" );
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  guarantee( _heap_start != NULL &&
             _heap_end != NULL &&
             _heap_start < _heap_end, "heap bounds should look ok" );

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low())
    gclog_or_tty->print_cr("[global] resetting");

  // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->reset(_nextMarkBitMap);

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  guarantee( active_tasks <= _max_task_num, "we should not have more" );

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    guarantee( !concurrent_marking_in_progress(), "invariant" );
    guarantee( _finger == _heap_end, "only way to get here" );
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  int size = (int) MAX2(ParallelGCThreads, (size_t)1);
  for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
  FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
                    _par_cleanup_thread_state);

  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

#define PRINT_REACHABLE_AT_INITIAL_MARK 0
#if PRINT_REACHABLE_AT_INITIAL_MARK
static FILE* reachable_file = NULL;

class PrintReachableClosure: public OopsInGenClosure {
  CMBitMap* _bm;
  int _level;
public:
  PrintReachableClosure(CMBitMap* bm) :
    _bm(bm), _level(0) {
    guarantee(reachable_file != NULL, "pre-condition");
  }
  void do_oop(oop* p) {
    oop obj = *p;
    HeapWord* obj_addr = (HeapWord*)obj;
    if (obj == NULL) return;
    fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
            _level, p, (void*) obj, _bm->isMarked(obj_addr));
    if (!_bm->isMarked(obj_addr)) {
      _bm->mark(obj_addr);
      _level++;
      obj->oop_iterate(this);
      _level--;
    }
  }
};
#endif // PRINT_REACHABLE_AT_INITIAL_MARK

#define SEND_HEAP_DUMP_TO_FILE 0
#if SEND_HEAP_DUMP_TO_FILE
static FILE* heap_dump_file = NULL;
#endif // SEND_HEAP_DUMP_TO_FILE

void ConcurrentMark::clearNextBitmap() {
  guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end)
      next = end;
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();
  }
}
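
// A note on the chunked clear above (descriptive only): the bitmap is
// cleared M (1024*1024) heap words at a time, with a yield check after
// every chunk, so this concurrent phase never holds off a pending
// safepoint for longer than one chunk's worth of clearing.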

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

  // Find all the reachable objects...
#if PRINT_REACHABLE_AT_INITIAL_MARK
  guarantee(reachable_file == NULL, "Protocol");
  char fn_buf[100];
  sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
  reachable_file = fopen(fn_buf, "w");
  // clear the mark bitmap (no grey objects to start with)
  _nextMarkBitMap->clearAll();
  PrintReachableClosure prcl(_nextMarkBitMap);
  g1h->process_strong_roots(
                            false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &prcl, // Regular roots
                            &prcl    // Perm Gen Roots
                            );
  // The root iteration above "consumed" dirty cards in the perm gen.
  // Therefore, as a shortcut, we dirty all such cards.
  g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
  fclose(reachable_file);
  reachable_file = NULL;
  // clear the mark bitmap again.
  _nextMarkBitMap->clearAll();
  COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
  COMPILER2_PRESENT(DerivedPointerTable::clear());
#endif // PRINT_REACHABLE_AT_INITIAL_MARK

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

class CMMarkRootsClosure: public OopsInGenClosure {
private:
  ConcurrentMark*  _cm;
  G1CollectedHeap* _g1h;
  bool             _do_barrier;

public:
  CMMarkRootsClosure(ConcurrentMark* cm,
                     G1CollectedHeap* g1h,
                     bool do_barrier) : _cm(cm), _g1h(g1h),
                                        _do_barrier(do_barrier) { }

  virtual void do_oop(narrowOop* p) {
    guarantee(false, "NYI");
  }

  virtual void do_oop(oop* p) {
    oop thisOop = *p;
    if (thisOop != NULL) {
      assert(thisOop->is_oop() || thisOop->mark() == NULL,
             "expected an oop, possibly with mark word displaced");
      HeapWord* addr = (HeapWord*)thisOop;
      if (_g1h->is_in_g1_reserved(addr)) {
        _cm->grayRoot(thisOop);
      }
    }
    if (_do_barrier) {
      assert(!_g1h->is_in_g1_reserved(p),
             "Should be called on external roots");
      do_barrier(p);
    }
  }
};

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor();
  rp->verify_no_references_recorded();
  rp->enable_discovery(); // enable ("weak") refs discovery
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
  satb_mq_set.set_active_all_threads(true);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  guarantee( !_cleanup_co_tracker.enabled(), "invariant" );

  size_t max_marking_threads =
    MAX2((size_t) 1, parallel_marking_threads());
  for (int i = 0; i < (int)_max_task_num; ++i) {
    _tasks[i]->enable_co_tracker();
    if (i < (int) max_marking_threads)
      _tasks[i]->reset_co_tracker(marking_task_overhead());
    else
      _tasks[i]->reset_co_tracker(0.0);
  }
}

// Checkpoint the roots into this generation from outside
// this generation. [Note this initial checkpoint need only
// be approximate -- we'll do a catch up phase subsequently.]
void ConcurrentMark::checkpointRootsInitial() {
  assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  double start = os::elapsedTime();
  GCOverheadReporter::recordSTWStart(start);

  // If there has not been a GC[n-1] since last GC[n] cycle completed,
  // precede our marking with a collection of all
  // younger generations to keep floating garbage to a minimum.
  // YSR: we won't do this for now -- it's an optimization to be
  // done post-beta.

  // YSR: ignoring weak refs for now; will do at bug fixing stage
  // EVM: assert(discoveredRefsAreClear());

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_init_start();
  checkpointRootsInitialPre();

  // YSR: when concurrent precleaning is in place, we'll
  // need to clear the cached card table here

  ResourceMark rm;
  HandleMark  hm;

  g1h->ensure_parsability(false);
  g1h->perm_gen()->save_marks();

  CMMarkRootsClosure notOlder(this, g1h, false);
  CMMarkRootsClosure older(this, g1h, true);

  g1h->set_marking_started();
  g1h->rem_set()->prepare_for_younger_refs_iterate(false);

  g1h->process_strong_roots(false,   // fake perm gen collection
                            SharedHeap::SO_AllClasses,
                            &notOlder, // Regular roots
                            &older    // Perm Gen Roots
                            );
  checkpointRootsInitialPost();

  // Statistics.
  double end = os::elapsedTime();
  _init_times.add((end - start) * 1000.0);
  GCOverheadReporter::recordSTWEnd(end);

  g1p->record_concurrent_mark_init_end();
}

/*
   Notice that in the next two methods, we actually leave the STS
   during the barrier sync and join it immediately afterwards. If we
   do not do this, the following deadlock can occur: one thread could
   be in the barrier sync code, waiting for the other thread to also
   sync up, whereas another one could be trying to yield, while also
   waiting for the other threads to sync up too.

   Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync
   barrier is one of the last things do_marking_step() does, and it
   doesn't manipulate any data structures afterwards.
*/
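
// The overall overflow-restart handshake, as implemented by the two
// methods below (descriptive outline only):
//   1. every active task enters the first barrier;
//   2. once all have synced up, task 0 alone clears the global marking
//      state (clear_marking_state());
//   3. each task then resets its own local data structures;
//   4. every task enters the second barrier, after which marking can
//      restart from a consistent state.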

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _first_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    clear_marking_state();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low())
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);

  ConcurrentGCThread::stsLeave();
  _second_overflow_barrier_sync.enter();
  ConcurrentGCThread::stsJoin();
  // at this point everything should be re-initialised and ready to go

  if (verbose_low())
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
}

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
                    "address should be within the heap bounds" );

  if (!_nextMarkBitMap->isMarked(addr))
    _nextMarkBitMap->parMark(addr);
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects)
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;

  HeapWord* finger = _finger;

  if (verbose_low())
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    tmp_guarantee_CM( mr.end() <= finger, "invariant" );

    tmp_guarantee_CM( mr.start() <= mr.end() &&
                      _heap_start <= mr.start() &&
                      mr.end() <= _heap_end,
                  "region boundaries should fall within the committed space" );
    if (verbose_low())
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());

    if (!region_stack_push(mr)) {
      if (verbose_low())
        gclog_or_tty->print_cr("[global] region stack has overflown.");
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
    // because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects)
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;

      // No OrderAccess:store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low())
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
        }
      }
    }
  }
}
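
// Why only objects *below* the finger are pushed (descriptive only):
// the global finger records how far the bitmap scan has advanced. An
// object newly marked at an address the scan has already passed would
// otherwise never be revisited, so it must be pushed explicitly; an
// object above the finger will still be reached by the ongoing scan,
// so pushing it would be redundant.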

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    guarantee( Thread::current()->is_ConcurrentGC_thread(),
               "this should only be done by a conc GC thread" );

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
    CMTask* the_task = _cm->task(worker_i);
    the_task->start_co_tracker();
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        the_task->do_marking_step(10.0);
        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

        the_task->update_co_tracker();

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    the_task->update_co_tracker(true);
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0)
    _parallel_workers->run_task(&markingTask);
  else
    markingTask.work(0);
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();
  GCOverheadReporter::recordSTWStart(start);

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow)
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
  } else {
    // We're done with marking.
    JavaThread::satb_mark_queue_set().set_active_all_threads(false);
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  GCOverheadReporter::recordSTWEnd(now);
  for (int i = 0; i < (int)_max_task_num; ++i)
    _tasks[i]->disable_co_tracker();
  _cleanup_co_tracker.enable();
  _cleanup_co_tracker.reset(cleanup_task_overhead());
  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  COTracker* _co_tracker;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num),
                "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm,
                         COTracker* co_tracker) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm),
    _final(final), _co_tracker(co_tracker),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }
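
  // Worked example of the card arithmetic (illustrative addresses, and
  // assuming the usual 512-byte cards, i.e. card_shift == 9): with a
  // heap bottom of 0x80000000, _bottom_card_num = 0x80000000 >> 9 =
  // 0x400000. An object starting at 0x80001234 has card num
  // 0x80001234 >> 9, so its card table index is
  // (0x80001234 >> 9) - 0x400000 = 9.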

  bool doHeapRegion(HeapRegion* hr) {
    if (_co_tracker != NULL)
      _co_tracker->update();

    if (!_final && _regions_done == 0)
      _start_vtime_sec = os::elapsedVTime();

    if (hr->continuesHumongous()) {
      HeapRegion* hum_start = hr->humongous_start_region();
      // If the head region of the humongous region has been determined
      // to be alive, then all the tail regions should be marked
      // as well.
      if (_region_bm->at(hum_start->hrs_index())) {
        _region_bm->par_at_put(hr->hrs_index(), 1);
      }
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0. One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
    // The first card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed. -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded. It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set. We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1)
      mark_card_num_range(start_card_num, last_card_num);
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        _region_bm->par_at_put(hr->hrs_index(), 1);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      _region_bm->par_at_put(hr->hrs_index(), 1);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
#if 0
          gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
                                 "overhead %1.4lf",
                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                                 _co_tracker->concOverhead(os::elapsedTime()));
#endif
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};

void ConcurrentMark::calcDesiredRegions() {
  guarantee( _cleanup_co_tracker.enabled(), "invariant" );
  _cleanup_co_tracker.start();

  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm,
                                &_cleanup_co_tracker);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());

  _cleanup_co_tracker.update(true);
}
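
// A note on the loop above (descriptive only): because this counting
// pass runs concurrently and may yield, a single traversal can miss
// objects that get marked behind it. The closure therefore records
// whether a pass made any progress (changed()), and the heap is
// re-iterated until a full pass completes without changes, i.e. until
// the live-count data reaches a fixpoint.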

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 final counting"), _g1h(g1h),
    _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
  {
    if (ParallelGCThreads > 0)
      _n_workers = _g1h->workers()->total_workers();
    else
      _n_workers = 1;
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm,
                                  NULL /* CO tracker */);
    calccl.no_yield();
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    guarantee( (size_t)i < _n_workers, "invariant" );
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes() {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes() {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  size_t _cleared_h_regions;
  size_t _freed_regions;
  UncleanRegionList* _unclean_region_list;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             UncleanRegionList* list,
                             int worker_num);
  size_t freed_bytes() { return _freed_bytes; }
  size_t cleared_h_regions() { return _cleared_h_regions; }
  size_t freed_regions() { return _freed_regions; }
  UncleanRegionList* unclean_region_list() {
    return _unclean_region_list;
  }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;
protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   ConcurrentMark::ParCleanupThreadState**
                   par_cleanup_thread_state) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0),
    _par_cleanup_thread_state(par_cleanup_thread_state)
  {}

  void work(int i) {
    double start = os::elapsedTime();
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
                                           &_par_cleanup_thread_state[i]->list,
                                           i);
    if (ParallelGCThreads > 0) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now finish up freeing the current thread's regions.
    _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
                                  g1_note_end.cleared_h_regions(),
                                  0, NULL);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (ParallelGCThreads > 0) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           UncleanRegionList* list,
                           int worker_num)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _unclean_region_list(list)
{}

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  r->reset_gc_time_stamp();
  if (!r->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    r->note_end_of_marking();
    _max_live_bytes += r->max_live_bytes();
    _g1->free_region_if_totally_empty_work(r,
                                           _freed_bytes,
                                           _cleared_h_regions,
                                           _freed_regions,
                                           _unclean_region_list,
                                           true /*par*/);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) _max_region_time = region_time;
  }
  return false;
}

void ConcurrentMark::cleanup() {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  _cleanup_co_tracker.disable();

  G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
  g1p->record_concurrent_mark_cleanup_start();

  double start = os::elapsedTime();
  GCOverheadReporter::recordSTWStart(start);

  // Do counting once more with the world stopped for good measure.
  G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
                                        &_region_bm, &_card_bm);
  if (ParallelGCThreads > 0) {
    assert(g1h->check_heap_region_claim_values(
                                               HeapRegion::InitialClaimValue),
           "sanity check");

    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_count_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(
                                             HeapRegion::FinalCountClaimValue),
           "sanity check");
  } else {
    g1_par_count_task.work(0);
  }

  size_t known_garbage_bytes =
    g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
#if 0
  gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
                         (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
                         (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
                         (double) known_garbage_bytes / (double) (1024 * 1024));
#endif // 0
  g1p->set_known_garbage_bytes(known_garbage_bytes);

  size_t start_used_bytes = g1h->used();
  _at_least_one_mark_complete = true;
  g1h->set_marking_complete();

  double count_end = os::elapsedTime();
  double this_final_counting_time = (count_end - start);
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("Cleanup:");
    gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
                           this_final_counting_time*1000.0);
  }
  _total_counting_time += this_final_counting_time;

  // Install newly created mark bitMap as "prev".
  swapMarkBitMaps();

  g1h->reset_gc_time_stamp();

  // Note end of marking in all heap regions.
  double note_end_start = os::elapsedTime();
  G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
  if (ParallelGCThreads > 0) {
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&g1_par_note_end_task);
    g1h->set_par_threads(0);

    assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
           "sanity check");
  } else {
    g1_par_note_end_task.work(0);
  }
  g1h->set_unclean_regions_coming(true);
  double note_end_end = os::elapsedTime();
  // Tell the mutators that there might be unclean regions coming...
  if (G1PrintParCleanupStats) {
    gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
                           (note_end_end - note_end_start)*1000.0);
  }
1696 // Now we "scrub" remembered sets. Note: we must do this before the
1697 // call below, since it affects the metric by which we sort the heap
1698 // regions.
1699 if (G1ScrubRemSets) {
1700 double rs_scrub_start = os::elapsedTime();
1701 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1702 if (ParallelGCThreads > 0) {
1703 int n_workers = g1h->workers()->total_workers();
1704 g1h->set_par_threads(n_workers);
1705 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1706 g1h->set_par_threads(0);
1708 assert(g1h->check_heap_region_claim_values(
1709 HeapRegion::ScrubRemSetClaimValue),
1710 "sanity check");
1711 } else {
1712 g1_par_scrub_rs_task.work(0);
1713 }
1715 double rs_scrub_end = os::elapsedTime();
1716 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1717 _total_rs_scrub_time += this_rs_scrub_time;
1718 }
1720 // this will also free any regions totally full of garbage objects,
1721 // and sort the regions.
1722 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1723 g1_par_note_end_task.freed_bytes(),
1724 g1_par_note_end_task.max_live_bytes());
1726 // Statistics.
1727 double end = os::elapsedTime();
1728 _cleanup_times.add((end - start) * 1000.0);
1729 GCOverheadReporter::recordSTWEnd(end);
1731 // G1CollectedHeap::heap()->print();
1732 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1733 // G1CollectedHeap::heap()->get_gc_time_stamp());
1735 if (PrintGC || PrintGCDetails) {
1736 g1h->print_size_transition(gclog_or_tty,
1737 start_used_bytes,
1738 g1h->used(),
1739 g1h->capacity());
1740 }
1742 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1743 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1745 // We need to make this be a "collection" so any collection pause that
1746 // races with it goes around and waits for completeCleanup to finish.
1747 g1h->increment_total_collections();
1749 #ifndef PRODUCT
1750 if (G1VerifyConcMark) {
1751 G1CollectedHeap::heap()->prepare_for_verify();
1752 G1CollectedHeap::heap()->verify(true, false);
1753 }
1754 #endif
1755 }
1757 void ConcurrentMark::completeCleanup() {
1758 // A full collection intervened.
1759 if (has_aborted()) return;
1761 int first = 0;
1762 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1763 for (int t = 0; t < last; t++) {
1764 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1765 assert(list->well_formed(), "Inv");
1766 HeapRegion* hd = list->hd();
1767 while (hd != NULL) {
1768 // Now finish up the other stuff.
1769 hd->rem_set()->clear();
1770 HeapRegion* next_hd = hd->next_from_unclean_list();
1771 (void)list->pop();
1772 guarantee(list->hd() == next_hd, "how not?");
1773 _g1h->put_region_on_unclean_list(hd);
1774 if (!hd->isHumongous()) {
1775 // Bump the _free_regions count by 1.
1776 _g1h->finish_free_region_work(0, 0, 1, NULL);
1777 }
1778 hd = list->hd();
1779 guarantee(hd == next_hd, "how not?");
1780 }
1781 }
1782 }
1785 class G1CMIsAliveClosure: public BoolObjectClosure {
1786 G1CollectedHeap* _g1;
1787 public:
1788 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1789 _g1(g1)
1790 {}
1792 void do_object(oop obj) {
1793 assert(false, "not to be invoked");
1794 }
1795 bool do_object_b(oop obj) {
1796 HeapWord* addr = (HeapWord*)obj;
1797 return addr != NULL &&
1798 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1799 }
1800 };
1802 class G1CMKeepAliveClosure: public OopClosure {
1803 G1CollectedHeap* _g1;
1804 ConcurrentMark* _cm;
1805 CMBitMap* _bitMap;
1806 public:
1807 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1808 CMBitMap* bitMap) :
1809 _g1(g1), _cm(cm),
1810 _bitMap(bitMap) {}
1812 void do_oop(narrowOop* p) {
1813 guarantee(false, "NYI");
1814 }
1816 void do_oop(oop* p) {
1817 oop thisOop = *p;
1818 HeapWord* addr = (HeapWord*)thisOop;
1819 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1820 _bitMap->mark(addr);
1821 _cm->mark_stack_push(thisOop);
1822 }
1823 }
1824 };
1826 class G1CMDrainMarkingStackClosure: public VoidClosure {
1827 CMMarkStack* _markStack;
1828 CMBitMap* _bitMap;
1829 G1CMKeepAliveClosure* _oopClosure;
1830 public:
1831 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1832 G1CMKeepAliveClosure* oopClosure) :
1833 _bitMap(bitMap),
1834 _markStack(markStack),
1835 _oopClosure(oopClosure)
1836 {}
1838 void do_void() {
1839 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1840 }
1841 };
1843 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1844 ResourceMark rm;
1845 HandleMark hm;
1846 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1847 ReferenceProcessor* rp = g1h->ref_processor();
1849 // Process weak references.
1850 rp->setup_policy(clear_all_soft_refs);
1851 assert(_markStack.isEmpty(), "mark stack should be empty");
1853 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1854 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1855 G1CMDrainMarkingStackClosure
1856 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1857 &g1KeepAliveClosure);
1859 // XXXYYY Also: copy the parallel ref processing code from CMS.
1860 rp->process_discovered_references(&g1IsAliveClosure,
1861 &g1KeepAliveClosure,
1862 &g1DrainMarkingStackClosure,
1863 NULL);
1864 assert(_markStack.overflow() || _markStack.isEmpty(),
1865 "mark stack should be empty (unless it overflowed)");
1866 if (_markStack.overflow()) {
1867 set_has_overflown();
1868 }
1870 rp->enqueue_discovered_references();
1871 rp->verify_no_references_recorded();
1872 assert(!rp->discovery_enabled(), "should have been disabled");
1874 // Now clean up stale oops in SymbolTable and StringTable
1875 SymbolTable::unlink(&g1IsAliveClosure);
1876 StringTable::unlink(&g1IsAliveClosure);
1877 }
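// The three closures above follow a protocol worth spelling out. Below is a
// minimal standalone sketch of that protocol (illustrative only, not the
// real ReferenceProcessor API; every name in it is hypothetical).
#if 0
// is_alive answers "is this referent reachable?"; keep_alive retains a
// referent we decided to keep; drain completes the transitive closure.
struct IsAlive   { virtual bool is_alive(void* obj) const = 0; };
struct KeepAlive { virtual void keep_alive(void** field) = 0; };
struct Drain     { virtual void drain() = 0; };

static void process_references(IsAlive& ia, KeepAlive& ka, Drain& d,
                               void** referents, int n) {
  for (int i = 0; i < n; i++) {
    if (referents[i] != NULL && !ia.is_alive(referents[i])) {
      // Policy decides whether to clear the field or keep the referent;
      // keeping it may generate new marking work.
      ka.keep_alive(&referents[i]);
    }
  }
  d.drain(); // finish marking anything the keep-alive step discovered
}
#endif // 0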
1879 void ConcurrentMark::swapMarkBitMaps() {
1880 CMBitMapRO* temp = _prevMarkBitMap;
1881 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1882 _nextMarkBitMap = (CMBitMap*) temp;
1883 }
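// The swap above is plain double buffering: the bitmap we have just finished
// building becomes the read-only "prev" view, and the old "prev" storage is
// recycled as the writable "next" bitmap for the following marking cycle.
// A minimal standalone sketch of the idea (illustrative only):
#if 0
struct Bitmap; // stand-in for the real bitmap type

struct Marker {
  const Bitmap* _prev; // read-only: results of the completed marking
  Bitmap*       _next; // writable: being built by the current marking

  void swap_bitmaps() {
    const Bitmap* temp = _prev;
    _prev = _next;                      // finished map becomes "prev"
    _next = const_cast<Bitmap*>(temp);  // old "prev" will be cleared and reused
  }
};
#endif // 0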
1885 class CMRemarkTask: public AbstractGangTask {
1886 private:
1887 ConcurrentMark *_cm;
1889 public:
1890 void work(int worker_i) {
1891 // Since all available tasks are actually started, we should
1892 // only proceed if we're supposed to be active.
1893 if ((size_t)worker_i < _cm->active_tasks()) {
1894 CMTask* task = _cm->task(worker_i);
1895 task->record_start_time();
1896 do {
1897 task->do_marking_step(1000000000.0 /* something very large */);
1898 } while (task->has_aborted() && !_cm->has_overflown());
1899 // If we overflow, then we do not want to restart. We instead
1900 // want to abort remark and do concurrent marking again.
1901 task->record_end_time();
1902 }
1903 }
1905 CMRemarkTask(ConcurrentMark* cm) :
1906 AbstractGangTask("Par Remark"), _cm(cm) { }
1907 };
1909 void ConcurrentMark::checkpointRootsFinalWork() {
1910 ResourceMark rm;
1911 HandleMark hm;
1912 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1914 g1h->ensure_parsability(false);
1916 if (ParallelGCThreads > 0) {
1917 g1h->change_strong_roots_parity();
1918 // this is remark, so we'll use up all available threads
1919 int active_workers = ParallelGCThreads;
1920 set_phase(active_workers, false);
1922 CMRemarkTask remarkTask(this);
1923 // We will start all available threads, even if we decide that the
1924 // active_workers will be fewer. The extra ones will just bail out
1925 // immediately.
1926 int n_workers = g1h->workers()->total_workers();
1927 g1h->set_par_threads(n_workers);
1928 g1h->workers()->run_task(&remarkTask);
1929 g1h->set_par_threads(0);
1931 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1932 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1933 } else {
1934 g1h->change_strong_roots_parity();
1935 // this is remark, so we'll use up all available threads
1936 int active_workers = 1;
1937 set_phase(active_workers, false);
1939 CMRemarkTask remarkTask(this);
1940 // We will start all available threads, even if we decide that the
1941 // active_workers will be fewer. The extra ones will just bail out
1942 // immediately.
1943 remarkTask.work(0);
1945 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1946 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1947 }
1949 print_stats();
1951 if (!restart_for_overflow())
1952 set_non_marking_state();
1954 #if VERIFY_OBJS_PROCESSED
1955 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1956 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1957 _scan_obj_cl.objs_processed,
1958 ThreadLocalObjQueue::objs_enqueued);
1959 guarantee(_scan_obj_cl.objs_processed ==
1960 ThreadLocalObjQueue::objs_enqueued,
1961 "Different number of objs processed and enqueued.");
1962 }
1963 #endif
1964 }
1966 class ReachablePrinterOopClosure: public OopClosure {
1967 private:
1968 G1CollectedHeap* _g1h;
1969 CMBitMapRO* _bitmap;
1970 outputStream* _out;
1972 public:
1973 ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
1974 _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
1976 void do_oop(narrowOop* p) {
1977 guarantee(false, "NYI");
1978 }
1980 void do_oop(oop* p) {
1981 oop obj = *p;
1982 const char* str = NULL;
1983 const char* str2 = "";
1985 if (!_g1h->is_in_g1_reserved(obj))
1986 str = "outside G1 reserved";
1987 else {
1988 HeapRegion* hr = _g1h->heap_region_containing(obj);
1989 guarantee( hr != NULL, "invariant" );
1990 if (hr->obj_allocated_since_prev_marking(obj)) {
1991 str = "over TAMS";
1992 if (_bitmap->isMarked((HeapWord*) obj))
1993 str2 = " AND MARKED";
1994 } else if (_bitmap->isMarked((HeapWord*) obj))
1995 str = "marked";
1996 else
1997 str = "#### NOT MARKED ####";
1998 }
2000 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
2001 p, (void*) obj, str, str2);
2002 }
2003 };
2005 class ReachablePrinterClosure: public BitMapClosure {
2006 private:
2007 CMBitMapRO* _bitmap;
2008 outputStream* _out;
2010 public:
2011 ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2012 _bitmap(bitmap), _out(out) { }
2014 bool do_bit(size_t offset) {
2015 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2016 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2018 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2019 oop(addr)->oop_iterate(&oopCl);
2020 _out->print_cr("");
2022 return true;
2023 }
2024 };
2026 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2027 private:
2028 CMBitMapRO* _bitmap;
2029 outputStream* _out;
2031 public:
2032 void do_object(oop o) {
2033 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2035 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2036 o->oop_iterate(&oopCl);
2037 _out->print_cr("");
2038 }
2040 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2041 _bitmap(bitmap), _out(out) { }
2042 };
2044 class RegionReachablePrinterClosure : public HeapRegionClosure {
2045 private:
2046 CMBitMapRO* _bitmap;
2047 outputStream* _out;
2049 public:
2050 bool doHeapRegion(HeapRegion* hr) {
2051 HeapWord* b = hr->bottom();
2052 HeapWord* e = hr->end();
2053 HeapWord* t = hr->top();
2054 HeapWord* p = hr->prev_top_at_mark_start();
2055 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2056 "PTAMS: "PTR_FORMAT, b, e, t, p);
2057 _out->print_cr("");
2059 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2060 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2062 return false;
2063 }
2065 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2066 outputStream* out) :
2067 _bitmap(bitmap), _out(out) { }
2068 };
2070 void ConcurrentMark::print_prev_bitmap_reachable() {
2071 outputStream* out = gclog_or_tty;
2073 #if SEND_HEAP_DUMP_TO_FILE
2074 guarantee(heap_dump_file == NULL, "Protocol");
2075 char fn_buf[100];
2076 sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2077 heap_dump_file = fopen(fn_buf, "w");
2078 fileStream fstream(heap_dump_file);
2079 out = &fstream;
2080 #endif // SEND_HEAP_DUMP_TO_FILE
2082 RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2083 out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2084 _g1h->heap_region_iterate(&rcl);
2085 out->print_cr("");
2087 ReachablePrinterClosure cl(_prevMarkBitMap, out);
2088 out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2089 _prevMarkBitMap->iterate(&cl);
2090 out->print_cr("");
2092 #if SEND_HEAP_DUMP_TO_FILE
2093 fclose(heap_dump_file);
2094 heap_dump_file = NULL;
2095 #endif // SEND_HEAP_DUMP_TO_FILE
2096 }
2098 // This note is for drainAllSATBBuffers and the code in between.
2099 // In the future we could reuse a task to do this work during an
2100 // evacuation pause (since now tasks are not active and can be claimed
2101 // during an evacuation pause). This was a late change to the code and
2102 // is currently not being taken advantage of.
2104 class CMGlobalObjectClosure : public ObjectClosure {
2105 private:
2106 ConcurrentMark* _cm;
2108 public:
2109 void do_object(oop obj) {
2110 _cm->deal_with_reference(obj);
2111 }
2113 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2114 };
2116 void ConcurrentMark::deal_with_reference(oop obj) {
2117 if (verbose_high())
2118 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2119 (void*) obj);
2122 HeapWord* objAddr = (HeapWord*) obj;
2123 if (_g1h->is_in_g1_reserved(objAddr)) {
2124 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2125 HeapRegion* hr = _g1h->heap_region_containing(obj);
2126 if (_g1h->is_obj_ill(obj, hr)) {
2127 if (verbose_high())
2128 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2129 "marked", (void*) obj);
2131 // we need to mark it first
2132 if (_nextMarkBitMap->parMark(objAddr)) {
2133 // No OrderAccess:store_load() is needed. It is implicit in the
2134 // CAS done in parMark(objAddr) above
2135 HeapWord* finger = _finger;
2136 if (objAddr < finger) {
2137 if (verbose_high())
2138 gclog_or_tty->print_cr("[global] below the global finger "
2139 "("PTR_FORMAT"), pushing it", finger);
2140 if (!mark_stack_push(obj)) {
2141 if (verbose_low())
2142 gclog_or_tty->print_cr("[global] global stack overflow during "
2143 "deal_with_reference");
2144 }
2145 }
2146 }
2147 }
2148 }
2149 }
2151 void ConcurrentMark::drainAllSATBBuffers() {
2152 CMGlobalObjectClosure oc(this);
2153 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2154 satb_mq_set.set_closure(&oc);
2156 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2157 if (verbose_medium())
2158 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2159 }
2161 // no need to check whether we should do this, as this is only
2162 // called during an evacuation pause
2163 satb_mq_set.iterate_closure_all_threads();
2165 satb_mq_set.set_closure(NULL);
2166 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2167 }
2169 void ConcurrentMark::markPrev(oop p) {
2170 // Note we are overriding the read-only view of the prev map here, via
2171 // the cast.
2172 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2173 }
2175 void ConcurrentMark::clear(oop p) {
2176 assert(p != NULL && p->is_oop(), "expected an oop");
2177 HeapWord* addr = (HeapWord*)p;
2178 assert(addr >= _nextMarkBitMap->startWord() &&
2179 addr < _nextMarkBitMap->endWord(), "in a region");
2181 _nextMarkBitMap->clear(addr);
2182 }
2184 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2185 // Note we are overriding the read-only view of the prev map here, via
2186 // the cast.
2187 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2188 _nextMarkBitMap->clearRange(mr);
2189 }
2191 HeapRegion*
2192 ConcurrentMark::claim_region(int task_num) {
2193 // "checkpoint" the finger
2194 HeapWord* finger = _finger;
2196 // _heap_end will not change underneath our feet; it only changes at
2197 // yield points.
2198 while (finger < _heap_end) {
2199 tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2201 // is the gap between reading the finger and doing the CAS too long?
2203 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2204 HeapWord* bottom = curr_region->bottom();
2205 HeapWord* end = curr_region->end();
2206 HeapWord* limit = curr_region->next_top_at_mark_start();
2208 if (verbose_low())
2209 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2210 "["PTR_FORMAT", "PTR_FORMAT"), "
2211 "limit = "PTR_FORMAT,
2212 task_num, curr_region, bottom, end, limit);
2214 HeapWord* res =
2215 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2216 if (res == finger) {
2217 // we succeeded
2219 // notice that _finger == end cannot be guaranteed here since
2220 // someone else might have moved the finger even further
2221 guarantee( _finger >= end, "the finger should have moved forward" );
2223 if (verbose_low())
2224 gclog_or_tty->print_cr("[%d] we were successful with region = "
2225 PTR_FORMAT, task_num, curr_region);
2227 if (limit > bottom) {
2228 if (verbose_low())
2229 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2230 "returning it ", task_num, curr_region);
2231 return curr_region;
2232 } else {
2233 tmp_guarantee_CM( limit == bottom,
2234 "the region limit should be at bottom" );
2235 if (verbose_low())
2236 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2237 "returning NULL", task_num, curr_region);
2238 // we return NULL and the caller should try calling
2239 // claim_region() again.
2240 return NULL;
2241 }
2242 } else {
2243 guarantee( _finger > finger, "the finger should have moved forward" );
2244 if (verbose_low())
2245 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2246 "global finger = "PTR_FORMAT", "
2247 "our finger = "PTR_FORMAT,
2248 task_num, _finger, finger);
2250 // read it again
2251 finger = _finger;
2252 }
2253 }
2255 return NULL;
2256 }
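// claim_region() above hands out regions with a single CAS on the shared
// finger; losers simply re-read the finger and retry. A minimal standalone
// sketch of the same pattern over region indices (illustrative only; the
// real code CASes a HeapWord* forward to the claimed region's end):
#if 0
#include <atomic>
#include <cstddef>

static std::atomic<size_t> g_finger(0); // hypothetical global finger

// Returns a claimed region index, or (size_t)-1 when the heap is exhausted.
static size_t claim_region_index(size_t num_regions) {
  size_t finger = g_finger.load();
  while (finger < num_regions) {
    // On failure, compare_exchange_strong reloads 'finger'; we just retry.
    if (g_finger.compare_exchange_strong(finger, finger + 1)) {
      return finger; // we won the race; this region is ours
    }
  }
  return (size_t)-1;
}
#endif // 0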
2258 void ConcurrentMark::oops_do(OopClosure* cl) {
2259 if (_markStack.size() > 0 && verbose_low())
2260 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2261 "size = %d", _markStack.size());
2262 // we first iterate over the contents of the mark stack...
2263 _markStack.oops_do(cl);
2265 for (int i = 0; i < (int)_max_task_num; ++i) {
2266 OopTaskQueue* queue = _task_queues->queue((int)i);
2268 if (queue->size() > 0 && verbose_low())
2269 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2270 "size = %d", i, queue->size());
2272 // ...then over the contents of all the task queues.
2273 queue->oops_do(cl);
2274 }
2276 // finally, invalidate any entries in the region stack that
2277 // point into the collection set
2278 if (_regionStack.invalidate_entries_into_cset()) {
2279 // otherwise, any gray objects copied during the evacuation pause
2280 // might not be visited.
2281 guarantee( _should_gray_objects, "invariant" );
2282 }
2283 }
2285 void ConcurrentMark::clear_marking_state() {
2286 _markStack.setEmpty();
2287 _markStack.clear_overflow();
2288 _regionStack.setEmpty();
2289 _regionStack.clear_overflow();
2290 clear_has_overflown();
2291 _finger = _heap_start;
2293 for (int i = 0; i < (int)_max_task_num; ++i) {
2294 OopTaskQueue* queue = _task_queues->queue(i);
2295 queue->set_empty();
2296 }
2297 }
2299 void ConcurrentMark::print_stats() {
2300 if (verbose_stats()) {
2301 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2302 for (size_t i = 0; i < _active_tasks; ++i) {
2303 _tasks[i]->print_stats();
2304 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2305 }
2306 }
2307 }
2309 class CSMarkOopClosure: public OopClosure {
2310 friend class CSMarkBitMapClosure;
2312 G1CollectedHeap* _g1h;
2313 CMBitMap* _bm;
2314 ConcurrentMark* _cm;
2315 oop* _ms;
2316 jint* _array_ind_stack;
2317 int _ms_size;
2318 int _ms_ind;
2319 int _array_increment;
2321 bool push(oop obj, int arr_ind = 0) {
2322 if (_ms_ind == _ms_size) {
2323 gclog_or_tty->print_cr("Mark stack is full.");
2324 return false;
2325 }
2326 _ms[_ms_ind] = obj;
2327 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2328 _ms_ind++;
2329 return true;
2330 }
2332 oop pop() {
2333 if (_ms_ind == 0) return NULL;
2334 else {
2335 _ms_ind--;
2336 return _ms[_ms_ind];
2337 }
2338 }
2340 bool drain() {
2341 while (_ms_ind > 0) {
2342 oop obj = pop();
2343 assert(obj != NULL, "Since index was non-zero.");
2344 if (obj->is_objArray()) {
2345 jint arr_ind = _array_ind_stack[_ms_ind];
2346 objArrayOop aobj = objArrayOop(obj);
2347 jint len = aobj->length();
2348 jint next_arr_ind = arr_ind + _array_increment;
2349 if (next_arr_ind < len) {
2350 push(obj, next_arr_ind);
2351 }
2352 // Now process this portion of this one.
2353 int lim = MIN2(next_arr_ind, len);
2354 assert(!UseCompressedOops, "This needs to be fixed");
2355 for (int j = arr_ind; j < lim; j++) {
2356 do_oop(aobj->obj_at_addr<oop>(j));
2357 }
2359 } else {
2360 obj->oop_iterate(this);
2361 }
2362 if (abort()) return false;
2363 }
2364 return true;
2365 }
2367 public:
2368 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2369 _g1h(G1CollectedHeap::heap()),
2370 _cm(cm),
2371 _bm(cm->nextMarkBitMap()),
2372 _ms_size(ms_size), _ms_ind(0),
2373 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2374 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2375 _array_increment(MAX2(ms_size/8, 16))
2376 {}
2378 ~CSMarkOopClosure() {
2379 FREE_C_HEAP_ARRAY(oop, _ms);
2380 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2381 }
2383 void do_oop(narrowOop* p) {
2384 guarantee(false, "NYI");
2385 }
2387 void do_oop(oop* p) {
2388 oop obj = *p;
2389 if (obj == NULL) return;
2390 if (obj->is_forwarded()) {
2391 // If the object has already been forwarded, we have to make sure
2392 // that it's marked. So follow the forwarding pointer. Note that
2393 // this does the right thing for self-forwarding pointers in the
2394 // evacuation failure case.
2395 obj = obj->forwardee();
2396 }
2397 HeapRegion* hr = _g1h->heap_region_containing(obj);
2398 if (hr != NULL) {
2399 if (hr->in_collection_set()) {
2400 if (_g1h->is_obj_ill(obj)) {
2401 _bm->mark((HeapWord*)obj);
2402 if (!push(obj)) {
2403 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2404 set_abort();
2405 }
2406 }
2407 } else {
2408 // Outside the collection set; we need to gray it
2409 _cm->deal_with_reference(obj);
2410 }
2411 }
2412 }
2413 };
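// drain() above slices object arrays into _array_increment-sized chunks: the
// array is re-pushed with the index of the next chunk before the current one
// is scanned, so a single huge array cannot monopolize a stack slot. A
// minimal standalone sketch of the same chunking (illustrative only):
#if 0
#include <vector>
#include <algorithm>

struct ArrayWork { const int* elems; int len; int index; };

static void drain_chunked(std::vector<ArrayWork>& stack, int increment) {
  while (!stack.empty()) {
    ArrayWork w = stack.back();
    stack.pop_back();
    int lim = std::min(w.index + increment, w.len);
    if (lim < w.len) {
      // More remains: re-push with the follow-on index before scanning.
      ArrayWork rest = { w.elems, w.len, lim };
      stack.push_back(rest);
    }
    for (int j = w.index; j < lim; j++) {
      (void)w.elems[j]; // visit element j
    }
  }
}
#endif // 0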
2415 class CSMarkBitMapClosure: public BitMapClosure {
2416 G1CollectedHeap* _g1h;
2417 CMBitMap* _bitMap;
2418 ConcurrentMark* _cm;
2419 CSMarkOopClosure _oop_cl;
2420 public:
2421 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2422 _g1h(G1CollectedHeap::heap()),
2423 _bitMap(cm->nextMarkBitMap()),
2424 _oop_cl(cm, ms_size)
2425 {}
2427 ~CSMarkBitMapClosure() {}
2429 bool do_bit(size_t offset) {
2430 // convert offset into a HeapWord*
2431 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2432 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2433 "address out of range");
2434 assert(_bitMap->isMarked(addr), "tautology");
2435 oop obj = oop(addr);
2436 if (!obj->is_forwarded()) {
2437 if (!_oop_cl.push(obj)) return false;
2438 if (!_oop_cl.drain()) return false;
2439 }
2440 // Otherwise...
2441 return true;
2442 }
2443 };
2446 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2447 CMBitMap* _bm;
2448 CSMarkBitMapClosure _bit_cl;
2449 enum SomePrivateConstants {
2450 MSSize = 1000
2451 };
2452 bool _completed;
2453 public:
2454 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2455 _bm(cm->nextMarkBitMap()),
2456 _bit_cl(cm, MSSize),
2457 _completed(true)
2458 {}
2460 ~CompleteMarkingInCSHRClosure() {}
2462 bool doHeapRegion(HeapRegion* r) {
2463 if (!r->evacuation_failed()) {
2464 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2465 if (!mr.is_empty()) {
2466 if (!_bm->iterate(&_bit_cl, mr)) {
2467 _completed = false;
2468 return true;
2469 }
2470 }
2471 }
2472 return false;
2473 }
2475 bool completed() { return _completed; }
2476 };
2478 class ClearMarksInHRClosure: public HeapRegionClosure {
2479 CMBitMap* _bm;
2480 public:
2481 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2483 bool doHeapRegion(HeapRegion* r) {
2484 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2485 MemRegion usedMR = r->used_region();
2486 _bm->clearRange(usedMR);
2487 }
2488 return false;
2489 }
2490 };
2492 void ConcurrentMark::complete_marking_in_collection_set() {
2493 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2495 if (!g1h->mark_in_progress()) {
2496 g1h->g1_policy()->record_mark_closure_time(0.0);
2497 return;
2498 }
2500 int i = 1;
2501 double start = os::elapsedTime();
2502 while (true) {
2503 i++;
2504 CompleteMarkingInCSHRClosure cmplt(this);
2505 g1h->collection_set_iterate(&cmplt);
2506 if (cmplt.completed()) break;
2507 }
2508 double end_time = os::elapsedTime();
2509 double elapsed_time_ms = (end_time - start) * 1000.0;
2510 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2511 if (PrintGCDetails) {
2512 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2513 }
2515 ClearMarksInHRClosure clr(nextMarkBitMap());
2516 g1h->collection_set_iterate(&clr);
2517 }
2519 // The next two methods deal with the following optimisation. Some
2520 // objects are gray by being marked and located above the finger. If
2521 // they are copied, during an evacuation pause, below the finger then
2522 // they need to be pushed on the stack. The observation is that, if
2523 // there are no regions in the collection set located above the
2524 // finger, then the above cannot happen, hence we do not need to
2525 // explicitly gray any objects when copying them to below the
2526 // finger. The global stack will be scanned to ensure that, if it
2527 // points to objects being copied, it will update their
2528 // location. There is a tricky situation with the gray objects in
2529 // region stack that are being copied, however. See the comment in
2530 // newCSet().
2532 void ConcurrentMark::newCSet() {
2533 if (!concurrent_marking_in_progress())
2534 // nothing to do if marking is not in progress
2535 return;
2537 // find what the lowest finger is among the global and local fingers
2538 _min_finger = _finger;
2539 for (int i = 0; i < (int)_max_task_num; ++i) {
2540 CMTask* task = _tasks[i];
2541 HeapWord* task_finger = task->finger();
2542 if (task_finger != NULL && task_finger < _min_finger)
2543 _min_finger = task_finger;
2544 }
2546 _should_gray_objects = false;
2548 // This fixes a very subtle and frustrating bug. It might be the case
2549 // that, during an evacuation pause, heap regions that contain
2550 // objects that are gray (by being in regions contained in the
2551 // region stack) are included in the collection set. Since such gray
2552 // objects will be moved, and because it's not easy to redirect
2553 // region stack entries to point to a new location (because objects
2554 // in one region might be scattered to multiple regions after they
2555 // are copied), one option is to ensure that all marked objects
2556 // copied during a pause are pushed on the stack. Notice, however,
2557 // that this problem can only happen when the region stack is not
2558 // empty during an evacuation pause. So, we make the fix a bit less
2559 // conservative and ensure that regions are pushed on the stack,
2560 // irrespective of whether all collection set regions are below the
2561 // finger, if the region stack is not empty. This is expected to be
2562 // a rare case, so I don't think it's necessary to be smarter about it.
2563 if (!region_stack_empty())
2564 _should_gray_objects = true;
2565 }
2567 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2568 if (!concurrent_marking_in_progress())
2569 return;
2571 HeapWord* region_end = hr->end();
2572 if (region_end > _min_finger)
2573 _should_gray_objects = true;
2574 }
2576 void ConcurrentMark::disable_co_trackers() {
2577 if (has_aborted()) {
2578 if (_cleanup_co_tracker.enabled())
2579 _cleanup_co_tracker.disable();
2580 for (int i = 0; i < (int)_max_task_num; ++i) {
2581 CMTask* task = _tasks[i];
2582 if (task->co_tracker_enabled())
2583 task->disable_co_tracker();
2584 }
2585 } else {
2586 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
2587 for (int i = 0; i < (int)_max_task_num; ++i) {
2588 CMTask* task = _tasks[i];
2589 guarantee( !task->co_tracker_enabled(), "invariant" );
2590 }
2591 }
2592 }
2594 // abandon current marking iteration due to a Full GC
2595 void ConcurrentMark::abort() {
2596 // If we're not marking, nothing to do.
2597 if (!G1ConcMark) return;
2599 // Clear all marks to force marking thread to do nothing
2600 _nextMarkBitMap->clearAll();
2601 // Empty mark stack
2602 clear_marking_state();
2603 for (int i = 0; i < (int)_max_task_num; ++i)
2604 _tasks[i]->clear_region_fields();
2605 _has_aborted = true;
2607 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2608 satb_mq_set.abandon_partial_marking();
2609 satb_mq_set.set_active_all_threads(false);
2610 }
2612 static void print_ms_time_info(const char* prefix, const char* name,
2613 NumberSeq& ns) {
2614 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2615 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2616 if (ns.num() > 0) {
2617 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2618 prefix, ns.sd(), ns.maximum());
2619 }
2620 }
2622 void ConcurrentMark::print_summary_info() {
2623 gclog_or_tty->print_cr(" Concurrent marking:");
2624 print_ms_time_info(" ", "init marks", _init_times);
2625 print_ms_time_info(" ", "remarks", _remark_times);
2626 {
2627 print_ms_time_info(" ", "final marks", _remark_mark_times);
2628 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2630 }
2631 print_ms_time_info(" ", "cleanups", _cleanup_times);
2632 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2633 _total_counting_time,
2634 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2635 (double)_cleanup_times.num()
2636 : 0.0));
2637 if (G1ScrubRemSets) {
2638 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2639 _total_rs_scrub_time,
2640 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2641 (double)_cleanup_times.num()
2642 : 0.0));
2643 }
2644 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2645 (_init_times.sum() + _remark_times.sum() +
2646 _cleanup_times.sum())/1000.0);
2647 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2648 "(%8.2f s marking, %8.2f s counting).",
2649 cmThread()->vtime_accum(),
2650 cmThread()->vtime_mark_accum(),
2651 cmThread()->vtime_count_accum());
2652 }
2654 // Closures
2655 // XXX: there seems to be a lot of code duplication here;
2656 // should refactor and consolidate the shared code.
2661 // We take a break if someone is trying to stop the world.
2662 bool ConcurrentMark::do_yield_check(int worker_i) {
2663 if (should_yield()) {
2664 if (worker_i == 0)
2665 _g1h->g1_policy()->record_concurrent_pause();
2666 cmThread()->yield();
2667 if (worker_i == 0)
2668 _g1h->g1_policy()->record_concurrent_pause_end();
2669 return true;
2670 } else {
2671 return false;
2672 }
2673 }
2675 bool ConcurrentMark::should_yield() {
2676 return cmThread()->should_yield();
2677 }
2679 bool ConcurrentMark::containing_card_is_marked(void* p) {
2680 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2681 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2682 }
2684 bool ConcurrentMark::containing_cards_are_marked(void* start,
2685 void* last) {
2686 return
2687 containing_card_is_marked(start) &&
2688 containing_card_is_marked(last);
2689 }
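// Both predicates above reduce to "address -> card index": take the byte
// offset from the start of the reserved heap and shift by the card size.
// A minimal standalone sketch, assuming the usual 512-byte cards (the real
// shift comes from CardTableModRefBS::card_shift):
#if 0
#include <cstddef>

static const int kCardShift = 9; // hypothetical: log2 of a 512-byte card

static size_t card_index_for(const char* heap_start, const char* p) {
  size_t byte_offset = (size_t)(p - heap_start); // pointer_delta(p, start, 1)
  return byte_offset >> kCardShift;              // one card bit per 512 bytes
}
#endif // 0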
2691 #ifndef PRODUCT
2692 // for debugging purposes
2693 void ConcurrentMark::print_finger() {
2694 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2695 _heap_start, _heap_end, _finger);
2696 for (int i = 0; i < (int) _max_task_num; ++i) {
2697 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2698 }
2699 gclog_or_tty->print_cr("");
2700 }
2701 #endif
2703 // Closure for iteration over bitmaps
2704 class CMBitMapClosure : public BitMapClosure {
2705 private:
2706 // the bitmap that is being iterated over
2707 CMBitMap* _nextMarkBitMap;
2708 ConcurrentMark* _cm;
2709 CMTask* _task;
2710 // true if we're scanning a heap region claimed by the task (so that
2711 // we move the finger along), false if we're not, i.e. currently when
2712 // scanning a heap region popped from the region stack (so that we
2713 // do not move the task finger along; it'd be a mistake if we did so).
2714 bool _scanning_heap_region;
2716 public:
2717 CMBitMapClosure(CMTask *task,
2718 ConcurrentMark* cm,
2719 CMBitMap* nextMarkBitMap)
2720 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2722 void set_scanning_heap_region(bool scanning_heap_region) {
2723 _scanning_heap_region = scanning_heap_region;
2724 }
2726 bool do_bit(size_t offset) {
2727 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2728 tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2729 tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2731 if (_scanning_heap_region) {
2732 statsOnly( _task->increase_objs_found_on_bitmap() );
2733 tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2734 // We move the task's local finger along.
2735 _task->move_finger_to(addr);
2736 } else {
2737 // We move the task's region finger along.
2738 _task->move_region_finger_to(addr);
2739 }
2741 _task->scan_object(oop(addr));
2742 // we only partially drain the local queue and global stack
2743 _task->drain_local_queue(true);
2744 _task->drain_global_stack(true);
2746 // if the has_aborted flag has been raised, we need to bail out of
2747 // the iteration
2748 return !_task->has_aborted();
2749 }
2750 };
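// do_bit() above keeps converting bit offsets back into heap addresses. The
// conversion is symmetric: with shifter s, one bitmap bit covers 2^s heap
// words. A minimal standalone sketch (illustrative only):
#if 0
#include <cstddef>
#include <cstdint>

typedef uintptr_t* Word; // stand-in for HeapWord*

static size_t word_to_offset(Word start, Word addr, int shifter) {
  return (size_t)(addr - start) >> shifter;
}

static Word offset_to_word(Word start, size_t offset, int shifter) {
  return start + (offset << shifter);
}
#endif // 0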
2752 // Closure for iterating over objects, currently only used for
2753 // processing SATB buffers.
2754 class CMObjectClosure : public ObjectClosure {
2755 private:
2756 CMTask* _task;
2758 public:
2759 void do_object(oop obj) {
2760 _task->deal_with_reference(obj);
2761 }
2763 CMObjectClosure(CMTask* task) : _task(task) { }
2764 };
2766 // Closure for iterating over object fields
2767 class CMOopClosure : public OopClosure {
2768 private:
2769 G1CollectedHeap* _g1h;
2770 ConcurrentMark* _cm;
2771 CMTask* _task;
2773 public:
2774 void do_oop(narrowOop* p) {
2775 guarantee(false, "NYI");
2776 }
2778 void do_oop(oop* p) {
2779 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2781 oop obj = *p;
2782 if (_cm->verbose_high())
2783 gclog_or_tty->print_cr("[%d] we're looking at location "
2784 "*"PTR_FORMAT" = "PTR_FORMAT,
2785 _task->task_id(), p, (void*) obj);
2786 _task->deal_with_reference(obj);
2787 }
2789 CMOopClosure(G1CollectedHeap* g1h,
2790 ConcurrentMark* cm,
2791 CMTask* task)
2792 : _g1h(g1h), _cm(cm), _task(task) { }
2793 };
2795 void CMTask::setup_for_region(HeapRegion* hr) {
2796 tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2797 "claim_region() should have filtered out continues humongous regions" );
2799 if (_cm->verbose_low())
2800 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2801 _task_id, hr);
2803 _curr_region = hr;
2804 _finger = hr->bottom();
2805 update_region_limit();
2806 }
2808 void CMTask::update_region_limit() {
2809 HeapRegion* hr = _curr_region;
2810 HeapWord* bottom = hr->bottom();
2811 HeapWord* limit = hr->next_top_at_mark_start();
2813 if (limit == bottom) {
2814 if (_cm->verbose_low())
2815 gclog_or_tty->print_cr("[%d] found an empty region "
2816 "["PTR_FORMAT", "PTR_FORMAT")",
2817 _task_id, bottom, limit);
2818 // The region was collected underneath our feet.
2819 // We set the finger to bottom to ensure that the bitmap
2820 // iteration that will follow this will not do anything.
2821 // (this is not a condition that holds when we set the region up,
2822 // as the region is not supposed to be empty in the first place)
2823 _finger = bottom;
2824 } else if (limit >= _region_limit) {
2825 tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2826 } else {
2827 tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2828 // This can happen under some pretty unusual circumstances. An
2829 // evacuation pause empties the region underneath our feet (NTAMS
2830 // at bottom). We then do some allocation in the region (NTAMS
2831 // stays at bottom), followed by the region being used as a GC
2832 // alloc region (NTAMS will move to top() and the objects
2833 // originally below it will be grayed). All objects now marked in
2834 // the region are explicitly grayed, if below the global finger,
2835 // and in fact we do not need to scan anything else. So, we simply
2836 // set _finger to be limit to ensure that the bitmap iteration
2837 // doesn't do anything.
2838 _finger = limit;
2839 }
2841 _region_limit = limit;
2842 }
2844 void CMTask::giveup_current_region() {
2845 tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2846 if (_cm->verbose_low())
2847 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2848 _task_id, _curr_region);
2849 clear_region_fields();
2850 }
2852 void CMTask::clear_region_fields() {
2853 // Values for these three fields that indicate that we're not
2854 // holding on to a region.
2855 _curr_region = NULL;
2856 _finger = NULL;
2857 _region_limit = NULL;
2859 _region_finger = NULL;
2860 }
2862 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2863 guarantee( nextMarkBitMap != NULL, "invariant" );
2865 if (_cm->verbose_low())
2866 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2868 _nextMarkBitMap = nextMarkBitMap;
2869 clear_region_fields();
2871 _calls = 0;
2872 _elapsed_time_ms = 0.0;
2873 _termination_time_ms = 0.0;
2874 _termination_start_time_ms = 0.0;
2876 #if _MARKING_STATS_
2877 _local_pushes = 0;
2878 _local_pops = 0;
2879 _local_max_size = 0;
2880 _objs_scanned = 0;
2881 _global_pushes = 0;
2882 _global_pops = 0;
2883 _global_max_size = 0;
2884 _global_transfers_to = 0;
2885 _global_transfers_from = 0;
2886 _region_stack_pops = 0;
2887 _regions_claimed = 0;
2888 _objs_found_on_bitmap = 0;
2889 _satb_buffers_processed = 0;
2890 _steal_attempts = 0;
2891 _steals = 0;
2892 _aborted = 0;
2893 _aborted_overflow = 0;
2894 _aborted_cm_aborted = 0;
2895 _aborted_yield = 0;
2896 _aborted_timed_out = 0;
2897 _aborted_satb = 0;
2898 _aborted_termination = 0;
2899 #endif // _MARKING_STATS_
2900 }
2902 bool CMTask::should_exit_termination() {
2903 regular_clock_call();
2904 // This is called when we are in the termination protocol. We should
2905 // quit if, for some reason, this task wants to abort or the global
2906 // stack is not empty (this means that we can get work from it).
2907 return !_cm->mark_stack_empty() || has_aborted();
2908 }
2910 // This determines whether the method below will check both the local
2911 // and global fingers when determining whether to push on the stack a
2912 // gray object (value 1) or whether it will only check the global one
2913 // (value 0). The tradeoffs are that the former will be a bit more
2914 // accurate and possibly push less on the stack, but it might also be
2915 // a little bit slower.
2917 #define _CHECK_BOTH_FINGERS_ 1
2919 void CMTask::deal_with_reference(oop obj) {
2920 if (_cm->verbose_high())
2921 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2922 _task_id, (void*) obj);
2924 ++_refs_reached;
2926 HeapWord* objAddr = (HeapWord*) obj;
2927 if (_g1h->is_in_g1_reserved(objAddr)) {
2928 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2929 HeapRegion* hr = _g1h->heap_region_containing(obj);
2930 if (_g1h->is_obj_ill(obj, hr)) {
2931 if (_cm->verbose_high())
2932 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2933 _task_id, (void*) obj);
2935 // we need to mark it first
2936 if (_nextMarkBitMap->parMark(objAddr)) {
2937 // No OrderAccess:store_load() is needed. It is implicit in the
2938 // CAS done in parMark(objAddr) above
2939 HeapWord* global_finger = _cm->finger();
2941 #if _CHECK_BOTH_FINGERS_
2942 // we will check both the local and global fingers
2944 if (_finger != NULL && objAddr < _finger) {
2945 if (_cm->verbose_high())
2946 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2947 "pushing it", _task_id, _finger);
2948 push(obj);
2949 } else if (_curr_region != NULL && objAddr < _region_limit) {
2950 // do nothing
2951 } else if (objAddr < global_finger) {
2952 // Notice that the global finger might be moving forward
2953 // concurrently. This is not a problem. In the worst case, we
2954 // mark the object while it is above the global finger and, by
2955 // the time we read the global finger, it has moved forward
2956 // past this object. In this case, the object will probably
2957 // be visited when a task is scanning the region and will also
2958 // be pushed on the stack. So, some duplicate work, but no
2959 // correctness problems.
2961 if (_cm->verbose_high())
2962 gclog_or_tty->print_cr("[%d] below the global finger "
2963 "("PTR_FORMAT"), pushing it",
2964 _task_id, global_finger);
2965 push(obj);
2966 } else {
2967 // do nothing
2968 }
2969 #else // _CHECK_BOTH_FINGERS_
2970 // we will only check the global finger
2972 if (objAddr < global_finger) {
2973 // see long comment above
2975 if (_cm->verbose_high())
2976 gclog_or_tty->print_cr("[%d] below the global finger "
2977 "("PTR_FORMAT"), pushing it",
2978 _task_id, global_finger);
2979 push(obj);
2980 }
2981 #endif // _CHECK_BOTH_FINGERS_
2982 }
2983 }
2984 }
2985 }
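// The finger checks above boil down to a small decision procedure: a newly
// marked object only has to be pushed if no finger will sweep over it later.
// A minimal standalone sketch of that decision (illustrative only; the
// parameters are hypothetical stand-ins for the fields used above):
#if 0
#include <cstdint>

static bool needs_push(uintptr_t addr, uintptr_t local_finger,
                       uintptr_t region_limit, uintptr_t global_finger) {
  if (local_finger != 0 && addr < local_finger) return true;  // already swept locally
  if (region_limit != 0 && addr < region_limit) return false; // we will sweep it
  return addr < global_finger;            // below everyone else: push it
}
#endif // 0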
2987 void CMTask::push(oop obj) {
2988 HeapWord* objAddr = (HeapWord*) obj;
2989 tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
2990 tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
2991 tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
2993 if (_cm->verbose_high())
2994 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2996 if (!_task_queue->push(obj)) {
2997 // The local task queue looks full. We need to push some entries
2998 // to the global stack.
3000 if (_cm->verbose_medium())
3001 gclog_or_tty->print_cr("[%d] task queue overflow, "
3002 "moving entries to the global stack",
3003 _task_id);
3004 move_entries_to_global_stack();
3006 // this should succeed since, even if we overflow the global
3007 // stack, we should have definitely removed some entries from the
3008 // local queue. So, there must be space on it.
3009 bool success = _task_queue->push(obj);
3010 tmp_guarantee_CM( success, "invariant" );
3011 }
3013 statsOnly( int tmp_size = _task_queue->size();
3014 if (tmp_size > _local_max_size)
3015 _local_max_size = tmp_size;
3016 ++_local_pushes );
3017 }
3019 void CMTask::reached_limit() {
3020 tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3021 _refs_reached >= _refs_reached_limit ,
3022 "shouldn't have been called otherwise" );
3023 regular_clock_call();
3024 }
3026 void CMTask::regular_clock_call() {
3027 if (has_aborted())
3028 return;
3030 // First, we need to recalculate the words scanned and refs reached
3031 // limits for the next clock call.
3032 recalculate_limits();
3034 // During the regular clock call we do the following
3036 // (1) If an overflow has been flagged, then we abort.
3037 if (_cm->has_overflown()) {
3038 set_has_aborted();
3039 return;
3040 }
3042 // If we are not concurrent (i.e. we're doing remark) we don't need
3043 // to check anything else. The other steps are only needed during
3044 // the concurrent marking phase.
3045 if (!concurrent())
3046 return;
3048 // (2) If marking has been aborted for Full GC, then we also abort.
3049 if (_cm->has_aborted()) {
3050 set_has_aborted();
3051 statsOnly( ++_aborted_cm_aborted );
3052 return;
3053 }
3055 double curr_time_ms = os::elapsedVTime() * 1000.0;
3057 // (3) If marking stats are enabled, then we update the step history.
3058 #if _MARKING_STATS_
3059 if (_words_scanned >= _words_scanned_limit)
3060 ++_clock_due_to_scanning;
3061 if (_refs_reached >= _refs_reached_limit)
3062 ++_clock_due_to_marking;
3064 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3065 _interval_start_time_ms = curr_time_ms;
3066 _all_clock_intervals_ms.add(last_interval_ms);
3068 if (_cm->verbose_medium()) {
3069 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3070 "scanned = %d%s, refs reached = %d%s",
3071 _task_id, last_interval_ms,
3072 _words_scanned,
3073 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3074 _refs_reached,
3075 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3076 }
3077 #endif // _MARKING_STATS_
3079 // (4) We check whether we should yield. If we have to, then we abort.
3080 if (_cm->should_yield()) {
3081 // We should yield. To do this we abort the task. The caller is
3082 // responsible for yielding.
3083 set_has_aborted();
3084 statsOnly( ++_aborted_yield );
3085 return;
3086 }
3088 // (5) We check whether we've reached our time quota. If we have,
3089 // then we abort.
3090 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3091 if (elapsed_time_ms > _time_target_ms) {
3092 set_has_aborted();
3093 _has_aborted_timed_out = true;
3094 statsOnly( ++_aborted_timed_out );
3095 return;
3096 }
3098 // (6) Finally, we check whether there are enough completed SATB
3099 // buffers available for processing. If there are, we abort.
3100 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3101 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3102 if (_cm->verbose_low())
3103 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3104 _task_id);
3105 // we do need to process SATB buffers, we'll abort and restart
3106 // the marking task to do so
3107 set_has_aborted();
3108 statsOnly( ++_aborted_satb );
3109 return;
3110 }
3111 }
3113 void CMTask::recalculate_limits() {
3114 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3115 _words_scanned_limit = _real_words_scanned_limit;
3117 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3118 _refs_reached_limit = _real_refs_reached_limit;
3119 }
3121 void CMTask::decrease_limits() {
3122 // This is called when we believe that we're going to do an infrequent
3123 // operation which will increase the per byte scanned cost (i.e. move
3124 // entries to/from the global stack). It basically tries to decrease the
3125 // scanning limit so that the clock is called earlier.
3127 if (_cm->verbose_medium())
3128 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3130 _words_scanned_limit = _real_words_scanned_limit -
3131 3 * words_scanned_period / 4;
3132 _refs_reached_limit = _real_refs_reached_limit -
3133 3 * refs_reached_period / 4;
3134 }
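// recalculate_limits()/decrease_limits() implement a work-based clock: the
// expensive checks in regular_clock_call() only run once per "period" of
// work, and an expensive operation can pull the next check closer. A minimal
// standalone sketch, with a hypothetical period constant:
#if 0
static const int kPeriod = 12 * 1024; // hypothetical work units per clock tick

struct ClockedWorker {
  int _work_done;
  int _limit;

  ClockedWorker() : _work_done(0), _limit(kPeriod) {}

  void recalculate_limit() { _limit = _work_done + kPeriod; }

  // After something expensive, give back 3/4 of the budget so the clock
  // fires sooner (mirrors the 3 * period / 4 subtraction above).
  void decrease_limit()    { _limit -= 3 * kPeriod / 4; }

  void do_unit_of_work() {
    _work_done++;
    if (_work_done >= _limit) {
      // ... check for yield / abort / pending SATB work here ...
      recalculate_limit();
    }
  }
};
#endif // 0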
3136 void CMTask::move_entries_to_global_stack() {
3137 // local array where we'll store the entries that will be popped
3138 // from the local queue
3139 oop buffer[global_stack_transfer_size];
3141 int n = 0;
3142 oop obj;
3143 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3144 buffer[n] = obj;
3145 ++n;
3146 }
3148 if (n > 0) {
3149 // we popped at least one entry from the local queue
3151 statsOnly( ++_global_transfers_to; _local_pops += n );
3153 if (!_cm->mark_stack_push(buffer, n)) {
3154 if (_cm->verbose_low())
3155 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3156 set_has_aborted();
3157 } else {
3158 // the transfer was successful
3160 if (_cm->verbose_medium())
3161 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3162 _task_id, n);
3163 statsOnly( int tmp_size = _cm->mark_stack_size();
3164 if (tmp_size > _global_max_size)
3165 _global_max_size = tmp_size;
3166 _global_pushes += n );
3167 }
3168 }
3170 // this operation was quite expensive, so decrease the limits
3171 decrease_limits();
3172 }
3174 void CMTask::get_entries_from_global_stack() {
3175 // local array where we'll store the entries that will be popped
3176 // from the global stack.
3177 oop buffer[global_stack_transfer_size];
3178 int n;
3179 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3180 tmp_guarantee_CM( n <= global_stack_transfer_size,
3181 "we should not pop more than the given limit" );
3182 if (n > 0) {
3183 // yes, we did actually pop at least one entry
3185 statsOnly( ++_global_transfers_from; _global_pops += n );
3186 if (_cm->verbose_medium())
3187 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3188 _task_id, n);
3189 for (int i = 0; i < n; ++i) {
3190 bool success = _task_queue->push(buffer[i]);
3191 // We only call this when the local queue is empty or under a
3192 // given target limit. So, we do not expect this push to fail.
3193 tmp_guarantee_CM( success, "invariant" );
3194 }
3196 statsOnly( int tmp_size = _task_queue->size();
3197 if (tmp_size > _local_max_size)
3198 _local_max_size = tmp_size;
3199 _local_pushes += n );
3200 }
3202 // this operation was quite expensive, so decrease the limits
3203 decrease_limits();
3204 }
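// Both transfer methods above batch entries through a fixed-size local
// buffer so the (synchronized) global stack is touched once per chunk, not
// once per entry. A minimal standalone sketch of the local-to-global
// direction (illustrative only; kTransferSize is hypothetical):
#if 0
#include <vector>
#include <deque>

static const int kTransferSize = 16; // hypothetical global_stack_transfer_size

static void move_to_global(std::deque<void*>& local_queue,
                           std::vector<void*>& global_stack) {
  void* buffer[kTransferSize];
  int n = 0;
  while (n < kTransferSize && !local_queue.empty()) {
    buffer[n++] = local_queue.front();
    local_queue.pop_front();
  }
  // One synchronized operation per chunk (locking elided in this sketch).
  for (int i = 0; i < n; i++) {
    global_stack.push_back(buffer[i]);
  }
}
#endif // 0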
3206 void CMTask::drain_local_queue(bool partially) {
3207 if (has_aborted())
3208 return;
3210 // Decide what the target size is, depending on whether we're going to
3211 // drain it partially (so that other tasks can steal if they run out
3212 // of things to do) or totally (at the very end).
3213 size_t target_size;
3214 if (partially)
3215 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3216 else
3217 target_size = 0;
3219 if (_task_queue->size() > target_size) {
3220 if (_cm->verbose_high())
3221 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3222 _task_id, target_size);
3224 oop obj;
3225 bool ret = _task_queue->pop_local(obj);
3226 while (ret) {
3227 statsOnly( ++_local_pops );
3229 if (_cm->verbose_high())
3230 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3231 (void*) obj);
3233 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3234 "invariant" );
3236 scan_object(obj);
3238 if (_task_queue->size() <= target_size || has_aborted())
3239 ret = false;
3240 else
3241 ret = _task_queue->pop_local(obj);
3242 }
3244 if (_cm->verbose_high())
3245 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3246 _task_id, _task_queue->size());
3247 }
3248 }
3250 void CMTask::drain_global_stack(bool partially) {
3251 if (has_aborted())
3252 return;
3254 // We have a policy to drain the local queue before we attempt to
3255 // drain the global stack.
3256 tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3258 // Decide what the target size is, depending on whether we're going to
3259 // drain it partially (so that other tasks can steal if they run out
3260 // of things to do) or totally (at the very end). Notice that,
3261 // because we move entries from the global stack in chunks or
3262 // because another task might be doing the same, we might in fact
3263 // drop below the target. But, this is not a problem.
3264 size_t target_size;
3265 if (partially)
3266 target_size = _cm->partial_mark_stack_size_target();
3267 else
3268 target_size = 0;
3270 if (_cm->mark_stack_size() > target_size) {
3271 if (_cm->verbose_low())
3272 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3273 _task_id, target_size);
3275 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3276 get_entries_from_global_stack();
3277 drain_local_queue(partially);
3278 }
3280 if (_cm->verbose_low())
3281 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3282 _task_id, _cm->mark_stack_size());
3283 }
3284 }
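// "Partially" vs. "totally" above is just a choice of drain target: a
// partial drain stops at a non-zero watermark so other workers can still
// steal, while the final drain empties everything. A minimal standalone
// sketch with a hypothetical watermark rule:
#if 0
#include <deque>

static void drain_queue(std::deque<void*>& queue, bool partially) {
  size_t target = partially ? 8 : 0; // hypothetical partial-drain watermark
  while (queue.size() > target) {
    void* obj = queue.back();
    queue.pop_back();
    (void)obj; // scan_object(obj) in the real code
  }
}
#endif // 0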
3286 // SATB Queue has several assumptions on whether to call the par or
3287 // non-par versions of the methods. This is why some of the code is
3288 // replicated. We should really get rid of the single-threaded version
3289 // of the code to simplify things.
3290 void CMTask::drain_satb_buffers() {
3291 if (has_aborted())
3292 return;
3294 // We set this so that the regular clock knows that we're in the
3295 // middle of draining buffers and doesn't set the abort flag when it
3296 // notices that SATB buffers are available for draining. It'd be
3297 // very counter productive if it did that. :-)
3298 _draining_satb_buffers = true;
3300 CMObjectClosure oc(this);
3301 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3302 if (ParallelGCThreads > 0)
3303 satb_mq_set.set_par_closure(_task_id, &oc);
3304 else
3305 satb_mq_set.set_closure(&oc);
3307 // This keeps claiming and applying the closure to completed buffers
3308 // until we run out of buffers or we need to abort.
3309 if (ParallelGCThreads > 0) {
3310 while (!has_aborted() &&
3311 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3312 if (_cm->verbose_medium())
3313 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3314 statsOnly( ++_satb_buffers_processed );
3315 regular_clock_call();
3316 }
3317 } else {
3318 while (!has_aborted() &&
3319 satb_mq_set.apply_closure_to_completed_buffer()) {
3320 if (_cm->verbose_medium())
3321 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3322 statsOnly( ++_satb_buffers_processed );
3323 regular_clock_call();
3324 }
3325 }
3327 if (!concurrent() && !has_aborted()) {
3328 // We should only do this during remark.
3329 if (ParallelGCThreads > 0)
3330 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3331 else
3332 satb_mq_set.iterate_closure_all_threads();
3333 }
3335 _draining_satb_buffers = false;
3337 tmp_guarantee_CM( has_aborted() ||
3338 concurrent() ||
3339 satb_mq_set.completed_buffers_num() == 0, "invariant" );
3341 if (ParallelGCThreads > 0)
3342 satb_mq_set.set_par_closure(_task_id, NULL);
3343 else
3344 satb_mq_set.set_closure(NULL);
3346 // again, this was a potentially expensive operation, decrease the
3347 // limits to get the regular clock call early
3348 decrease_limits();
3349 }
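// The buffers drained above are filled by the SATB pre-write barrier: before
// a reference field is overwritten while marking is active, the old value is
// recorded so the "snapshot at the beginning" stays complete. A minimal
// standalone sketch of that barrier (illustrative only, hypothetical names):
#if 0
#include <cstddef>

static void satb_pre_write_barrier(void** field, void* new_value,
                                   bool marking_active,
                                   void (*enqueue)(void*)) {
  if (marking_active) {
    void* old_value = *field;
    if (old_value != NULL) {
      enqueue(old_value); // lands in a thread-local SATB buffer
    }
  }
  *field = new_value; // the actual store
}
#endif // 0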
3351 void CMTask::drain_region_stack(BitMapClosure* bc) {
3352 if (has_aborted())
3353 return;
3355 tmp_guarantee_CM( _region_finger == NULL,
3356 "it should be NULL when we're not scanning a region" );
3358 if (!_cm->region_stack_empty()) {
3359 if (_cm->verbose_low())
3360 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3361 _task_id, _cm->region_stack_size());
3363 MemRegion mr = _cm->region_stack_pop();
3364 // it returns MemRegion() if the pop fails
3365 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3367 while (mr.start() != NULL) {
3368 if (_cm->verbose_medium())
3369 gclog_or_tty->print_cr("[%d] we are scanning region "
3370 "["PTR_FORMAT", "PTR_FORMAT")",
3371 _task_id, mr.start(), mr.end());
3372 tmp_guarantee_CM( mr.end() <= _cm->finger(),
3373 "otherwise the region shouldn't be on the stack" );
3374 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3375 if (_nextMarkBitMap->iterate(bc, mr)) {
3376 tmp_guarantee_CM( !has_aborted(),
3377 "cannot abort the task without aborting the bitmap iteration" );
3379 // We finished iterating over the region without aborting.
3380 regular_clock_call();
3381 if (has_aborted())
3382 mr = MemRegion();
3383 else {
3384 mr = _cm->region_stack_pop();
3385 // it returns MemRegion() if the pop fails
3386 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3387 }
3388 } else {
3389 guarantee( has_aborted(), "currently the only way to do so" );
3391 // The only way to abort the bitmap iteration is to return
3392 // false from the do_bit() method. However, inside the
3393 // do_bit() method we move the _region_finger to point to the
3394 // object currently being looked at. So, if we bail out, we
3395 // have definitely set _region_finger to something non-null.
3396 guarantee( _region_finger != NULL, "invariant" );
3398 // The iteration was actually aborted. So now _region_finger
3399 // points to the address of the object we last scanned. If we
3400 // leave it there, when we restart this task, we will rescan
3401 // the object. It is easy to avoid this. We move the finger by
3402 // enough to point to the next possible object header (the
3403 // bitmap knows by how much we need to move it as it knows its
3404 // granularity).
3405 MemRegion newRegion =
3406 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3408 if (!newRegion.is_empty()) {
3409 if (_cm->verbose_low()) {
3410 gclog_or_tty->print_cr("[%d] pushing unscanned region "
3411 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3412 _task_id,
3413 newRegion.start(), newRegion.end());
3414 }
3415 // Now push the part of the region we didn't scan on the
3416 // region stack to make sure a task scans it later.
3417 _cm->region_stack_push(newRegion);
3418 }
3419 // break from while
3420 mr = MemRegion();
3421 }
3422 _region_finger = NULL;
3423 }
3425 // We only push regions on the region stack during evacuation
3426 // pauses. So if we come out of the above iteration because the region
3427 // stack is empty, it will remain empty until the next yield
3428 // point. So, the guarantee below is safe.
3429 guarantee( has_aborted() || _cm->region_stack_empty(),
3430 "only way to exit the loop" );
3432 if (_cm->verbose_low())
3433 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3434 _task_id, _cm->region_stack_size());
3435 }
3436 }
3438 void CMTask::print_stats() {
3439 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3440 _task_id, _calls);
3441 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3442 _elapsed_time_ms, _termination_time_ms);
3443 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3444 _step_times_ms.num(), _step_times_ms.avg(),
3445 _step_times_ms.sd());
3446 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3447 _step_times_ms.maximum(), _step_times_ms.sum());
3449 #if _MARKING_STATS_
3450 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3451 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3452 _all_clock_intervals_ms.sd());
3453 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3454 _all_clock_intervals_ms.maximum(),
3455 _all_clock_intervals_ms.sum());
3456 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3457 _clock_due_to_scanning, _clock_due_to_marking);
3458 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3459 _objs_scanned, _objs_found_on_bitmap);
3460 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3461 _local_pushes, _local_pops, _local_max_size);
3462 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3463 _global_pushes, _global_pops, _global_max_size);
3464 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3465 _global_transfers_to,_global_transfers_from);
3466 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3467 _regions_claimed, _region_stack_pops);
3468 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3469 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3470 _steal_attempts, _steals);
3471 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3472 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3473 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3474 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3475 _aborted_timed_out, _aborted_satb, _aborted_termination);
3476 #endif // _MARKING_STATS_
3477 }
3479 /*****************************************************************************
3481 The do_marking_step(time_target_ms) method is the building block
3482 of the parallel marking framework. It can be called in parallel
3483 with other invocations of do_marking_step() on different tasks
3484 (but only one per task, obviously) and concurrently with the
3485       mutator threads, or during remark; hence it eliminates the need
3486 for two versions of the code. When called during remark, it will
3487 pick up from where the task left off during the concurrent marking
3488 phase. Interestingly, tasks are also claimable during evacuation
3489       pauses, since do_marking_step() ensures that it aborts before
3490 it needs to yield.
3492       The data structures that it uses to do marking work are the
3493 following:
3495 (1) Marking Bitmap. If there are gray objects that appear only
3496 on the bitmap (this happens either when dealing with an overflow
3497 or when the initial marking phase has simply marked the roots
3498 and didn't push them on the stack), then tasks claim heap
3499 regions whose bitmap they then scan to find gray objects. A
3500 global finger indicates where the end of the last claimed region
3501 is. A local finger indicates how far into the region a task has
3502 scanned. The two fingers are used to determine how to gray an
3503 object (i.e. whether simply marking it is OK, as it will be
3504       visited by a task in the future, or whether it needs to also be
3505       pushed on a stack); a sketch of this decision follows the list.
3507       (2) Local Queue. The task's own work queue, which the task can
3508       access reasonably efficiently. Other tasks can steal from
3509       it when they run out of work. Throughout the marking phase, a
3510       task attempts to keep its local queue short but not totally
3511       empty, so that entries are available for stealing by other
3512       tasks. Only when there is no more work will a task totally
3513       drain its local queue.
3515       (3) Global Mark Stack. This handles local queue overflow. During
3516       marking, only sets of entries are moved between it and the local
3517       queues, as access to it requires a mutex and more fine-grained
3518       interaction with it might cause contention. If it
3519       overflows, then the marking phase should restart and iterate
3520       over the bitmap to identify gray objects. Throughout the marking
3521       phase, tasks attempt to keep the global mark stack at a small
3522       length but not totally empty, so that entries are available for
3523       popping by other tasks. Only when there is no more work will
3524       tasks totally drain the global mark stack.
3526 (4) Global Region Stack. Entries on it correspond to areas of
3527 the bitmap that need to be scanned since they contain gray
3528 objects. Pushes on the region stack only happen during
3529 evacuation pauses and typically correspond to areas covered by
3530       GC LABs. If it overflows, then the marking phase should restart
3531 and iterate over the bitmap to identify gray objects. Tasks will
3532 try to totally drain the region stack as soon as possible.
3534 (5) SATB Buffer Queue. This is where completed SATB buffers are
3535 made available. Buffers are regularly removed from this queue
3536 and scanned for roots, so that the queue doesn't get too
3537 long. During remark, all completed buffers are processed, as
3538       well as the filled-in parts of any uncompleted buffers.
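
      As an illustration of how the two fingers from (1) drive the
      graying decision, consider the following sketch (an editor's
      illustration with a hypothetical push helper, not the actual
      reference-handling code):

        if (!_nextMarkBitMap->isMarked(addr)) {
          _nextMarkBitMap->mark(addr);        // the object is now gray
          if (addr < _cm->finger()) {
            // below the global finger: no task will scan this part of
            // the bitmap again, so marking alone is not enough; the
            // object must also be pushed (hypothetical helper)
            push_on_local_queue_or_global_stack(obj);
          }
          // at or above the global finger: some task will still reach
          // it while scanning its claimed region, so marking suffices;
          // the local finger plays the analogous role within the
          // region currently being scanned
        }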
3540 The do_marking_step() method tries to abort when the time target
3541 has been reached. There are a few other cases when the
3542 do_marking_step() method also aborts:
3544 (1) When the marking phase has been aborted (after a Full GC).
3546 (2) When a global overflow (either on the global stack or the
3547 region stack) has been triggered. Before the task aborts, it
3548 will actually sync up with the other tasks to ensure that all
3549 the marking data structures (local queues, stacks, fingers etc.)
3550 are re-initialised so that when do_marking_step() completes,
3551 the marking phase can immediately restart.
3553 (3) When enough completed SATB buffers are available. The
3554 do_marking_step() method only tries to drain SATB buffers right
3555 at the beginning. So, if enough buffers are available, the
3556 marking step aborts and the SATB buffers are processed at
3557 the beginning of the next invocation.
3559       (4) To yield. When we have to yield, we abort and do the yield
3560       right at the end of do_marking_step(). This saves us a lot
3561       of hassle since, by yielding, we might allow a Full GC. If that
3562       happens, objects will be compacted underneath our feet, the
3563       heap might shrink, etc. We avoid having to check for this by
3564       just aborting and doing the yield right at the end.
3566 From the above it follows that the do_marking_step() method should
3567 be called in a loop (or, otherwise, regularly) until it completes.
3569 If a marking step completes without its has_aborted() flag being
3570 true, it means it has completed the current marking phase (and
3571 also all other marking tasks have done so and have all synced up).
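
      As an illustration, a driver for do_marking_step() might look
      roughly as follows (an editor's sketch with assumed names, not
      the actual calling code):

        do {
          task->do_marking_step(10.0 /* ms time target, say */);
          // a completed-but-aborted step is the safe point to yield,
          // e.g. for a pending safepoint or a Full GC
          maybe_yield();
        } while (task->has_aborted() && !marking_phase_aborted());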
3573       A method called regular_clock_call() is invoked "regularly" (at
3574       sub-ms intervals) throughout marking. It is this clock method that
3575       checks all the abort conditions which were mentioned above and
3576       decides when the task should abort. A work-based scheme is used to
3577       trigger this clock method: when the number of object words the
3578       marking phase has scanned or the number of references the marking
3579       phase has visited reaches a given limit. Additional invocations of
3580       the clock method have been planted in a few other strategic places
3581       too. The initial reason for the clock method was to avoid calling
3582       vtime too frequently, as it is quite expensive. So, once it was in
3583       place, it was natural to piggy-back all the other conditions on it
3584       too and not constantly check them throughout the code.
3586 *****************************************************************************/
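
// An editor's sketch of the work-based clock trigger described above
// (simplified and with a hypothetical helper name; the real checks
// are inlined in the scanning/marking paths and the limits are
// maintained by recalculate_limits() and decrease_limits()):
//
//   void CMTask::check_limits() {
//     if (_words_scanned >= _words_scanned_limit ||
//         _refs_reached  >= _refs_reached_limit) {
//       // reading the vtime and testing every abort condition is
//       // expensive, so it only happens here
//       regular_clock_call();
//     }
//   }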
3588 void CMTask::do_marking_step(double time_target_ms) {
3589 guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3590 guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3592 guarantee( concurrent() || _cm->region_stack_empty(),
3593 "the region stack should have been cleared before remark" );
3594 guarantee( _region_finger == NULL,
3595 "this should be non-null only when a region is being scanned" );
3597 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3598 guarantee( _task_queues != NULL, "invariant" );
3599 guarantee( _task_queue != NULL, "invariant" );
3600 guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3602 guarantee( !_claimed,
3603 "only one thread should claim this task at any one time" );
3605   // OK, this doesn't safeguard against all possible scenarios, as it is
3606 // possible for two threads to set the _claimed flag at the same
3607 // time. But it is only for debugging purposes anyway and it will
3608 // catch most problems.
3609 _claimed = true;
3611 _start_time_ms = os::elapsedVTime() * 1000.0;
3612 statsOnly( _interval_start_time_ms = _start_time_ms );
3614 double diff_prediction_ms =
3615 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3616 _time_target_ms = time_target_ms - diff_prediction_ms;
3618 // set up the variables that are used in the work-based scheme to
3619 // call the regular clock method
3620 _words_scanned = 0;
3621 _refs_reached = 0;
3622 recalculate_limits();
3624 // clear all flags
3625 clear_has_aborted();
3626 _has_aborted_timed_out = false;
3627 _draining_satb_buffers = false;
3629 ++_calls;
3631 if (_cm->verbose_low())
3632 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3633 "target = %1.2lfms >>>>>>>>>>",
3634 _task_id, _calls, _time_target_ms);
3636 // Set up the bitmap and oop closures. Anything that uses them is
3637   // eventually called from this method, so it is OK to allocate them
3638   // in this frame (on the stack).
3639 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3640 CMOopClosure oop_closure(_g1h, _cm, this);
3641 set_oop_closure(&oop_closure);
3643 if (_cm->has_overflown()) {
3644 // This can happen if the region stack or the mark stack overflows
3645 // during a GC pause and this task, after a yield point,
3646 // restarts. We have to abort as we need to get into the overflow
3647 // protocol which happens right at the end of this task.
3648 set_has_aborted();
3649 }
3651 // First drain any available SATB buffers. After this, we will not
3652 // look at SATB buffers before the next invocation of this method.
3653 // If enough completed SATB buffers are queued up, the regular clock
3654 // will abort this task so that it restarts.
3655 drain_satb_buffers();
3656 // ...then partially drain the local queue and the global stack
3657 drain_local_queue(true);
3658 drain_global_stack(true);
3660 // Then totally drain the region stack. We will not look at
3661 // it again before the next invocation of this method. Entries on
3662 // the region stack are only added during evacuation pauses, for
3663 // which we have to yield. When we do, we abort the task anyway so
3664 // it will look at the region stack again when it restarts.
3665 bitmap_closure.set_scanning_heap_region(false);
3666 drain_region_stack(&bitmap_closure);
3667 // ...then partially drain the local queue and the global stack
3668 drain_local_queue(true);
3669 drain_global_stack(true);
3671 do {
3672 if (!has_aborted() && _curr_region != NULL) {
3673 // This means that we're already holding on to a region.
3674 tmp_guarantee_CM( _finger != NULL,
3675 "if region is not NULL, then the finger "
3676 "should not be NULL either" );
3678 // We might have restarted this task after an evacuation pause
3679 // which might have evacuated the region we're holding on to
3680 // underneath our feet. Let's read its limit again to make sure
3681 // that we do not iterate over a region of the heap that
3682 // contains garbage (update_region_limit() will also move
3683 // _finger to the start of the region if it is found empty).
3684 update_region_limit();
3685 // We will start from _finger not from the start of the region,
3686 // as we might be restarting this task after aborting half-way
3687 // through scanning this region. In this case, _finger points to
3688 // the address where we last found a marked object. If this is a
3689 // fresh region, _finger points to start().
3690 MemRegion mr = MemRegion(_finger, _region_limit);
3692 if (_cm->verbose_low())
3693 gclog_or_tty->print_cr("[%d] we're scanning part "
3694 "["PTR_FORMAT", "PTR_FORMAT") "
3695 "of region "PTR_FORMAT,
3696 _task_id, _finger, _region_limit, _curr_region);
3698 // Let's iterate over the bitmap of the part of the
3699 // region that is left.
3700 bitmap_closure.set_scanning_heap_region(true);
3701 if (mr.is_empty() ||
3702 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3703 // We successfully completed iterating over the region. Now,
3704 // let's give up the region.
3705 giveup_current_region();
3706 regular_clock_call();
3707 } else {
3708 guarantee( has_aborted(), "currently the only way to do so" );
3709 // The only way to abort the bitmap iteration is to return
3710 // false from the do_bit() method. However, inside the
3711 // do_bit() method we move the _finger to point to the
3712 // object currently being looked at. So, if we bail out, we
3713 // have definitely set _finger to something non-null.
3714 guarantee( _finger != NULL, "invariant" );
3716 // Region iteration was actually aborted. So now _finger
3717 // points to the address of the object we last scanned. If we
3718 // leave it there, when we restart this task, we will rescan
3719 // the object. It is easy to avoid this. We move the finger by
3720 // enough to point to the next possible object header (the
3721 // bitmap knows by how much we need to move it as it knows its
3722 // granularity).
3723 move_finger_to(_nextMarkBitMap->nextWord(_finger));
3724 }
3725 }
3726 // At this point we have either completed iterating over the
3727 // region we were holding on to, or we have aborted.
3729 // We then partially drain the local queue and the global stack.
3730 // (Do we really need this?)
3731 drain_local_queue(true);
3732 drain_global_stack(true);
3734 // Read the note on the claim_region() method on why it might
3735 // return NULL with potentially more regions available for
3736 // claiming and why we have to check out_of_regions() to determine
3737 // whether we're done or not.
3738 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3739 // We are going to try to claim a new region. We should have
3740 // given up on the previous one.
3741 tmp_guarantee_CM( _curr_region == NULL &&
3742 _finger == NULL &&
3743 _region_limit == NULL, "invariant" );
3744 if (_cm->verbose_low())
3745 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3746 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3747 if (claimed_region != NULL) {
3748 // Yes, we managed to claim one
3749 statsOnly( ++_regions_claimed );
3751 if (_cm->verbose_low())
3752 gclog_or_tty->print_cr("[%d] we successfully claimed "
3753 "region "PTR_FORMAT,
3754 _task_id, claimed_region);
3756 setup_for_region(claimed_region);
3757 tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3758 }
3759 // It is important to call the regular clock here. It might take
3760 // a while to claim a region if, for example, we hit a large
3761 // block of empty regions. So we need to call the regular clock
3762 // method once round the loop to make sure it's called
3763 // frequently enough.
3764 regular_clock_call();
3765 }
3767 if (!has_aborted() && _curr_region == NULL) {
3768 tmp_guarantee_CM( _cm->out_of_regions(),
3769 "at this point we should be out of regions" );
3770 }
3771 } while ( _curr_region != NULL && !has_aborted());
3773 if (!has_aborted()) {
3774 // We cannot check whether the global stack is empty, since other
3775 // tasks might be pushing objects to it concurrently. We also cannot
3776 // check if the region stack is empty because if a thread is aborting
3777 // it can push a partially done region back.
3778 tmp_guarantee_CM( _cm->out_of_regions(),
3779 "at this point we should be out of regions" );
3781 if (_cm->verbose_low())
3782 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3784 // Try to reduce the number of available SATB buffers so that
3785 // remark has less work to do.
3786 drain_satb_buffers();
3787 }
3789 // Since we've done everything else, we can now totally drain the
3790 // local queue and global stack.
3791 drain_local_queue(false);
3792 drain_global_stack(false);
3794   // Attempt at work stealing from other tasks' queues.
3795 if (!has_aborted()) {
3796 // We have not aborted. This means that we have finished all that
3797 // we could. Let's try to do some stealing...
3799 // We cannot check whether the global stack is empty, since other
3800 // tasks might be pushing objects to it concurrently. We also cannot
3801 // check if the region stack is empty because if a thread is aborting
3802 // it can push a partially done region back.
3803 guarantee( _cm->out_of_regions() &&
3804 _task_queue->size() == 0, "only way to reach here" );
3806 if (_cm->verbose_low())
3807 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3809 while (!has_aborted()) {
3810 oop obj;
3811 statsOnly( ++_steal_attempts );
3813 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3814 if (_cm->verbose_medium())
3815 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3816 _task_id, (void*) obj);
3818 statsOnly( ++_steals );
3820 tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3821 "any stolen object should be marked" );
3822 scan_object(obj);
3824 // And since we're towards the end, let's totally drain the
3825 // local queue and global stack.
3826 drain_local_queue(false);
3827 drain_global_stack(false);
3828 } else {
3829 break;
3830 }
3831 }
3832 }
3834 // We still haven't aborted. Now, let's try to get into the
3835 // termination protocol.
3836 if (!has_aborted()) {
3837 // We cannot check whether the global stack is empty, since other
3838 // tasks might be concurrently pushing objects on it. We also cannot
3839 // check if the region stack is empty because if a thread is aborting
3840 // it can push a partially done region back.
3841 guarantee( _cm->out_of_regions() &&
3842 _task_queue->size() == 0, "only way to reach here" );
3844 if (_cm->verbose_low())
3845 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3847 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3848 // The CMTask class also extends the TerminatorTerminator class,
3849 // hence its should_exit_termination() method will also decide
3850 // whether to exit the termination protocol or not.
3851 bool finished = _cm->terminator()->offer_termination(this);
3852 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3853 _termination_time_ms +=
3854 termination_end_time_ms - _termination_start_time_ms;
3856 if (finished) {
3857 // We're all done.
3859 if (_task_id == 0) {
3860 // let's allow task 0 to do this
3861 if (concurrent()) {
3862 guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3863 // we need to set this to false before the next
3864 // safepoint. This way we ensure that the marking phase
3865 // doesn't observe any more heap expansions.
3866 _cm->clear_concurrent_marking_in_progress();
3867 }
3868 }
3870 // We can now guarantee that the global stack is empty, since
3871 // all other tasks have finished.
3872 guarantee( _cm->out_of_regions() &&
3873 _cm->region_stack_empty() &&
3874 _cm->mark_stack_empty() &&
3875 _task_queue->size() == 0 &&
3876 !_cm->has_overflown() &&
3877 !_cm->mark_stack_overflow() &&
3878 !_cm->region_stack_overflow(),
3879 "only way to reach here" );
3881 if (_cm->verbose_low())
3882 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3883 } else {
3884       // Apparently there's more work to do. Let's abort this task. Its
3885       // caller will restart it and we can hopefully find more things to do.
3887 if (_cm->verbose_low())
3888 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3890 set_has_aborted();
3891 statsOnly( ++_aborted_termination );
3892 }
3893 }
3895 // Mainly for debugging purposes to make sure that a pointer to the
3896   // closure which was allocated in this frame doesn't
3897 // escape it by accident.
3898 set_oop_closure(NULL);
3899 double end_time_ms = os::elapsedVTime() * 1000.0;
3900 double elapsed_time_ms = end_time_ms - _start_time_ms;
3901 // Update the step history.
3902 _step_times_ms.add(elapsed_time_ms);
3904 if (has_aborted()) {
3905 // The task was aborted for some reason.
3907 statsOnly( ++_aborted );
3909 if (_has_aborted_timed_out) {
3910 double diff_ms = elapsed_time_ms - _time_target_ms;
3911 // Keep statistics of how well we did with respect to hitting
3912 // our target only if we actually timed out (if we aborted for
3913 // other reasons, then the results might get skewed).
3914 _marking_step_diffs_ms.add(diff_ms);
3915 }
3917 if (_cm->has_overflown()) {
3918 // This is the interesting one. We aborted because a global
3919 // overflow was raised. This means we have to restart the
3920 // marking phase and start iterating over regions. However, in
3921 // order to do this we have to make sure that all tasks stop
3922 // what they are doing and re-initialise in a safe manner. We
3923 // will achieve this with the use of two barrier sync points.
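      // Schematically: barrier #1 = "every task has stopped doing
      // marking work"; each task then resets its local state (and, as
      // noted below, task 0 clears the global structures); barrier #2
      // = "every task has re-initialised", after which it is safe to
      // return and restart.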
3925 if (_cm->verbose_low())
3926 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3928 _cm->enter_first_sync_barrier(_task_id);
3929 // When we exit this sync barrier we know that all tasks have
3930 // stopped doing marking work. So, it's now safe to
3931 // re-initialise our data structures. At the end of this method,
3932 // task 0 will clear the global data structures.
3934 statsOnly( ++_aborted_overflow );
3936 // We clear the local state of this task...
3937 clear_region_fields();
3939 // ...and enter the second barrier.
3940 _cm->enter_second_sync_barrier(_task_id);
3941     // At this point everything has been re-initialised and we're
3942 // ready to restart.
3943 }
3945 if (_cm->verbose_low()) {
3946 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3947 "elapsed = %1.2lfms <<<<<<<<<<",
3948 _task_id, _time_target_ms, elapsed_time_ms);
3949 if (_cm->has_aborted())
3950 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3951 _task_id);
3952 }
3953 } else {
3954 if (_cm->verbose_low())
3955 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3956 "elapsed = %1.2lfms <<<<<<<<<<",
3957 _task_id, _time_target_ms, elapsed_time_ms);
3958 }
3960 _claimed = false;
3961 }
3963 CMTask::CMTask(int task_id,
3964 ConcurrentMark* cm,
3965 CMTaskQueue* task_queue,
3966 CMTaskQueueSet* task_queues)
3967 : _g1h(G1CollectedHeap::heap()),
3968 _co_tracker(G1CMGroup),
3969 _task_id(task_id), _cm(cm),
3970 _claimed(false),
3971 _nextMarkBitMap(NULL), _hash_seed(17),
3972 _task_queue(task_queue),
3973 _task_queues(task_queues),
3974 _oop_closure(NULL) {
3975 guarantee( task_queue != NULL, "invariant" );
3976 guarantee( task_queues != NULL, "invariant" );
3978 statsOnly( _clock_due_to_scanning = 0;
3979 _clock_due_to_marking = 0 );
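  // seed the predictor of how far a step overshoots its time target
  // with a small value, so that early steps keep a safety margin
  // (real diffs are only recorded when a step genuinely times out)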
3981 _marking_step_diffs_ms.add(0.5);
3982 }