Tue, 04 Aug 2009 16:00:17 -0700
6819077: G1: first GC thread coming late into the GC.
Summary: The first worker thread is delayed when entering the GC because it must first clear the card count table that is used to identify hot cards. Replace the card count table with a dynamically sized, evicting hash table that includes an epoch-based counter.
Reviewed-by: iveresov, tonyp
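The fix makes the per-GC reset O(1): instead of the first worker zeroing the whole card count table on entry to the GC, each table entry carries the epoch (GC number) in which it was last updated, and entries from an earlier epoch are treated as empty the next time they are probed. Below is a minimal standalone C++ sketch of that idea; all names are illustrative and none are taken from the HotSpot sources.

#include <cstddef>
#include <cstdint>

struct CardCountEntry {
  uintptr_t card;   // card index being counted
  uint32_t  epoch;  // GC epoch in which this entry was last updated
  uint32_t  count;  // refinements seen for this card in that epoch
};

class EvictingCardCountTable {
  CardCountEntry* _entries;
  size_t          _size;   // a power of two, so masking works as modulo
  uint32_t        _epoch;  // bumped once per GC instead of clearing

public:
  explicit EvictingCardCountTable(size_t size)
    : _entries(new CardCountEntry[size]()), _size(size), _epoch(0) { }
  ~EvictingCardCountTable() { delete[] _entries; }

  // Called once at the start of each GC: O(1), so no worker is delayed
  // by a table-wide clear.
  void begin_new_epoch() { ++_epoch; }

  // Returns the updated count for this card. A stale entry (older
  // epoch) or a colliding card is simply evicted; counts are a
  // heuristic for hotness, so losing one is harmless.
  uint32_t add_card_count(uintptr_t card) {
    CardCountEntry& e = _entries[card & (_size - 1)];
    if (e.epoch != _epoch || e.card != card) {
      e.card  = card;
      e.epoch = _epoch;
      e.count = 0;
    }
    return ++e.count;
  }
};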
1 /*
2 * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentMark.cpp.incl"
28 //
29 // CM Bit Map Wrapper
31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
32 _bm((uintptr_t*)NULL,0),
33 _shifter(shifter) {
34 _bmStartWord = (HeapWord*)(rs.base());
35 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
36 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
37 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
39 guarantee(brs.is_reserved(), "couldn't allocate CM bit map");
40 // For now we'll just commit all of the bit map up front.
41 // Later on we'll try to be more parsimonious with swap.
42 guarantee(_virtual_space.initialize(brs, brs.size()),
43 "couldn't reserve backing store for CM bit map");
44 assert(_virtual_space.committed_size() == brs.size(),
45 "didn't reserve backing store for all of CM bit map?");
46 _bm.set_map((uintptr_t*)_virtual_space.low());
47 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
48 _bmWordSize, "inconsistency in bit map sizing");
49 _bm.set_size(_bmWordSize >> _shifter);
50 }
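// A worked restatement of the sizing above (illustrative, not part of
// the original file): one bit covers 2^_shifter heap words, so a heap of
// _bmWordSize words needs (_bmWordSize >> (_shifter + LogBitsPerByte))
// bytes of backing store, LogBitsPerByte being 3. For example, with
// _shifter == 0 a 1 GB heap (2^27 eight-byte words) needs 2^27 bits,
// i.e. a 16 MB bitmap, and an address maps to bit
// ((addr - _bmStartWord) >> _shifter), which is what the
// heapWordToOffset() calls below assume.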
52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
53 HeapWord* limit) const {
54 // First we must round addr *up* to a possible object boundary.
55 addr = (HeapWord*)align_size_up((intptr_t)addr,
56 HeapWordSize << _shifter);
57 size_t addrOffset = heapWordToOffset(addr);
58 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
59 size_t limitOffset = heapWordToOffset(limit);
60 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
61 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
62 assert(nextAddr >= addr, "get_next_one postcondition");
63 assert(nextAddr == limit || isMarked(nextAddr),
64 "get_next_one postcondition");
65 return nextAddr;
66 }
68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
69 HeapWord* limit) const {
70 size_t addrOffset = heapWordToOffset(addr);
71 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
72 size_t limitOffset = heapWordToOffset(limit);
73 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
74 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 assert(nextAddr >= addr, "get_next_zero postcondition");
76 assert(nextAddr == limit || !isMarked(nextAddr),
77 "get_next_zero postcondition");
78 return nextAddr;
79 }
81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
82 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
83 return (int) (diff >> _shifter);
84 }
86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
87 HeapWord* left = MAX2(_bmStartWord, mr.start());
88 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
89 if (right > left) {
90 // Right-open interval [leftOffset, rightOffset).
91 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
92 } else {
93 return true;
94 }
95 }
97 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
98 size_t from_start_index,
99 HeapWord* to_start_word,
100 size_t word_num) {
101 _bm.mostly_disjoint_range_union(from_bitmap,
102 from_start_index,
103 heapWordToOffset(to_start_word),
104 word_num);
105 }
107 #ifndef PRODUCT
108 bool CMBitMapRO::covers(ReservedSpace rs) const {
109 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
110 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
111 "size inconsistency");
112 return _bmStartWord == (HeapWord*)(rs.base()) &&
113 _bmWordSize == rs.size()>>LogHeapWordSize;
114 }
115 #endif
117 void CMBitMap::clearAll() {
118 _bm.clear();
119 return;
120 }
122 void CMBitMap::markRange(MemRegion mr) {
123 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
124 assert(!mr.is_empty(), "unexpected empty region");
125 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
126 ((HeapWord *) mr.end())),
127 "markRange memory region end is not card aligned");
128 // convert address range into offset range
129 _bm.at_put_range(heapWordToOffset(mr.start()),
130 heapWordToOffset(mr.end()), true);
131 }
133 void CMBitMap::clearRange(MemRegion mr) {
134 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
135 assert(!mr.is_empty(), "unexpected empty region");
136 // convert address range into offset range
137 _bm.at_put_range(heapWordToOffset(mr.start()),
138 heapWordToOffset(mr.end()), false);
139 }
141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
142 HeapWord* end_addr) {
143 HeapWord* start = getNextMarkedWordAddress(addr);
144 start = MIN2(start, end_addr);
145 HeapWord* end = getNextUnmarkedWordAddress(start);
146 end = MIN2(end, end_addr);
147 assert(start <= end, "Consistency check");
148 MemRegion mr(start, end);
149 if (!mr.is_empty()) {
150 clearRange(mr);
151 }
152 return mr;
153 }
155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
156 _base(NULL), _cm(cm)
157 #ifdef ASSERT
158 , _drain_in_progress(false)
159 , _drain_in_progress_yields(false)
160 #endif
161 {}
163 void CMMarkStack::allocate(size_t size) {
164 _base = NEW_C_HEAP_ARRAY(oop, size);
165 if (_base == NULL)
166 vm_exit_during_initialization("Failed to allocate "
167 "CM mark stack");
168 _index = 0;
169 // QQQQ cast ...
170 _capacity = (jint) size;
171 _oops_do_bound = -1;
172 NOT_PRODUCT(_max_depth = 0);
173 }
175 CMMarkStack::~CMMarkStack() {
176 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
177 }
179 void CMMarkStack::par_push(oop ptr) {
180 while (true) {
181 if (isFull()) {
182 _overflow = true;
183 return;
184 }
185 // Otherwise...
186 jint index = _index;
187 jint next_index = index+1;
188 jint res = Atomic::cmpxchg(next_index, &_index, index);
189 if (res == index) {
190 _base[index] = ptr;
191 // Note that we don't maintain this atomically. We could, but it
192 // doesn't seem necessary.
193 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
194 return;
195 }
196 // Otherwise, we need to try again.
197 }
198 }
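// A standalone restatement of the claim-a-slot-by-CAS idiom above, in
// portable C++11 (a sketch only; HotSpot of this era uses Atomic::cmpxchg
// rather than std::atomic). The thread that wins the CAS on the index
// owns the claimed slot and may fill it without further synchronization,
// because concurrent removal goes through the mutex-protected
// par_pop_arr() below rather than racing with pushes.
#include <atomic>

template <typename T, int CAPACITY>
struct ParStackSketch {
  T _base[CAPACITY];
  std::atomic<int> _index{0};

  bool par_push(const T& value) {         // returns false on overflow
    while (true) {
      int index = _index.load();
      if (index >= CAPACITY) return false;
      if (_index.compare_exchange_weak(index, index + 1)) {
        _base[index] = value;             // slot `index` is now ours
        return true;
      }
      // lost the race (or failed spuriously); retry with a fresh index
    }
  }
};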
200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
201 while (true) {
202 if (isFull()) {
203 _overflow = true;
204 return;
205 }
206 // Otherwise...
207 jint index = _index;
208 jint next_index = index + n;
209 if (next_index > _capacity) {
210 _overflow = true;
211 return;
212 }
213 jint res = Atomic::cmpxchg(next_index, &_index, index);
214 if (res == index) {
215 for (int i = 0; i < n; i++) {
216 int ind = index + i;
217 assert(ind < _capacity, "By overflow test above.");
218 _base[ind] = ptr_arr[i];
219 }
220 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
221 return;
222 }
223 // Otherwise, we need to try again.
224 }
225 }
228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
229 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
230 jint start = _index;
231 jint next_index = start + n;
232 if (next_index > _capacity) {
233 _overflow = true;
234 return;
235 }
236 // Otherwise.
237 _index = next_index;
238 for (int i = 0; i < n; i++) {
239 int ind = start + i;
240 guarantee(ind < _capacity, "By overflow test above.");
241 _base[ind] = ptr_arr[i];
242 }
243 }
246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
247 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
248 jint index = _index;
249 if (index == 0) {
250 *n = 0;
251 return false;
252 } else {
253 int k = MIN2(max, index);
254 jint new_ind = index - k;
255 for (int j = 0; j < k; j++) {
256 ptr_arr[j] = _base[new_ind + j];
257 }
258 _index = new_ind;
259 *n = k;
260 return true;
261 }
262 }
265 CMRegionStack::CMRegionStack() : _base(NULL) {}
267 void CMRegionStack::allocate(size_t size) {
268 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
269 if (_base == NULL)
270 vm_exit_during_initialization("Failed to allocate "
271 "CM region mark stack");
272 _index = 0;
273 // QQQQ cast ...
274 _capacity = (jint) size;
275 }
277 CMRegionStack::~CMRegionStack() {
278 if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
279 }
281 void CMRegionStack::push(MemRegion mr) {
282 assert(mr.word_size() > 0, "Precondition");
283 while (true) {
284 if (isFull()) {
285 _overflow = true;
286 return;
287 }
288 // Otherwise...
289 jint index = _index;
290 jint next_index = index+1;
291 jint res = Atomic::cmpxchg(next_index, &_index, index);
292 if (res == index) {
293 _base[index] = mr;
294 return;
295 }
296 // Otherwise, we need to try again.
297 }
298 }
300 MemRegion CMRegionStack::pop() {
301 while (true) {
303 jint index = _index;
305 if (index == 0) {
306 return MemRegion();
307 }
308 jint next_index = index-1;
309 jint res = Atomic::cmpxchg(next_index, &_index, index);
310 if (res == index) {
311 MemRegion mr = _base[next_index];
312 if (mr.start() != NULL) {
313 tmp_guarantee_CM( mr.end() != NULL, "invariant" );
314 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
315 return mr;
316 } else {
317 // that entry was invalidated... let's skip it
318 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
319 }
320 }
321 // Otherwise, we need to try again.
322 }
323 }
325 bool CMRegionStack::invalidate_entries_into_cset() {
326 bool result = false;
327 G1CollectedHeap* g1h = G1CollectedHeap::heap();
328 for (int i = 0; i < _oops_do_bound; ++i) {
329 MemRegion mr = _base[i];
330 if (mr.start() != NULL) {
331 tmp_guarantee_CM( mr.end() != NULL, "invariant");
332 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
333 HeapRegion* hr = g1h->heap_region_containing(mr.start());
334 tmp_guarantee_CM( hr != NULL, "invariant" );
335 if (hr->in_collection_set()) {
336 // The region points into the collection set
337 _base[i] = MemRegion();
338 result = true;
339 }
340 } else {
341 // that entry was invalidated... let's skip it
342 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
343 }
344 }
345 return result;
346 }
348 template<class OopClosureClass>
349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
350 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
351 || SafepointSynchronize::is_at_safepoint(),
352 "Drain recursion must be yield-safe.");
353 bool res = true;
354 debug_only(_drain_in_progress = true);
355 debug_only(_drain_in_progress_yields = yield_after);
356 while (!isEmpty()) {
357 oop newOop = pop();
358 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
359 assert(newOop->is_oop(), "Expected an oop");
360 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
361 "only grey objects on this stack");
362 // iterate over the oops in this oop, marking and pushing
363 // the ones in the G1 heap.
364 newOop->oop_iterate(cl);
365 if (yield_after && _cm->do_yield_check()) {
366 res = false; break;
367 }
368 }
369 debug_only(_drain_in_progress = false);
370 return res;
371 }
373 void CMMarkStack::oops_do(OopClosure* f) {
374 if (_index == 0) return;
375 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
376 "Bound must be set.");
377 for (int i = 0; i < _oops_do_bound; i++) {
378 f->do_oop(&_base[i]);
379 }
380 _oops_do_bound = -1;
381 }
383 bool ConcurrentMark::not_yet_marked(oop obj) const {
384 return (_g1h->is_obj_ill(obj)
385 || (_g1h->is_in_permanent(obj)
386 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
387 }
389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
391 #endif // _MSC_VER
393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
394 int max_regions) :
395 _markBitMap1(rs, MinObjAlignment - 1),
396 _markBitMap2(rs, MinObjAlignment - 1),
398 _parallel_marking_threads(0),
399 _sleep_factor(0.0),
400 _marking_task_overhead(1.0),
401 _cleanup_sleep_factor(0.0),
402 _cleanup_task_overhead(1.0),
403 _region_bm(max_regions, false /* in_resource_area*/),
404 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
405 CardTableModRefBS::card_shift,
406 false /* in_resource_area*/),
407 _prevMarkBitMap(&_markBitMap1),
408 _nextMarkBitMap(&_markBitMap2),
409 _at_least_one_mark_complete(false),
411 _markStack(this),
412 _regionStack(),
413 // _finger set in set_non_marking_state
415 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
416 // _active_tasks set in set_non_marking_state
417 // _tasks set inside the constructor
418 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
419 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
421 _has_overflown(false),
422 _concurrent(false),
423 _has_aborted(false),
424 _restart_for_overflow(false),
425 _concurrent_marking_in_progress(false),
426 _should_gray_objects(false),
428 // _verbose_level set below
430 _init_times(),
431 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
432 _cleanup_times(),
433 _total_counting_time(0.0),
434 _total_rs_scrub_time(0.0),
436 _parallel_workers(NULL),
437 _cleanup_co_tracker(G1CLGroup)
438 {
439 CMVerboseLevel verbose_level =
440 (CMVerboseLevel) G1MarkingVerboseLevel;
441 if (verbose_level < no_verbose)
442 verbose_level = no_verbose;
443 if (verbose_level > high_verbose)
444 verbose_level = high_verbose;
445 _verbose_level = verbose_level;
447 if (verbose_low())
448 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
449 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
451 _markStack.allocate(G1MarkStackSize);
452 _regionStack.allocate(G1MarkRegionStackSize);
454 // Create & start a ConcurrentMark thread.
455 _cmThread = new ConcurrentMarkThread(this);
456 assert(cmThread() != NULL, "CM Thread should have been created");
457 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
459 _g1h = G1CollectedHeap::heap();
460 assert(CGC_lock != NULL, "Where's the CGC_lock?");
461 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
462 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
464 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
465 satb_qs.set_buffer_size(G1SATBLogBufferSize);
467 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
468 _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
469 for (int i = 0 ; i < size; i++) {
470 _par_cleanup_thread_state[i] = new ParCleanupThreadState;
471 }
473 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
474 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
476 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
477 _active_tasks = _max_task_num;
478 for (int i = 0; i < (int) _max_task_num; ++i) {
479 CMTaskQueue* task_queue = new CMTaskQueue();
480 task_queue->initialize();
481 _task_queues->register_queue(i, task_queue);
483 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
484 _accum_task_vtime[i] = 0.0;
485 }
487 if (ParallelMarkingThreads > ParallelGCThreads) {
488 vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
489 "than ParallelGCThreads.");
490 }
491 if (ParallelGCThreads == 0) {
492 // if we are not running with any parallel GC threads we will not
493 // spawn any marking threads either
494 _parallel_marking_threads = 0;
495 _sleep_factor = 0.0;
496 _marking_task_overhead = 1.0;
497 } else {
498 if (ParallelMarkingThreads > 0) {
499 // note that ParallelMarkingThreads overrides G1MarkingOverheadPercent
500 // if both are set
502 _parallel_marking_threads = ParallelMarkingThreads;
503 _sleep_factor = 0.0;
504 _marking_task_overhead = 1.0;
505 } else if (G1MarkingOverheadPercent > 0) {
506 // we will calculate the number of parallel marking threads
507 // based on a target overhead with respect to the soft real-time
508 // goal
510 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
511 double overall_cm_overhead =
512 (double) MaxGCPauseMillis * marking_overhead /
513 (double) GCPauseIntervalMillis;
514 double cpu_ratio = 1.0 / (double) os::processor_count();
515 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
516 double marking_task_overhead =
517 overall_cm_overhead / marking_thread_num *
518 (double) os::processor_count();
519 double sleep_factor =
520 (1.0 - marking_task_overhead) / marking_task_overhead;
522 _parallel_marking_threads = (size_t) marking_thread_num;
523 _sleep_factor = sleep_factor;
524 _marking_task_overhead = marking_task_overhead;
525 } else {
526 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
527 _sleep_factor = 0.0;
528 _marking_task_overhead = 1.0;
529 }
531 if (parallel_marking_threads() > 1)
532 _cleanup_task_overhead = 1.0;
533 else
534 _cleanup_task_overhead = marking_task_overhead();
535 _cleanup_sleep_factor =
536 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
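// Worked example of the calculation above (illustrative numbers, not
// defaults): with G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
// GCPauseIntervalMillis = 1000 and 8 processors:
//   marking_overhead      = 10 / 100.0         = 0.10
//   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
//   cpu_ratio             = 1 / 8              = 0.125
//   marking_thread_num    = ceil(0.02 / 0.125) = 1
//   marking_task_overhead = 0.02 / 1 * 8       = 0.16
//   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
// i.e. a single marking thread that, after each do_marking_step() burst,
// sleeps about 5.25 times as long as it just ran (see the sleep_time_ms
// computation in CMConcurrentMarkingTask::work below).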
538 #if 0
539 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
540 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
541 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
542 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
543 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
544 #endif
546 guarantee( parallel_marking_threads() > 0, "peace of mind" );
547 _parallel_workers = new WorkGang("Parallel Marking Threads",
548 (int) parallel_marking_threads(), false, true);
549 if (_parallel_workers == NULL)
550 vm_exit_during_initialization("Failed necessary allocation.");
551 }
553 // so that the call below can read a sensible value
554 _heap_start = (HeapWord*) rs.base();
555 set_non_marking_state();
556 }
558 void ConcurrentMark::update_g1_committed(bool force) {
559 // If concurrent marking is not in progress, then we do not need to
560 // update _heap_end. This has a subtle and important
561 // side-effect. Imagine that two evacuation pauses happen between
562 // marking completion and remark. The first one can grow the
563 // heap (hence now the finger is below the heap end). Then, the
564 // second one could unnecessarily push regions on the region
565 // stack. This causes the invariant that the region stack is empty
566 // at the beginning of remark to be false. By ensuring that we do
567 // not observe heap expansions after marking is complete, then we do
568 // not have this problem.
569 if (!concurrent_marking_in_progress() && !force)
570 return;
572 MemRegion committed = _g1h->g1_committed();
573 tmp_guarantee_CM( committed.start() == _heap_start,
574 "start shouldn't change" );
575 HeapWord* new_end = committed.end();
576 if (new_end > _heap_end) {
577 // The heap has been expanded.
579 _heap_end = new_end;
580 }
581 // Notice that the heap can also shrink. However, this only happens
582 // during a Full GC (at least currently) and the entire marking
583 // phase will bail out and the task will not be restarted. So, let's
584 // do nothing.
585 }
587 void ConcurrentMark::reset() {
588 // Starting values for these two. This should be called in a STW
589 // phase. CM will be notified of any future g1_committed
590 // expansions at the end of evacuation pauses, when tasks
591 // are inactive.
592 MemRegion committed = _g1h->g1_committed();
593 _heap_start = committed.start();
594 _heap_end = committed.end();
596 guarantee( _heap_start != NULL &&
597 _heap_end != NULL &&
598 _heap_start < _heap_end, "heap bounds should look ok" );
600 // reset all the marking data structures and any necessary flags
601 clear_marking_state();
603 if (verbose_low())
604 gclog_or_tty->print_cr("[global] resetting");
606 // We do reset all of them, since different phases will use a
607 // different number of active threads. So, it's easiest to have all
608 // of them ready.
609 for (int i = 0; i < (int) _max_task_num; ++i)
610 _tasks[i]->reset(_nextMarkBitMap);
612 // we need this to make sure that the flag is on during an evac
613 // pause with a piggy-backed initial mark
614 set_concurrent_marking_in_progress();
615 }
617 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
618 guarantee( active_tasks <= _max_task_num, "we should not have more" );
620 _active_tasks = active_tasks;
621 // Need to update the three data structures below according to the
622 // number of active threads for this phase.
623 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
624 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
625 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
627 _concurrent = concurrent;
628 // We propagate this to all tasks, not just the active ones.
629 for (int i = 0; i < (int) _max_task_num; ++i)
630 _tasks[i]->set_concurrent(concurrent);
632 if (concurrent) {
633 set_concurrent_marking_in_progress();
634 } else {
635 // We currently assume that the concurrent flag has been set to
636 // false before we start remark. At this point we should also be
637 // in a STW phase.
638 guarantee( !concurrent_marking_in_progress(), "invariant" );
639 guarantee( _finger == _heap_end, "only way to get here" );
640 update_g1_committed(true);
641 }
642 }
644 void ConcurrentMark::set_non_marking_state() {
645 // We set the global marking state to some default values when we're
646 // not doing marking.
647 clear_marking_state();
648 _active_tasks = 0;
649 clear_concurrent_marking_in_progress();
650 }
652 ConcurrentMark::~ConcurrentMark() {
653 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
654 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
655 FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
656 _par_cleanup_thread_state);
658 for (int i = 0; i < (int) _max_task_num; ++i) {
659 delete _task_queues->queue(i);
660 delete _tasks[i];
661 }
662 delete _task_queues;
663 FREE_C_HEAP_ARRAY(CMTask*, _tasks);
664 }
666 // This closure is used to mark refs into the G1 heap
667 // from external roots in the CM bit map.
668 // Called at the first checkpoint.
669 //
671 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
672 #if PRINT_REACHABLE_AT_INITIAL_MARK
673 static FILE* reachable_file = NULL;
675 class PrintReachableClosure: public OopsInGenClosure {
676 CMBitMap* _bm;
677 int _level;
678 public:
679 PrintReachableClosure(CMBitMap* bm) :
680 _bm(bm), _level(0) {
681 guarantee(reachable_file != NULL, "pre-condition");
682 }
683 void do_oop(oop* p) {
684 oop obj = *p;
685 HeapWord* obj_addr = (HeapWord*)obj;
686 if (obj == NULL) return;
687 fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
688 _level, p, (void*) obj, _bm->isMarked(obj_addr));
689 if (!_bm->isMarked(obj_addr)) {
690 _bm->mark(obj_addr);
691 _level++;
692 obj->oop_iterate(this);
693 _level--;
694 }
695 }
696 };
697 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
699 #define SEND_HEAP_DUMP_TO_FILE 0
700 #if SEND_HEAP_DUMP_TO_FILE
701 static FILE* heap_dump_file = NULL;
702 #endif // SEND_HEAP_DUMP_TO_FILE
704 void ConcurrentMark::clearNextBitmap() {
705 guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
707 // clear the mark bitmap (no grey objects to start with).
708 // We need to do this in chunks and offer to yield in between
709 // each chunk.
710 HeapWord* start = _nextMarkBitMap->startWord();
711 HeapWord* end = _nextMarkBitMap->endWord();
712 HeapWord* cur = start;
713 size_t chunkSize = M;
714 while (cur < end) {
715 HeapWord* next = cur + chunkSize;
716 if (next > end)
717 next = end;
718 MemRegion mr(cur,next);
719 _nextMarkBitMap->clearRange(mr);
720 cur = next;
721 do_yield_check();
722 }
723 }
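// The chunk-and-yield pattern above as a standalone sketch (illustrative
// only): process a large range in fixed-size chunks and offer to yield
// between chunks, so a pending safepoint never has to wait for the whole
// clear to finish.
template <typename DoChunk, typename YieldCheck>
static void chunked_work_sketch(size_t start, size_t end, size_t chunk,
                                DoChunk do_chunk, YieldCheck yield_check) {
  for (size_t cur = start; cur < end; ) {
    size_t next = cur + chunk;
    if (next > end) next = end;     // clip the final, partial chunk
    do_chunk(cur, next);            // process [cur, next)
    cur = next;
    yield_check();                  // let a pending safepoint proceed
  }
}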
725 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
726 public:
727 bool doHeapRegion(HeapRegion* r) {
728 if (!r->continuesHumongous()) {
729 r->note_start_of_marking(true);
730 }
731 return false;
732 }
733 };
735 void ConcurrentMark::checkpointRootsInitialPre() {
736 G1CollectedHeap* g1h = G1CollectedHeap::heap();
737 G1CollectorPolicy* g1p = g1h->g1_policy();
739 _has_aborted = false;
741 // Find all the reachable objects...
742 #if PRINT_REACHABLE_AT_INITIAL_MARK
743 guarantee(reachable_file == NULL, "Protocol");
744 char fn_buf[100];
745 sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
746 reachable_file = fopen(fn_buf, "w");
747 // clear the mark bitmap (no grey objects to start with)
748 _nextMarkBitMap->clearAll();
749 PrintReachableClosure prcl(_nextMarkBitMap);
750 g1h->process_strong_roots(
751 false, // fake perm gen collection
752 SharedHeap::SO_AllClasses,
753 &prcl, // Regular roots
754 &prcl // Perm Gen Roots
755 );
756 // The root iteration above "consumed" dirty cards in the perm gen.
757 // Therefore, as a shortcut, we dirty all such cards.
758 g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
759 fclose(reachable_file);
760 reachable_file = NULL;
761 // clear the mark bitmap again.
762 _nextMarkBitMap->clearAll();
763 COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
764 COMPILER2_PRESENT(DerivedPointerTable::clear());
765 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
767 // Initialise marking structures. This has to be done in a STW phase.
768 reset();
769 }
771 class CMMarkRootsClosure: public OopsInGenClosure {
772 private:
773 ConcurrentMark* _cm;
774 G1CollectedHeap* _g1h;
775 bool _do_barrier;
777 public:
778 CMMarkRootsClosure(ConcurrentMark* cm,
779 G1CollectedHeap* g1h,
780 bool do_barrier) : _cm(cm), _g1h(g1h),
781 _do_barrier(do_barrier) { }
783 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
784 virtual void do_oop( oop* p) { do_oop_work(p); }
786 template <class T> void do_oop_work(T* p) {
787 T heap_oop = oopDesc::load_heap_oop(p);
788 if (!oopDesc::is_null(heap_oop)) {
789 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
790 assert(obj->is_oop() || obj->mark() == NULL,
791 "expected an oop, possibly with mark word displaced");
792 HeapWord* addr = (HeapWord*)obj;
793 if (_g1h->is_in_g1_reserved(addr)) {
794 _cm->grayRoot(obj);
795 }
796 }
797 if (_do_barrier) {
798 assert(!_g1h->is_in_g1_reserved(p),
799 "Should be called on external roots");
800 do_barrier(p);
801 }
802 }
803 };
805 void ConcurrentMark::checkpointRootsInitialPost() {
806 G1CollectedHeap* g1h = G1CollectedHeap::heap();
808 // For each region note start of marking.
809 NoteStartOfMarkHRClosure startcl;
810 g1h->heap_region_iterate(&startcl);
812 // Start weak-reference discovery.
813 ReferenceProcessor* rp = g1h->ref_processor();
814 rp->verify_no_references_recorded();
815 rp->enable_discovery(); // enable ("weak") refs discovery
816 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
818 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
819 satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
820 satb_mq_set.set_active_all_threads(true);
822 // update_g1_committed() will be called at the end of an evac pause
823 // when marking is on. So, it's also called at the end of the
824 // initial-mark pause to update the heap end, if the heap expands
825 // during it. No need to call it here.
827 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
829 size_t max_marking_threads =
830 MAX2((size_t) 1, parallel_marking_threads());
831 for (int i = 0; i < (int)_max_task_num; ++i) {
832 _tasks[i]->enable_co_tracker();
833 if (i < (int) max_marking_threads)
834 _tasks[i]->reset_co_tracker(marking_task_overhead());
835 else
836 _tasks[i]->reset_co_tracker(0.0);
837 }
838 }
840 // Checkpoint the roots into this generation from outside
841 // this generation. [Note this initial checkpoint need only
842 // be approximate -- we'll do a catch-up phase subsequently.]
843 void ConcurrentMark::checkpointRootsInitial() {
844 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
845 G1CollectedHeap* g1h = G1CollectedHeap::heap();
847 double start = os::elapsedTime();
848 GCOverheadReporter::recordSTWStart(start);
850 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
851 g1p->record_concurrent_mark_init_start();
852 checkpointRootsInitialPre();
854 // YSR: when concurrent precleaning is in place, we'll
855 // need to clear the cached card table here
857 ResourceMark rm;
858 HandleMark hm;
860 g1h->ensure_parsability(false);
861 g1h->perm_gen()->save_marks();
863 CMMarkRootsClosure notOlder(this, g1h, false);
864 CMMarkRootsClosure older(this, g1h, true);
866 g1h->set_marking_started();
867 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
869 g1h->process_strong_roots(false, // fake perm gen collection
870 SharedHeap::SO_AllClasses,
871 &notOlder, // Regular roots
872 &older // Perm Gen Roots
873 );
874 checkpointRootsInitialPost();
876 // Statistics.
877 double end = os::elapsedTime();
878 _init_times.add((end - start) * 1000.0);
879 GCOverheadReporter::recordSTWEnd(end);
881 g1p->record_concurrent_mark_init_end();
882 }
884 /*
885 Notice that in the next two methods, we actually leave the STS
886 during the barrier sync and join it immediately afterwards. If we
887 do not do this, then the following deadlock can occur: one
888 thread could be in the barrier sync code, waiting for the other
889 thread to also sync up, whereas another one could be trying to
890 yield, while also waiting for the other threads to sync up too.
892 Because the thread that does the sync barrier has left the STS, it
893 is possible to be suspended for a Full GC or an evacuation pause
894 could occur. This is actually safe, since the entering the sync
895 barrier is one of the last things do_marking_step() does, and it
896 doesn't manipulate any data structures afterwards.
897 */
899 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
900 if (verbose_low())
901 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
903 ConcurrentGCThread::stsLeave();
904 _first_overflow_barrier_sync.enter();
905 ConcurrentGCThread::stsJoin();
906 // at this point everyone should have synced up and not be doing any
907 // more work
909 if (verbose_low())
910 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
912 // let task 0 do this
913 if (task_num == 0) {
914 // task 0 is responsible for clearing the global data structures
915 clear_marking_state();
917 if (PrintGC) {
918 gclog_or_tty->date_stamp(PrintGCDateStamps);
919 gclog_or_tty->stamp(PrintGCTimeStamps);
920 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
921 }
922 }
924 // after this, each task should reset its own data structures and
925 // then go into the second barrier
926 }
928 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
929 if (verbose_low())
930 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
932 ConcurrentGCThread::stsLeave();
933 _second_overflow_barrier_sync.enter();
934 ConcurrentGCThread::stsJoin();
935 // at this point everything should be re-initialised and ready to go
937 if (verbose_low())
938 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
939 }
941 void ConcurrentMark::grayRoot(oop p) {
942 HeapWord* addr = (HeapWord*) p;
943 // We can't really check against _heap_start and _heap_end, since it
944 // is possible during an evacuation pause with piggy-backed
945 // initial-mark that the committed space is expanded during the
946 // pause without CM observing this change. So the assertion below
947 // is a bit conservative, but better than nothing.
948 tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
949 "address should be within the heap bounds" );
951 if (!_nextMarkBitMap->isMarked(addr))
952 _nextMarkBitMap->parMark(addr);
953 }
955 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
956 // The objects on the region have already been marked "in bulk" by
957 // the caller. We only need to decide whether to push the region on
958 // the region stack or not.
960 if (!concurrent_marking_in_progress() || !_should_gray_objects)
961 // We're done with marking and waiting for remark. We do not need to
962 // push anything else on the region stack.
963 return;
965 HeapWord* finger = _finger;
967 if (verbose_low())
968 gclog_or_tty->print_cr("[global] attempting to push "
969 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
970 PTR_FORMAT, mr.start(), mr.end(), finger);
972 if (mr.start() < finger) {
973 // The finger is always heap region aligned and it is not possible
974 // for mr to span heap regions.
975 tmp_guarantee_CM( mr.end() <= finger, "invariant" );
977 tmp_guarantee_CM( mr.start() <= mr.end() &&
978 _heap_start <= mr.start() &&
979 mr.end() <= _heap_end,
980 "region boundaries should fall within the committed space" );
981 if (verbose_low())
982 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
983 "below the finger, pushing it",
984 mr.start(), mr.end());
986 if (!region_stack_push(mr)) {
987 if (verbose_low())
988 gclog_or_tty->print_cr("[global] region stack has overflown.");
989 }
990 }
991 }
993 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
994 // The object is not marked by the caller. We need to at least mark
995 // it and maybe push it on the stack.
997 HeapWord* addr = (HeapWord*)p;
998 if (!_nextMarkBitMap->isMarked(addr)) {
999 // We definitely need to mark it, irrespective of whether we bail
1000 // out because we're done with marking.
1001 if (_nextMarkBitMap->parMark(addr)) {
1002 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1003 // If we're done with concurrent marking and we're waiting for
1004 // remark, then we're not pushing anything on the stack.
1005 return;
1007 // No OrderAccess::store_load() is needed. It is implicit in the
1008 // CAS done in parMark(addr) above
1009 HeapWord* finger = _finger;
1011 if (addr < finger) {
1012 if (!mark_stack_push(oop(addr))) {
1013 if (verbose_low())
1014 gclog_or_tty->print_cr("[global] global stack overflow "
1015 "during parMark");
1016 }
1017 }
1018 }
1019 }
1020 }
1022 class CMConcurrentMarkingTask: public AbstractGangTask {
1023 private:
1024 ConcurrentMark* _cm;
1025 ConcurrentMarkThread* _cmt;
1027 public:
1028 void work(int worker_i) {
1029 guarantee( Thread::current()->is_ConcurrentGC_thread(),
1030 "this should only be done by a conc GC thread" );
1032 double start_vtime = os::elapsedVTime();
1034 ConcurrentGCThread::stsJoin();
1036 guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
1037 CMTask* the_task = _cm->task(worker_i);
1038 the_task->start_co_tracker();
1039 the_task->record_start_time();
1040 if (!_cm->has_aborted()) {
1041 do {
1042 double start_vtime_sec = os::elapsedVTime();
1043 double start_time_sec = os::elapsedTime();
1044 the_task->do_marking_step(10.0);
1045 double end_time_sec = os::elapsedTime();
1046 double end_vtime_sec = os::elapsedVTime();
1047 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1048 double elapsed_time_sec = end_time_sec - start_time_sec;
1049 _cm->clear_has_overflown();
1051 bool ret = _cm->do_yield_check(worker_i);
1053 jlong sleep_time_ms;
1054 if (!_cm->has_aborted() && the_task->has_aborted()) {
1055 sleep_time_ms =
1056 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1057 ConcurrentGCThread::stsLeave();
1058 os::sleep(Thread::current(), sleep_time_ms, false);
1059 ConcurrentGCThread::stsJoin();
1060 }
1061 double end_time2_sec = os::elapsedTime();
1062 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1064 the_task->update_co_tracker();
1066 #if 0
1067 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1068 "overhead %1.4lf",
1069 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1070 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1071 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1072 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1073 #endif
1074 } while (!_cm->has_aborted() && the_task->has_aborted());
1075 }
1076 the_task->record_end_time();
1077 guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
1079 ConcurrentGCThread::stsLeave();
1081 double end_vtime = os::elapsedVTime();
1082 the_task->update_co_tracker(true);
1083 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1084 }
1086 CMConcurrentMarkingTask(ConcurrentMark* cm,
1087 ConcurrentMarkThread* cmt) :
1088 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1090 ~CMConcurrentMarkingTask() { }
1091 };
1093 void ConcurrentMark::markFromRoots() {
1094 // we might be tempted to assert that:
1095 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1096 // "inconsistent argument?");
1097 // However that wouldn't be right, because it's possible that
1098 // a safepoint is indeed in progress as a younger generation
1099 // stop-the-world GC happens even as we mark in this generation.
1101 _restart_for_overflow = false;
1103 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1105 CMConcurrentMarkingTask markingTask(this, cmThread());
1106 if (parallel_marking_threads() > 0)
1107 _parallel_workers->run_task(&markingTask);
1108 else
1109 markingTask.work(0);
1110 print_stats();
1111 }
1113 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1114 // world is stopped at this checkpoint
1115 assert(SafepointSynchronize::is_at_safepoint(),
1116 "world should be stopped");
1117 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1119 // If a full collection has happened, we shouldn't do this.
1120 if (has_aborted()) {
1121 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1122 return;
1123 }
1125 if (VerifyDuringGC) {
1126 HandleMark hm; // handle scope
1127 gclog_or_tty->print(" VerifyDuringGC:(before)");
1128 Universe::heap()->prepare_for_verify();
1129 Universe::verify(true, false, true);
1130 }
1132 G1CollectorPolicy* g1p = g1h->g1_policy();
1133 g1p->record_concurrent_mark_remark_start();
1135 double start = os::elapsedTime();
1136 GCOverheadReporter::recordSTWStart(start);
1138 checkpointRootsFinalWork();
1140 double mark_work_end = os::elapsedTime();
1142 weakRefsWork(clear_all_soft_refs);
1144 if (has_overflown()) {
1145 // Oops. We overflowed. Restart concurrent marking.
1146 _restart_for_overflow = true;
1147 // Clear the flag. We do not need it any more.
1148 clear_has_overflown();
1149 if (G1TraceMarkStackOverflow)
1150 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1151 } else {
1152 // We're done with marking.
1153 JavaThread::satb_mark_queue_set().set_active_all_threads(false);
1155 if (VerifyDuringGC) {
1156 HandleMark hm; // handle scope
1157 gclog_or_tty->print(" VerifyDuringGC:(after)");
1158 Universe::heap()->prepare_for_verify();
1159 Universe::heap()->verify(/* allow_dirty */ true,
1160 /* silent */ false,
1161 /* use_prev_marking */ false);
1162 }
1163 }
1165 #if VERIFY_OBJS_PROCESSED
1166 _scan_obj_cl.objs_processed = 0;
1167 ThreadLocalObjQueue::objs_enqueued = 0;
1168 #endif
1170 // Statistics
1171 double now = os::elapsedTime();
1172 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1173 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1174 _remark_times.add((now - start) * 1000.0);
1176 GCOverheadReporter::recordSTWEnd(now);
1177 for (int i = 0; i < (int)_max_task_num; ++i)
1178 _tasks[i]->disable_co_tracker();
1179 _cleanup_co_tracker.enable();
1180 _cleanup_co_tracker.reset(cleanup_task_overhead());
1181 g1p->record_concurrent_mark_remark_end();
1182 }
1185 #define CARD_BM_TEST_MODE 0
1187 class CalcLiveObjectsClosure: public HeapRegionClosure {
1189 CMBitMapRO* _bm;
1190 ConcurrentMark* _cm;
1191 COTracker* _co_tracker;
1192 bool _changed;
1193 bool _yield;
1194 size_t _words_done;
1195 size_t _tot_live;
1196 size_t _tot_used;
1197 size_t _regions_done;
1198 double _start_vtime_sec;
1200 BitMap* _region_bm;
1201 BitMap* _card_bm;
1202 intptr_t _bottom_card_num;
1203 bool _final;
1205 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1206 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1207 #if CARD_BM_TEST_MODE
1208 guarantee(_card_bm->at(i - _bottom_card_num),
1209 "Should already be set.");
1210 #else
1211 _card_bm->par_at_put(i - _bottom_card_num, 1);
1212 #endif
1213 }
1214 }
1216 public:
1217 CalcLiveObjectsClosure(bool final,
1218 CMBitMapRO *bm, ConcurrentMark *cm,
1219 BitMap* region_bm, BitMap* card_bm,
1220 COTracker* co_tracker) :
1221 _bm(bm), _cm(cm), _changed(false), _yield(true),
1222 _words_done(0), _tot_live(0), _tot_used(0),
1223 _region_bm(region_bm), _card_bm(card_bm),
1224 _final(final), _co_tracker(co_tracker),
1225 _regions_done(0), _start_vtime_sec(0.0)
1226 {
1227 _bottom_card_num =
1228 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1229 CardTableModRefBS::card_shift);
1230 }
1232 // It takes a region that's not empty (i.e., it has at least one
1233 // live object in it) and sets its corresponding bit on the region
1234 // bitmap to 1. If the region is "starts humongous" it will also set
1235 // to 1 the bits on the region bitmap that correspond to its
1236 // associated "continues humongous" regions.
1237 void set_bit_for_region(HeapRegion* hr) {
1238 assert(!hr->continuesHumongous(), "should have filtered those out");
1240 size_t index = hr->hrs_index();
1241 if (!hr->startsHumongous()) {
1242 // Normal (non-humongous) case: just set the bit.
1243 _region_bm->par_at_put((BitMap::idx_t) index, true);
1244 } else {
1245 // Starts humongous case: calculate how many regions are part of
1246 // this humongous region and then set the bit range. It might
1247 // have been a bit more efficient to look at the object that
1248 // spans these humongous regions to calculate their number from
1249 // the object's size. However, it's a good idea to calculate
1250 // this based on the metadata itself, and not the region
1251 // contents, so that this code is not aware of what goes into
1252 // the humongous regions (in case this changes in the future).
1253 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1254 size_t end_index = index + 1;
1255 while (end_index < g1h->n_regions()) {
1256 HeapRegion* chr = g1h->region_at(end_index);
1257 if (!chr->continuesHumongous()) {
1258 break;
1259 }
1260 end_index += 1;
1261 }
1262 _region_bm->par_at_put_range((BitMap::idx_t) index,
1263 (BitMap::idx_t) end_index, true);
1264 }
1265 }
1267 bool doHeapRegion(HeapRegion* hr) {
1268 if (_co_tracker != NULL)
1269 _co_tracker->update();
1271 if (!_final && _regions_done == 0)
1272 _start_vtime_sec = os::elapsedVTime();
1274 if (hr->continuesHumongous()) {
1275 // We will ignore these here and process them when their
1276 // associated "starts humongous" region is processed (see
1277 // set_bit_for_region()). Note that we cannot rely on their
1278 // associated "starts humongous" region to have its bit set to
1279 // 1 since, due to the region chunking in the parallel region
1280 // iteration, a "continues humongous" region might be visited
1281 // before its associated "starts humongous".
1282 return false;
1283 }
1285 HeapWord* nextTop = hr->next_top_at_mark_start();
1286 HeapWord* start = hr->top_at_conc_mark_count();
1287 assert(hr->bottom() <= start && start <= hr->end() &&
1288 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1289 start <= nextTop,
1290 "Preconditions.");
1291 // Record the number of words we'll examine.
1292 size_t words_done = (nextTop - start);
1293 // Find the first marked object at or after "start".
1294 start = _bm->getNextMarkedWordAddress(start, nextTop);
1295 size_t marked_bytes = 0;
1297 // Below, the term "card num" means the result of shifting an address
1298 // by the card shift -- address 0 corresponds to card number 0. One
1299 // must subtract the card num of the bottom of the heap to obtain a
1300 // card table index.
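// A worked example of that arithmetic (assuming the usual 512-byte
// cards, i.e. CardTableModRefBS::card_shift == 9, and a heap bottom of
// 0x7f0000000000, an address chosen purely for illustration):
//   _bottom_card_num = 0x7f0000000000 >> 9     = 0x3f80000000
//   an object at 0x7f0000000a00 has
//     card num   = 0x7f0000000a00 >> 9         = 0x3f80000005
//     card index = card num - _bottom_card_num = 5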
1301 // The first card num of the sequence of live cards currently being
1302 // constructed. -1 ==> no sequence.
1303 intptr_t start_card_num = -1;
1304 // The last card num of the sequence of live cards currently being
1305 // constructed. -1 ==> no sequence.
1306 intptr_t last_card_num = -1;
1308 while (start < nextTop) {
1309 if (_yield && _cm->do_yield_check()) {
1310 // We yielded. It might be for a full collection, in which case
1311 // all bets are off; terminate the traversal.
1312 if (_cm->has_aborted()) {
1313 _changed = false;
1314 return true;
1315 } else {
1316 // Otherwise, it might be a collection pause, and the region
1317 // we're looking at might be in the collection set. We'll
1318 // abandon this region.
1319 return false;
1320 }
1321 }
1322 oop obj = oop(start);
1323 int obj_sz = obj->size();
1324 // The card num of the start of the current object.
1325 intptr_t obj_card_num =
1326 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1328 HeapWord* obj_last = start + obj_sz - 1;
1329 intptr_t obj_last_card_num =
1330 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1332 if (obj_card_num != last_card_num) {
1333 if (start_card_num == -1) {
1334 assert(last_card_num == -1, "Both or neither.");
1335 start_card_num = obj_card_num;
1336 } else {
1337 assert(last_card_num != -1, "Both or neither.");
1338 assert(obj_card_num >= last_card_num, "Inv");
1339 if ((obj_card_num - last_card_num) > 1) {
1340 // Mark the last run, and start a new one.
1341 mark_card_num_range(start_card_num, last_card_num);
1342 start_card_num = obj_card_num;
1343 }
1344 }
1345 #if CARD_BM_TEST_MODE
1346 /*
1347 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1348 obj_card_num - _bottom_card_num,
1349 obj_last_card_num - _bottom_card_num);
1350 */
1351 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1352 _card_bm->par_at_put(j - _bottom_card_num, 1);
1353 }
1354 #endif
1355 }
1356 // In any case, we set the last card num.
1357 last_card_num = obj_last_card_num;
1359 marked_bytes += obj_sz * HeapWordSize;
1360 // Find the next marked object after this one.
1361 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1362 _changed = true;
1363 }
1364 // Handle the last range, if any.
1365 if (start_card_num != -1)
1366 mark_card_num_range(start_card_num, last_card_num);
1367 if (_final) {
1368 // Mark the allocated-since-marking portion...
1369 HeapWord* tp = hr->top();
1370 if (nextTop < tp) {
1371 start_card_num =
1372 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1373 last_card_num =
1374 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1375 mark_card_num_range(start_card_num, last_card_num);
1376 // This definitely means the region has live objects.
1377 set_bit_for_region(hr);
1378 }
1379 }
1381 hr->add_to_marked_bytes(marked_bytes);
1382 // Update the live region bitmap.
1383 if (marked_bytes > 0) {
1384 set_bit_for_region(hr);
1385 }
1386 hr->set_top_at_conc_mark_count(nextTop);
1387 _tot_live += hr->next_live_bytes();
1388 _tot_used += hr->used();
1389 _words_done = words_done;
1391 if (!_final) {
1392 ++_regions_done;
1393 if (_regions_done % 10 == 0) {
1394 double end_vtime_sec = os::elapsedVTime();
1395 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1396 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1397 jlong sleep_time_ms =
1398 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1399 #if 0
1400 gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
1401 "overhead %1.4lf",
1402 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1403 _co_tracker->concOverhead(os::elapsedTime()));
1404 #endif
1405 os::sleep(Thread::current(), sleep_time_ms, false);
1406 _start_vtime_sec = end_vtime_sec;
1407 }
1408 }
1409 }
1411 return false;
1412 }
1414 bool changed() { return _changed; }
1415 void reset() { _changed = false; _words_done = 0; }
1416 void no_yield() { _yield = false; }
1417 size_t words_done() { return _words_done; }
1418 size_t tot_live() { return _tot_live; }
1419 size_t tot_used() { return _tot_used; }
1420 };
1423 void ConcurrentMark::calcDesiredRegions() {
1424 guarantee( _cleanup_co_tracker.enabled(), "invariant" );
1425 _cleanup_co_tracker.start();
1427 _region_bm.clear();
1428 _card_bm.clear();
1429 CalcLiveObjectsClosure calccl(false /*final*/,
1430 nextMarkBitMap(), this,
1431 &_region_bm, &_card_bm,
1432 &_cleanup_co_tracker);
1433 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1434 g1h->heap_region_iterate(&calccl);
1436 do {
1437 calccl.reset();
1438 g1h->heap_region_iterate(&calccl);
1439 } while (calccl.changed());
1441 _cleanup_co_tracker.update(true);
1442 }
1444 class G1ParFinalCountTask: public AbstractGangTask {
1445 protected:
1446 G1CollectedHeap* _g1h;
1447 CMBitMap* _bm;
1448 size_t _n_workers;
1449 size_t *_live_bytes;
1450 size_t *_used_bytes;
1451 BitMap* _region_bm;
1452 BitMap* _card_bm;
1453 public:
1454 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1455 BitMap* region_bm, BitMap* card_bm) :
1456 AbstractGangTask("G1 final counting"), _g1h(g1h),
1457 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1458 {
1459 if (ParallelGCThreads > 0)
1460 _n_workers = _g1h->workers()->total_workers();
1461 else
1462 _n_workers = 1;
1463 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1464 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1465 }
1467 ~G1ParFinalCountTask() {
1468 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1469 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1470 }
1472 void work(int i) {
1473 CalcLiveObjectsClosure calccl(true /*final*/,
1474 _bm, _g1h->concurrent_mark(),
1475 _region_bm, _card_bm,
1476 NULL /* CO tracker */);
1477 calccl.no_yield();
1478 if (ParallelGCThreads > 0) {
1479 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1480 HeapRegion::FinalCountClaimValue);
1481 } else {
1482 _g1h->heap_region_iterate(&calccl);
1483 }
1484 assert(calccl.complete(), "Shouldn't have yielded!");
1486 guarantee( (size_t)i < _n_workers, "invariant" );
1487 _live_bytes[i] = calccl.tot_live();
1488 _used_bytes[i] = calccl.tot_used();
1489 }
1490 size_t live_bytes() {
1491 size_t live_bytes = 0;
1492 for (size_t i = 0; i < _n_workers; ++i)
1493 live_bytes += _live_bytes[i];
1494 return live_bytes;
1495 }
1496 size_t used_bytes() {
1497 size_t used_bytes = 0;
1498 for (size_t i = 0; i < _n_workers; ++i)
1499 used_bytes += _used_bytes[i];
1500 return used_bytes;
1501 }
1502 };
1504 class G1ParNoteEndTask;
1506 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1507 G1CollectedHeap* _g1;
1508 int _worker_num;
1509 size_t _max_live_bytes;
1510 size_t _regions_claimed;
1511 size_t _freed_bytes;
1512 size_t _cleared_h_regions;
1513 size_t _freed_regions;
1514 UncleanRegionList* _unclean_region_list;
1515 double _claimed_region_time;
1516 double _max_region_time;
1518 public:
1519 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1520 UncleanRegionList* list,
1521 int worker_num);
1522 size_t freed_bytes() { return _freed_bytes; }
1523 size_t cleared_h_regions() { return _cleared_h_regions; }
1524 size_t freed_regions() { return _freed_regions; }
1525 UncleanRegionList* unclean_region_list() {
1526 return _unclean_region_list;
1527 }
1529 bool doHeapRegion(HeapRegion *r);
1531 size_t max_live_bytes() { return _max_live_bytes; }
1532 size_t regions_claimed() { return _regions_claimed; }
1533 double claimed_region_time_sec() { return _claimed_region_time; }
1534 double max_region_time_sec() { return _max_region_time; }
1535 };
1537 class G1ParNoteEndTask: public AbstractGangTask {
1538 friend class G1NoteEndOfConcMarkClosure;
1539 protected:
1540 G1CollectedHeap* _g1h;
1541 size_t _max_live_bytes;
1542 size_t _freed_bytes;
1543 ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1544 public:
1545 G1ParNoteEndTask(G1CollectedHeap* g1h,
1546 ConcurrentMark::ParCleanupThreadState**
1547 par_cleanup_thread_state) :
1548 AbstractGangTask("G1 note end"), _g1h(g1h),
1549 _max_live_bytes(0), _freed_bytes(0),
1550 _par_cleanup_thread_state(par_cleanup_thread_state)
1551 {}
1553 void work(int i) {
1554 double start = os::elapsedTime();
1555 G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1556 &_par_cleanup_thread_state[i]->list,
1557 i);
1558 if (ParallelGCThreads > 0) {
1559 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1560 HeapRegion::NoteEndClaimValue);
1561 } else {
1562 _g1h->heap_region_iterate(&g1_note_end);
1563 }
1564 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1566 // Now finish up freeing the current thread's regions.
1567 _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1568 g1_note_end.cleared_h_regions(),
1569 0, NULL);
1570 {
1571 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1572 _max_live_bytes += g1_note_end.max_live_bytes();
1573 _freed_bytes += g1_note_end.freed_bytes();
1574 }
1575 double end = os::elapsedTime();
1576 if (G1PrintParCleanupStats) {
1577 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1578 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1579 i, start, end, (end-start)*1000.0,
1580 g1_note_end.regions_claimed(),
1581 g1_note_end.claimed_region_time_sec()*1000.0,
1582 g1_note_end.max_region_time_sec()*1000.0);
1583 }
1584 }
1585 size_t max_live_bytes() { return _max_live_bytes; }
1586 size_t freed_bytes() { return _freed_bytes; }
1587 };
1589 class G1ParScrubRemSetTask: public AbstractGangTask {
1590 protected:
1591 G1RemSet* _g1rs;
1592 BitMap* _region_bm;
1593 BitMap* _card_bm;
1594 public:
1595 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1596 BitMap* region_bm, BitMap* card_bm) :
1597 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1598 _region_bm(region_bm), _card_bm(card_bm)
1599 {}
1601 void work(int i) {
1602 if (ParallelGCThreads > 0) {
1603 _g1rs->scrub_par(_region_bm, _card_bm, i,
1604 HeapRegion::ScrubRemSetClaimValue);
1605 } else {
1606 _g1rs->scrub(_region_bm, _card_bm);
1607 }
1608 }
1610 };
1612 G1NoteEndOfConcMarkClosure::
1613 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1614 UncleanRegionList* list,
1615 int worker_num)
1616 : _g1(g1), _worker_num(worker_num),
1617 _max_live_bytes(0), _regions_claimed(0),
1618 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1619 _claimed_region_time(0.0), _max_region_time(0.0),
1620 _unclean_region_list(list)
1621 {}
1623 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1624 // We use a claim value of zero here because all regions
1625 // were claimed with value 1 in the FinalCount task.
1626 r->reset_gc_time_stamp();
1627 if (!r->continuesHumongous()) {
1628 double start = os::elapsedTime();
1629 _regions_claimed++;
1630 r->note_end_of_marking();
1631 _max_live_bytes += r->max_live_bytes();
1632 _g1->free_region_if_totally_empty_work(r,
1633 _freed_bytes,
1634 _cleared_h_regions,
1635 _freed_regions,
1636 _unclean_region_list,
1637 true /*par*/);
1638 double region_time = (os::elapsedTime() - start);
1639 _claimed_region_time += region_time;
1640 if (region_time > _max_region_time) _max_region_time = region_time;
1641 }
1642 return false;
1643 }
1645 void ConcurrentMark::cleanup() {
1646 // world is stopped at this checkpoint
1647 assert(SafepointSynchronize::is_at_safepoint(),
1648 "world should be stopped");
1649 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1651 // If a full collection has happened, we shouldn't do this.
1652 if (has_aborted()) {
1653 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1654 return;
1655 }
1657 if (VerifyDuringGC) {
1658 HandleMark hm; // handle scope
1659 gclog_or_tty->print(" VerifyDuringGC:(before)");
1660 Universe::heap()->prepare_for_verify();
1661 Universe::verify(/* allow dirty */ true,
1662 /* silent */ false,
1663 /* prev marking */ true);
1664 }
1666 _cleanup_co_tracker.disable();
1668 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1669 g1p->record_concurrent_mark_cleanup_start();
1671 double start = os::elapsedTime();
1672 GCOverheadReporter::recordSTWStart(start);
1674 // Do counting once more with the world stopped for good measure.
1675 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1676 &_region_bm, &_card_bm);
1677 if (ParallelGCThreads > 0) {
1678 assert(g1h->check_heap_region_claim_values(
1679 HeapRegion::InitialClaimValue),
1680 "sanity check");
1682 int n_workers = g1h->workers()->total_workers();
1683 g1h->set_par_threads(n_workers);
1684 g1h->workers()->run_task(&g1_par_count_task);
1685 g1h->set_par_threads(0);
1687 assert(g1h->check_heap_region_claim_values(
1688 HeapRegion::FinalCountClaimValue),
1689 "sanity check");
1690 } else {
1691 g1_par_count_task.work(0);
1692 }
1694 size_t known_garbage_bytes =
1695 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1696 #if 0
1697 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1698 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1699 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1700 (double) known_garbage_bytes / (double) (1024 * 1024));
1701 #endif // 0
1702 g1p->set_known_garbage_bytes(known_garbage_bytes);
1704 size_t start_used_bytes = g1h->used();
1705 _at_least_one_mark_complete = true;
1706 g1h->set_marking_complete();
1708 double count_end = os::elapsedTime();
1709 double this_final_counting_time = (count_end - start);
1710 if (G1PrintParCleanupStats) {
1711 gclog_or_tty->print_cr("Cleanup:");
1712 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1713 this_final_counting_time*1000.0);
1714 }
1715 _total_counting_time += this_final_counting_time;
1717 // Install newly created mark bitMap as "prev".
1718 swapMarkBitMaps();
1720 g1h->reset_gc_time_stamp();
1722 // Note end of marking in all heap regions.
1723 double note_end_start = os::elapsedTime();
1724 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1725 if (ParallelGCThreads > 0) {
1726 int n_workers = g1h->workers()->total_workers();
1727 g1h->set_par_threads(n_workers);
1728 g1h->workers()->run_task(&g1_par_note_end_task);
1729 g1h->set_par_threads(0);
1731 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1732 "sanity check");
1733 } else {
1734 g1_par_note_end_task.work(0);
1735 }
1736 g1h->set_unclean_regions_coming(true);
1737 double note_end_end = os::elapsedTime();
1738 // Tell the mutators that there might be unclean regions coming...
1739 if (G1PrintParCleanupStats) {
1740 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1741 (note_end_end - note_end_start)*1000.0);
1742 }
1744 // Now we "scrub" remembered sets. Note that we must do this before the
1745 // call below, since it affects the metric by which we sort the heap
1746 // regions.
1747 if (G1ScrubRemSets) {
1748 double rs_scrub_start = os::elapsedTime();
1749 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1750 if (ParallelGCThreads > 0) {
1751 int n_workers = g1h->workers()->total_workers();
1752 g1h->set_par_threads(n_workers);
1753 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1754 g1h->set_par_threads(0);
1756 assert(g1h->check_heap_region_claim_values(
1757 HeapRegion::ScrubRemSetClaimValue),
1758 "sanity check");
1759 } else {
1760 g1_par_scrub_rs_task.work(0);
1761 }
1763 double rs_scrub_end = os::elapsedTime();
1764 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1765 _total_rs_scrub_time += this_rs_scrub_time;
1766 }
1768 // this will also free any regions totally full of garbage objects,
1769 // and sort the regions.
1770 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1771 g1_par_note_end_task.freed_bytes(),
1772 g1_par_note_end_task.max_live_bytes());
1774 // Statistics.
1775 double end = os::elapsedTime();
1776 _cleanup_times.add((end - start) * 1000.0);
1777 GCOverheadReporter::recordSTWEnd(end);
1779 // G1CollectedHeap::heap()->print();
1780 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1781 // G1CollectedHeap::heap()->get_gc_time_stamp());
1783 if (PrintGC || PrintGCDetails) {
1784 g1h->print_size_transition(gclog_or_tty,
1785 start_used_bytes,
1786 g1h->used(),
1787 g1h->capacity());
1788 }
1790 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1791 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1793 // We need to make this be a "collection" so any collection pause that
1794 // races with it goes around and waits for completeCleanup to finish.
1795 g1h->increment_total_collections();
1797 if (VerifyDuringGC) {
1798 HandleMark hm; // handle scope
1799 gclog_or_tty->print(" VerifyDuringGC:(after)");
1800 Universe::heap()->prepare_for_verify();
1801 Universe::verify(/* allow dirty */ true,
1802 /* silent */ false,
1803 /* prev marking */ true);
1804 }
1805 }
1807 void ConcurrentMark::completeCleanup() {
1808 // A full collection intervened.
1809 if (has_aborted()) return;
1812 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1813 for (int t = 0; t < last; t++) {
1814 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1815 assert(list->well_formed(), "Inv");
1816 HeapRegion* hd = list->hd();
1817 while (hd != NULL) {
1818 // Now finish up the other stuff.
1819 hd->rem_set()->clear();
1820 HeapRegion* next_hd = hd->next_from_unclean_list();
1821 (void)list->pop();
1822 guarantee(list->hd() == next_hd, "how not?");
1823 _g1h->put_region_on_unclean_list(hd);
1824 if (!hd->isHumongous()) {
1825 // This bumps the _free_regions count by 1.
1826 _g1h->finish_free_region_work(0, 0, 1, NULL);
1827 }
1828 hd = list->hd();
1829 guarantee(hd == next_hd, "how not?");
1830 }
1831 }
1832 }
1835 class G1CMIsAliveClosure: public BoolObjectClosure {
1836 G1CollectedHeap* _g1;
1837 public:
1838 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1839 _g1(g1)
1840 {}
1842 void do_object(oop obj) {
1843 assert(false, "not to be invoked");
1844 }
1845 bool do_object_b(oop obj) {
1846 HeapWord* addr = (HeapWord*)obj;
1847 return addr != NULL &&
1848 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1849 }
1850 };
1852 class G1CMKeepAliveClosure: public OopClosure {
1853 G1CollectedHeap* _g1;
1854 ConcurrentMark* _cm;
1855 CMBitMap* _bitMap;
1856 public:
1857 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1858 CMBitMap* bitMap) :
1859 _g1(g1), _cm(cm),
1860 _bitMap(bitMap) {}
1862 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1863 virtual void do_oop( oop* p) { do_oop_work(p); }
1865 template <class T> void do_oop_work(T* p) {
1866 oop thisOop = oopDesc::load_decode_heap_oop(p);
1867 HeapWord* addr = (HeapWord*)thisOop;
1868 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1869 _bitMap->mark(addr);
1870 _cm->mark_stack_push(thisOop);
1871 }
1872 }
1873 };
1875 class G1CMDrainMarkingStackClosure: public VoidClosure {
1876 CMMarkStack* _markStack;
1877 CMBitMap* _bitMap;
1878 G1CMKeepAliveClosure* _oopClosure;
1879 public:
1880 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1881 G1CMKeepAliveClosure* oopClosure) :
1882 _bitMap(bitMap),
1883 _markStack(markStack),
1884 _oopClosure(oopClosure)
1885 {}
1887 void do_void() {
1888 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1889 }
1890 };
1892 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1893 ResourceMark rm;
1894 HandleMark hm;
1895 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1896 ReferenceProcessor* rp = g1h->ref_processor();
1898 // Process weak references.
1899 rp->setup_policy(clear_all_soft_refs);
1900 assert(_markStack.isEmpty(), "mark stack should be empty");
1902 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1903 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1904 G1CMDrainMarkingStackClosure
1905 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1906 &g1KeepAliveClosure);
1908 // XXXYYY Also: copy the parallel ref processing code from CMS.
1909 rp->process_discovered_references(&g1IsAliveClosure,
1910 &g1KeepAliveClosure,
1911 &g1DrainMarkingStackClosure,
1912 NULL);
1913 assert(_markStack.overflow() || _markStack.isEmpty(),
1914 "mark stack should be empty (unless it overflowed)");
1915 if (_markStack.overflow()) {
1916 set_has_overflown();
1917 }
1919 rp->enqueue_discovered_references();
1920 rp->verify_no_references_recorded();
1921 assert(!rp->discovery_enabled(), "should have been disabled");
1923 // Now clean up stale oops in SymbolTable and StringTable
1924 SymbolTable::unlink(&g1IsAliveClosure);
1925 StringTable::unlink(&g1IsAliveClosure);
1926 }
1928 void ConcurrentMark::swapMarkBitMaps() {
1929 CMBitMapRO* temp = _prevMarkBitMap;
1930 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1931 _nextMarkBitMap = (CMBitMap*) temp;
1932 }
1934 class CMRemarkTask: public AbstractGangTask {
1935 private:
1936 ConcurrentMark *_cm;
1938 public:
1939 void work(int worker_i) {
1940 // Since all available tasks are actually started, we should
1941 // only proceed if we're supposed to be active.
1942 if ((size_t)worker_i < _cm->active_tasks()) {
1943 CMTask* task = _cm->task(worker_i);
1944 task->record_start_time();
1945 do {
1946 task->do_marking_step(1000000000.0 /* something very large */);
1947 } while (task->has_aborted() && !_cm->has_overflown());
1948 // If we overflow, then we do not want to restart. We instead
1949 // want to abort remark and do concurrent marking again.
1950 task->record_end_time();
1951 }
1952 }
1954 CMRemarkTask(ConcurrentMark* cm) :
1955 AbstractGangTask("Par Remark"), _cm(cm) { }
1956 };
1958 void ConcurrentMark::checkpointRootsFinalWork() {
1959 ResourceMark rm;
1960 HandleMark hm;
1961 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1963 g1h->ensure_parsability(false);
1965 if (ParallelGCThreads > 0) {
1966 g1h->change_strong_roots_parity();
1967 // this is remark, so we'll use up all available threads
1968 int active_workers = ParallelGCThreads;
1969 set_phase(active_workers, false);
1971 CMRemarkTask remarkTask(this);
1972 // We will start all available threads, even if we decide that the
1973 // active_workers will be fewer. The extra ones will just bail out
1974 // immediately.
1975 int n_workers = g1h->workers()->total_workers();
1976 g1h->set_par_threads(n_workers);
1977 g1h->workers()->run_task(&remarkTask);
1978 g1h->set_par_threads(0);
1980 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1981 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1982 } else {
1983 g1h->change_strong_roots_parity();
1984 // this is remark, so we'll use up all available threads
1985 int active_workers = 1;
1986 set_phase(active_workers, false);
1988 CMRemarkTask remarkTask(this);
1989 // We will start all available threads, even if we decide that the
1990 // active_workers will be fewer. The extra ones will just bail out
1991 // immediately.
1992 remarkTask.work(0);
1994 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1995 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1996 }
1998 print_stats();
2000 if (!restart_for_overflow())
2001 set_non_marking_state();
2003 #if VERIFY_OBJS_PROCESSED
2004 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2005 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2006 _scan_obj_cl.objs_processed,
2007 ThreadLocalObjQueue::objs_enqueued);
2008 guarantee(_scan_obj_cl.objs_processed ==
2009 ThreadLocalObjQueue::objs_enqueued,
2010 "Different number of objs processed and enqueued.");
2011 }
2012 #endif
2013 }
2015 class ReachablePrinterOopClosure: public OopClosure {
2016 private:
2017 G1CollectedHeap* _g1h;
2018 CMBitMapRO* _bitmap;
2019 outputStream* _out;
2021 public:
2022 ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
2023 _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
2025 void do_oop(narrowOop* p) { do_oop_work(p); }
2026 void do_oop( oop* p) { do_oop_work(p); }
2028 template <class T> void do_oop_work(T* p) {
2029 oop obj = oopDesc::load_decode_heap_oop(p);
2030 const char* str = NULL;
2031 const char* str2 = "";
2033 if (!_g1h->is_in_g1_reserved(obj))
2034 str = "outside G1 reserved";
2035 else {
2036 HeapRegion* hr = _g1h->heap_region_containing(obj);
2037 guarantee( hr != NULL, "invariant" );
2038 if (hr->obj_allocated_since_prev_marking(obj)) {
2039 str = "over TAMS";
2040 if (_bitmap->isMarked((HeapWord*) obj))
2041 str2 = " AND MARKED";
2042 } else if (_bitmap->isMarked((HeapWord*) obj))
2043 str = "marked";
2044 else
2045 str = "#### NOT MARKED ####";
2046 }
2048 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
2049 p, (void*) obj, str, str2);
2050 }
2051 };
2053 class ReachablePrinterClosure: public BitMapClosure {
2054 private:
2055 CMBitMapRO* _bitmap;
2056 outputStream* _out;
2058 public:
2059 ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2060 _bitmap(bitmap), _out(out) { }
2062 bool do_bit(size_t offset) {
2063 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2064 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2066 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2067 oop(addr)->oop_iterate(&oopCl);
2068 _out->print_cr("");
2070 return true;
2071 }
2072 };
2074 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2075 private:
2076 CMBitMapRO* _bitmap;
2077 outputStream* _out;
2079 public:
2080 void do_object(oop o) {
2081 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2083 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2084 o->oop_iterate(&oopCl);
2085 _out->print_cr("");
2086 }
2088 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2089 _bitmap(bitmap), _out(out) { }
2090 };
2092 class RegionReachablePrinterClosure : public HeapRegionClosure {
2093 private:
2094 CMBitMapRO* _bitmap;
2095 outputStream* _out;
2097 public:
2098 bool doHeapRegion(HeapRegion* hr) {
2099 HeapWord* b = hr->bottom();
2100 HeapWord* e = hr->end();
2101 HeapWord* t = hr->top();
2102 HeapWord* p = hr->prev_top_at_mark_start();
2103 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2104 "PTAMS: "PTR_FORMAT, b, e, t, p);
2105 _out->print_cr("");
2107 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2108 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2110 return false;
2111 }
2113 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2114 outputStream* out) :
2115 _bitmap(bitmap), _out(out) { }
2116 };
2118 void ConcurrentMark::print_prev_bitmap_reachable() {
2119 outputStream* out = gclog_or_tty;
2121 #if SEND_HEAP_DUMP_TO_FILE
2122 guarantee(heap_dump_file == NULL, "Protocol");
2123 char fn_buf[100];
2124 sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2125 heap_dump_file = fopen(fn_buf, "w");
2126 fileStream fstream(heap_dump_file);
2127 out = &fstream;
2128 #endif // SEND_HEAP_DUMP_TO_FILE
2130 RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2131 out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2132 _g1h->heap_region_iterate(&rcl);
2133 out->print_cr("");
2135 ReachablePrinterClosure cl(_prevMarkBitMap, out);
2136 out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2137 _prevMarkBitMap->iterate(&cl);
2138 out->print_cr("");
2140 #if SEND_HEAP_DUMP_TO_FILE
2141 fclose(heap_dump_file);
2142 heap_dump_file = NULL;
2143 #endif // SEND_HEAP_DUMP_TO_FILE
2144 }
2146 // This note is for drainAllSATBBuffers and the code in between.
2147 // In the future we could reuse a task to do this work during an
2148 // evacuation pause (since now tasks are not active and can be claimed
2149 // during an evacuation pause). This was a late change to the code and
2150 // is currently not being taken advantage of.
2152 class CMGlobalObjectClosure : public ObjectClosure {
2153 private:
2154 ConcurrentMark* _cm;
2156 public:
2157 void do_object(oop obj) {
2158 _cm->deal_with_reference(obj);
2159 }
2161 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2162 };
2164 void ConcurrentMark::deal_with_reference(oop obj) {
2165 if (verbose_high())
2166 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2167 (void*) obj);
2170 HeapWord* objAddr = (HeapWord*) obj;
2171 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2172 if (_g1h->is_in_g1_reserved(objAddr)) {
2173 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2174 HeapRegion* hr = _g1h->heap_region_containing(obj);
2175 if (_g1h->is_obj_ill(obj, hr)) {
2176 if (verbose_high())
2177 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2178 "marked", (void*) obj);
2180 // we need to mark it first
2181 if (_nextMarkBitMap->parMark(objAddr)) {
2182 // No OrderAccess::store_load() is needed. It is implicit in the
2183 // CAS done in parMark(objAddr) above
2184 HeapWord* finger = _finger;
2185 if (objAddr < finger) {
2186 if (verbose_high())
2187 gclog_or_tty->print_cr("[global] below the global finger "
2188 "("PTR_FORMAT"), pushing it", finger);
2189 if (!mark_stack_push(obj)) {
2190 if (verbose_low())
2191 gclog_or_tty->print_cr("[global] global stack overflow during "
2192 "deal_with_reference");
2193 }
2194 }
2195 }
2196 }
2197 }
2198 }
2200 void ConcurrentMark::drainAllSATBBuffers() {
2201 CMGlobalObjectClosure oc(this);
2202 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2203 satb_mq_set.set_closure(&oc);
2205 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2206 if (verbose_medium())
2207 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2208 }
2210 // no need to check whether we should do this, as this is only
2211 // called during an evacuation pause
2212 satb_mq_set.iterate_closure_all_threads();
2214 satb_mq_set.set_closure(NULL);
2215 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2216 }
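// For illustration of where the drained entries come from: while marking
// is active, the mutators' SATB pre-write barrier enqueues the value a
// reference field held before it is overwritten, preserving the
// snapshot-at-the-beginning invariant. A minimal sketch of that barrier,
// using made-up names (the real barrier is emitted by the interpreter and
// the compilers):
//
//   void satb_pre_write_barrier(oop* field) {
//     if (marking_is_active) {         // made-up per-thread flag
//       oop old_val = *field;          // value about to be overwritten
//       if (old_val != NULL) {
//         my_satb_queue()->enqueue(old_val);  // made-up queue accessor
//       }
//     }
//   }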
2218 void ConcurrentMark::markPrev(oop p) {
2219 // Note we are overriding the read-only view of the prev map here, via
2220 // the cast.
2221 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2222 }
2224 void ConcurrentMark::clear(oop p) {
2225 assert(p != NULL && p->is_oop(), "expected an oop");
2226 HeapWord* addr = (HeapWord*)p;
2227 assert(addr >= _nextMarkBitMap->startWord() &&
2228 addr < _nextMarkBitMap->endWord(), "in a region");
2230 _nextMarkBitMap->clear(addr);
2231 }
2233 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2234 // Note we are overriding the read-only view of the prev map here, via
2235 // the cast.
2236 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2237 _nextMarkBitMap->clearRange(mr);
2238 }
2240 HeapRegion*
2241 ConcurrentMark::claim_region(int task_num) {
2242 // "checkpoint" the finger
2243 HeapWord* finger = _finger;
2245 // _heap_end will not change underneath our feet; it only changes at
2246 // yield points.
2247 while (finger < _heap_end) {
2248 tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2250 // is the gap between reading the finger and doing the CAS too long?
2252 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2253 HeapWord* bottom = curr_region->bottom();
2254 HeapWord* end = curr_region->end();
2255 HeapWord* limit = curr_region->next_top_at_mark_start();
2257 if (verbose_low())
2258 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2259 "["PTR_FORMAT", "PTR_FORMAT"), "
2260 "limit = "PTR_FORMAT,
2261 task_num, curr_region, bottom, end, limit);
2263 HeapWord* res =
2264 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2265 if (res == finger) {
2266 // we succeeded
2268 // notice that _finger == end cannot be guaranteed here, since
2269 // someone else might have moved the finger even further
2270 guarantee( _finger >= end, "the finger should have moved forward" );
2272 if (verbose_low())
2273 gclog_or_tty->print_cr("[%d] we were successful with region = "
2274 PTR_FORMAT, task_num, curr_region);
2276 if (limit > bottom) {
2277 if (verbose_low())
2278 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2279 "returning it ", task_num, curr_region);
2280 return curr_region;
2281 } else {
2282 tmp_guarantee_CM( limit == bottom,
2283 "the region limit should be at bottom" );
2284 if (verbose_low())
2285 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2286 "returning NULL", task_num, curr_region);
2287 // we return NULL and the caller should try calling
2288 // claim_region() again.
2289 return NULL;
2290 }
2291 } else {
2292 guarantee( _finger > finger, "the finger should have moved forward" );
2293 if (verbose_low())
2294 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2295 "global finger = "PTR_FORMAT", "
2296 "our finger = "PTR_FORMAT,
2297 task_num, _finger, finger);
2299 // read it again
2300 finger = _finger;
2301 }
2302 }
2304 return NULL;
2305 }
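// For illustration, an example interleaving of two tasks racing in
// claim_region() above, with the global finger F at the bottom of region
// R = [bottom, end):
//
//   task A: reads finger == F, computes end of R
//   task B: reads finger == F, computes end of R
//   task A: cmpxchg_ptr(end, &_finger, F) returns F -> A claims R
//   task B: cmpxchg_ptr(end, &_finger, F) returns end != F -> B fails
//   task B: re-reads _finger (now >= end) and retries on the next region
//
// The CAS on the shared finger makes claiming lock-free: a losing task
// never blocks, it simply retries further along the heap.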
2307 void ConcurrentMark::oops_do(OopClosure* cl) {
2308 if (_markStack.size() > 0 && verbose_low())
2309 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2310 "size = %d", _markStack.size());
2311 // we first iterate over the contents of the mark stack...
2312 _markStack.oops_do(cl);
2314 for (int i = 0; i < (int)_max_task_num; ++i) {
2315 OopTaskQueue* queue = _task_queues->queue((int)i);
2317 if (queue->size() > 0 && verbose_low())
2318 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2319 "size = %d", i, queue->size());
2321 // ...then over the contents of all the task queues.
2322 queue->oops_do(cl);
2323 }
2325 // finally, invalidate any entries in the region stack that
2326 // point into the collection set
2327 if (_regionStack.invalidate_entries_into_cset()) {
2328 // otherwise, any gray objects copied during the evacuation pause
2329 // might not be visited.
2330 guarantee( _should_gray_objects, "invariant" );
2331 }
2332 }
2334 void ConcurrentMark::clear_marking_state() {
2335 _markStack.setEmpty();
2336 _markStack.clear_overflow();
2337 _regionStack.setEmpty();
2338 _regionStack.clear_overflow();
2339 clear_has_overflown();
2340 _finger = _heap_start;
2342 for (int i = 0; i < (int)_max_task_num; ++i) {
2343 OopTaskQueue* queue = _task_queues->queue(i);
2344 queue->set_empty();
2345 }
2346 }
2348 void ConcurrentMark::print_stats() {
2349 if (verbose_stats()) {
2350 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2351 for (size_t i = 0; i < _active_tasks; ++i) {
2352 _tasks[i]->print_stats();
2353 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2354 }
2355 }
2356 }
2358 class CSMarkOopClosure: public OopClosure {
2359 friend class CSMarkBitMapClosure;
2361 G1CollectedHeap* _g1h;
2362 CMBitMap* _bm;
2363 ConcurrentMark* _cm;
2364 oop* _ms;
2365 jint* _array_ind_stack;
2366 int _ms_size;
2367 int _ms_ind;
2368 int _array_increment;
2370 bool push(oop obj, int arr_ind = 0) {
2371 if (_ms_ind == _ms_size) {
2372 gclog_or_tty->print_cr("Mark stack is full.");
2373 return false;
2374 }
2375 _ms[_ms_ind] = obj;
2376 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2377 _ms_ind++;
2378 return true;
2379 }
2381 oop pop() {
2382 if (_ms_ind == 0) return NULL;
2383 else {
2384 _ms_ind--;
2385 return _ms[_ms_ind];
2386 }
2387 }
2389 template <class T> bool drain() {
2390 while (_ms_ind > 0) {
2391 oop obj = pop();
2392 assert(obj != NULL, "Since index was non-zero.");
2393 if (obj->is_objArray()) {
2394 jint arr_ind = _array_ind_stack[_ms_ind];
2395 objArrayOop aobj = objArrayOop(obj);
2396 jint len = aobj->length();
2397 jint next_arr_ind = arr_ind + _array_increment;
2398 if (next_arr_ind < len) {
2399 push(obj, next_arr_ind);
2400 }
2401 // Now process this portion of this one.
2402 int lim = MIN2(next_arr_ind, len);
2403 for (int j = arr_ind; j < lim; j++) {
2404 do_oop(aobj->obj_at_addr<T>(j));
2405 }
2407 } else {
2408 obj->oop_iterate(this);
2409 }
2410 if (abort()) return false;
2411 }
2412 return true;
2413 }
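// For illustration of the chunking above, which keeps large objArrays
// from monopolising the mark stack: assuming the minimum _array_increment
// of 16 and an objArray of length 40, drain() processes it as
//
//   pop (obj, ind 0)  -> push (obj, 16), scan elements [ 0, 16)
//   pop (obj, ind 16) -> push (obj, 32), scan elements [16, 32)
//   pop (obj, ind 32) -> no push (48 >= 40), scan elements [32, 40)
//
// (entries pushed while scanning each slice are drained in between). The
// continuation is pushed before the slice is scanned, so the array itself
// occupies at most one stack slot at a time, however long it is.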
2415 public:
2416 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2417 _g1h(G1CollectedHeap::heap()),
2418 _cm(cm),
2419 _bm(cm->nextMarkBitMap()),
2420 _ms_size(ms_size), _ms_ind(0),
2421 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2422 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2423 _array_increment(MAX2(ms_size/8, 16))
2424 {}
2426 ~CSMarkOopClosure() {
2427 FREE_C_HEAP_ARRAY(oop, _ms);
2428 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2429 }
2431 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2432 virtual void do_oop( oop* p) { do_oop_work(p); }
2434 template <class T> void do_oop_work(T* p) {
2435 T heap_oop = oopDesc::load_heap_oop(p);
2436 if (oopDesc::is_null(heap_oop)) return;
2437 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2438 if (obj->is_forwarded()) {
2439 // If the object has already been forwarded, we have to make sure
2440 // that it's marked. So follow the forwarding pointer. Note that
2441 // this does the right thing for self-forwarding pointers in the
2442 // evacuation failure case.
2443 obj = obj->forwardee();
2444 }
2445 HeapRegion* hr = _g1h->heap_region_containing(obj);
2446 if (hr != NULL) {
2447 if (hr->in_collection_set()) {
2448 if (_g1h->is_obj_ill(obj)) {
2449 _bm->mark((HeapWord*)obj);
2450 if (!push(obj)) {
2451 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2452 set_abort();
2453 }
2454 }
2455 } else {
2456 // Outside the collection set; we need to gray it
2457 _cm->deal_with_reference(obj);
2458 }
2459 }
2460 }
2461 };
2463 class CSMarkBitMapClosure: public BitMapClosure {
2464 G1CollectedHeap* _g1h;
2465 CMBitMap* _bitMap;
2466 ConcurrentMark* _cm;
2467 CSMarkOopClosure _oop_cl;
2468 public:
2469 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2470 _g1h(G1CollectedHeap::heap()),
2471 _bitMap(cm->nextMarkBitMap()),
2472 _oop_cl(cm, ms_size)
2473 {}
2475 ~CSMarkBitMapClosure() {}
2477 bool do_bit(size_t offset) {
2478 // convert offset into a HeapWord*
2479 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2480 assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
2481 "address out of range");
2482 assert(_bitMap->isMarked(addr), "tautology");
2483 oop obj = oop(addr);
2484 if (!obj->is_forwarded()) {
2485 if (!_oop_cl.push(obj)) return false;
2486 if (UseCompressedOops) {
2487 if (!_oop_cl.drain<narrowOop>()) return false;
2488 } else {
2489 if (!_oop_cl.drain<oop>()) return false;
2490 }
2491 }
2492 // Otherwise...
2493 return true;
2494 }
2495 };
2498 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2499 CMBitMap* _bm;
2500 CSMarkBitMapClosure _bit_cl;
2501 enum SomePrivateConstants {
2502 MSSize = 1000
2503 };
2504 bool _completed;
2505 public:
2506 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2507 _bm(cm->nextMarkBitMap()),
2508 _bit_cl(cm, MSSize),
2509 _completed(true)
2510 {}
2512 ~CompleteMarkingInCSHRClosure() {}
2514 bool doHeapRegion(HeapRegion* r) {
2515 if (!r->evacuation_failed()) {
2516 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2517 if (!mr.is_empty()) {
2518 if (!_bm->iterate(&_bit_cl, mr)) {
2519 _completed = false;
2520 return true;
2521 }
2522 }
2523 }
2524 return false;
2525 }
2527 bool completed() { return _completed; }
2528 };
2530 class ClearMarksInHRClosure: public HeapRegionClosure {
2531 CMBitMap* _bm;
2532 public:
2533 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2535 bool doHeapRegion(HeapRegion* r) {
2536 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2537 MemRegion usedMR = r->used_region();
2538 _bm->clearRange(usedMR);
2539 }
2540 return false;
2541 }
2542 };
2544 void ConcurrentMark::complete_marking_in_collection_set() {
2545 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2547 if (!g1h->mark_in_progress()) {
2548 g1h->g1_policy()->record_mark_closure_time(0.0);
2549 return;
2550 }
2553 double start = os::elapsedTime();
2554 while (true) {
2556 CompleteMarkingInCSHRClosure cmplt(this);
2557 g1h->collection_set_iterate(&cmplt);
2558 if (cmplt.completed()) break;
2559 }
2560 double end_time = os::elapsedTime();
2561 double elapsed_time_ms = (end_time - start) * 1000.0;
2562 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2563 if (PrintGCDetails) {
2564 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2565 }
2567 ClearMarksInHRClosure clr(nextMarkBitMap());
2568 g1h->collection_set_iterate(&clr);
2569 }
2571 // The next two methods deal with the following optimisation. Some
2572 // objects are gray by being marked and located above the finger. If
2573 // they are copied, during an evacuation pause, below the finger then
2574 // they need to be pushed on the stack. The observation is that, if
2575 // there are no regions in the collection set located above the
2576 // finger, then the above cannot happen, hence we do not need to
2577 // explicitly gray any objects when copying them to below the
2578 // finger. The global stack will be scanned to ensure that, if it
2579 // points to objects being copied, it will update their
2580 // location. There is a tricky situation with the gray objects in
2581 // the region stack that are being copied, however. See the comment in
2582 // newCSet().
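// For a concrete example (addresses made up): suppose the global finger
// is at 0x8000 and object X at 0x9000 is marked, i.e. gray by virtue of
// being marked above the finger. If a pause copies X down to 0x4000,
// nothing will ever scan the new copy unless it is explicitly pushed on
// the stack. If no collection set region lies above 0x8000, no such X can
// exist and the explicit graying can be skipped; that is the condition
// the two methods below compute via _min_finger and _should_gray_objects.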
2584 void ConcurrentMark::newCSet() {
2585 if (!concurrent_marking_in_progress())
2586 // nothing to do if marking is not in progress
2587 return;
2589 // find what the lowest finger is among the global and local fingers
2590 _min_finger = _finger;
2591 for (int i = 0; i < (int)_max_task_num; ++i) {
2592 CMTask* task = _tasks[i];
2593 HeapWord* task_finger = task->finger();
2594 if (task_finger != NULL && task_finger < _min_finger)
2595 _min_finger = task_finger;
2596 }
2598 _should_gray_objects = false;
2600 // This fixes a very subtle and frustrating bug. It might be the case
2601 // that, during an evacuation pause, heap regions that contain
2602 // objects that are gray (by being in regions contained in the
2603 // region stack) are included in the collection set. Since such gray
2604 // objects will be moved, and because it's not easy to redirect
2605 // region stack entries to point to a new location (because objects
2606 // in one region might be scattered to multiple regions after they
2607 // are copied), one option is to ensure that all marked objects
2608 // copied during a pause are pushed on the stack. Notice, however,
2609 // that this problem can only happen when the region stack is not
2610 // empty during an evacuation pause. So, we make the fix a bit less
2611 // conservative and ensure that regions are pushed on the stack,
2612 // irrespective of whether all collection set regions are below the
2613 // finger, if the region stack is not empty. This is expected to be
2614 // a rare case, so I don't think it's necessary to be smarter about it.
2615 if (!region_stack_empty())
2616 _should_gray_objects = true;
2617 }
2619 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2620 if (!concurrent_marking_in_progress())
2621 return;
2623 HeapWord* region_end = hr->end();
2624 if (region_end > _min_finger)
2625 _should_gray_objects = true;
2626 }
2628 void ConcurrentMark::disable_co_trackers() {
2629 if (has_aborted()) {
2630 if (_cleanup_co_tracker.enabled())
2631 _cleanup_co_tracker.disable();
2632 for (int i = 0; i < (int)_max_task_num; ++i) {
2633 CMTask* task = _tasks[i];
2634 if (task->co_tracker_enabled())
2635 task->disable_co_tracker();
2636 }
2637 } else {
2638 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
2639 for (int i = 0; i < (int)_max_task_num; ++i) {
2640 CMTask* task = _tasks[i];
2641 guarantee( !task->co_tracker_enabled(), "invariant" );
2642 }
2643 }
2644 }
2646 // abandon current marking iteration due to a Full GC
2647 void ConcurrentMark::abort() {
2648 // Clear all marks to force marking thread to do nothing
2649 _nextMarkBitMap->clearAll();
2650 // Empty mark stack
2651 clear_marking_state();
2652 for (int i = 0; i < (int)_max_task_num; ++i)
2653 _tasks[i]->clear_region_fields();
2654 _has_aborted = true;
2656 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2657 satb_mq_set.abandon_partial_marking();
2658 satb_mq_set.set_active_all_threads(false);
2659 }
2661 static void print_ms_time_info(const char* prefix, const char* name,
2662 NumberSeq& ns) {
2663 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2664 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2665 if (ns.num() > 0) {
2666 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2667 prefix, ns.sd(), ns.maximum());
2668 }
2669 }
2671 void ConcurrentMark::print_summary_info() {
2672 gclog_or_tty->print_cr(" Concurrent marking:");
2673 print_ms_time_info(" ", "init marks", _init_times);
2674 print_ms_time_info(" ", "remarks", _remark_times);
2675 {
2676 print_ms_time_info(" ", "final marks", _remark_mark_times);
2677 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2679 }
2680 print_ms_time_info(" ", "cleanups", _cleanup_times);
2681 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2682 _total_counting_time,
2683 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2684 (double)_cleanup_times.num()
2685 : 0.0));
2686 if (G1ScrubRemSets) {
2687 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2688 _total_rs_scrub_time,
2689 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2690 (double)_cleanup_times.num()
2691 : 0.0));
2692 }
2693 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2694 (_init_times.sum() + _remark_times.sum() +
2695 _cleanup_times.sum())/1000.0);
2696 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2697 "(%8.2f s marking, %8.2f s counting).",
2698 cmThread()->vtime_accum(),
2699 cmThread()->vtime_mark_accum(),
2700 cmThread()->vtime_count_accum());
2701 }
2703 // Closures
2704 // XXX: there seems to be a lot of code duplication here;
2705 // should refactor and consolidate the shared code.
2710 // We take a break if someone is trying to stop the world.
2711 bool ConcurrentMark::do_yield_check(int worker_i) {
2712 if (should_yield()) {
2713 if (worker_i == 0)
2714 _g1h->g1_policy()->record_concurrent_pause();
2715 cmThread()->yield();
2716 if (worker_i == 0)
2717 _g1h->g1_policy()->record_concurrent_pause_end();
2718 return true;
2719 } else {
2720 return false;
2721 }
2722 }
2724 bool ConcurrentMark::should_yield() {
2725 return cmThread()->should_yield();
2726 }
2728 bool ConcurrentMark::containing_card_is_marked(void* p) {
2729 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2730 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2731 }
2733 bool ConcurrentMark::containing_cards_are_marked(void* start,
2734 void* last) {
2735 return
2736 containing_card_is_marked(start) &&
2737 containing_card_is_marked(last);
2738 }
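// For illustration of the card arithmetic above: with the usual 512-byte
// cards, CardTableModRefBS::card_shift is 9. For a heap reserved at
// 0x40000000 and p == 0x40001234,
//
//   offset = pointer_delta(p, start, 1) = 0x1234 (4660 bytes)
//   card   = offset >> 9                = 9
//
// so containing_card_is_marked(p) tests bit 9 of _card_bm. The code only
// relies on card_shift; the concrete numbers are for illustration.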
2740 #ifndef PRODUCT
2741 // for debugging purposes
2742 void ConcurrentMark::print_finger() {
2743 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2744 _heap_start, _heap_end, _finger);
2745 for (int i = 0; i < (int) _max_task_num; ++i) {
2746 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2747 }
2748 gclog_or_tty->print_cr("");
2749 }
2750 #endif
2752 // Closure for iteration over bitmaps
2753 class CMBitMapClosure : public BitMapClosure {
2754 private:
2755 // the bitmap that is being iterated over
2756 CMBitMap* _nextMarkBitMap;
2757 ConcurrentMark* _cm;
2758 CMTask* _task;
2759 // true if we're scanning a heap region claimed by the task (so that
2760 // we move the finger along), false if we're not, i.e. currently when
2761 // scanning a heap region popped from the region stack (so that we
2762 // do not move the task finger along; it'd be a mistake if we did so).
2763 bool _scanning_heap_region;
2765 public:
2766 CMBitMapClosure(CMTask *task,
2767 ConcurrentMark* cm,
2768 CMBitMap* nextMarkBitMap)
2769 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2771 void set_scanning_heap_region(bool scanning_heap_region) {
2772 _scanning_heap_region = scanning_heap_region;
2773 }
2775 bool do_bit(size_t offset) {
2776 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2777 tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2778 tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2780 if (_scanning_heap_region) {
2781 statsOnly( _task->increase_objs_found_on_bitmap() );
2782 tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2783 // We move that task's local finger along.
2784 _task->move_finger_to(addr);
2785 } else {
2786 // We move the task's region finger along.
2787 _task->move_region_finger_to(addr);
2788 }
2790 _task->scan_object(oop(addr));
2791 // we only partially drain the local queue and global stack
2792 _task->drain_local_queue(true);
2793 _task->drain_global_stack(true);
2795 // if the has_aborted flag has been raised, we need to bail out of
2796 // the iteration
2797 return !_task->has_aborted();
2798 }
2799 };
2801 // Closure for iterating over objects, currently only used for
2802 // processing SATB buffers.
2803 class CMObjectClosure : public ObjectClosure {
2804 private:
2805 CMTask* _task;
2807 public:
2808 void do_object(oop obj) {
2809 _task->deal_with_reference(obj);
2810 }
2812 CMObjectClosure(CMTask* task) : _task(task) { }
2813 };
2815 // Closure for iterating over object fields
2816 class CMOopClosure : public OopClosure {
2817 private:
2818 G1CollectedHeap* _g1h;
2819 ConcurrentMark* _cm;
2820 CMTask* _task;
2822 public:
2823 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2824 virtual void do_oop( oop* p) { do_oop_work(p); }
2826 template <class T> void do_oop_work(T* p) {
2827 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2828 tmp_guarantee_CM( !_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(), "invariant" );
2830 oop obj = oopDesc::load_decode_heap_oop(p);
2831 if (_cm->verbose_high())
2832 gclog_or_tty->print_cr("[%d] we're looking at location "
2833 "*"PTR_FORMAT" = "PTR_FORMAT,
2834 _task->task_id(), p, (void*) obj);
2835 _task->deal_with_reference(obj);
2836 }
2838 CMOopClosure(G1CollectedHeap* g1h,
2839 ConcurrentMark* cm,
2840 CMTask* task)
2841 : _g1h(g1h), _cm(cm), _task(task) { }
2842 };
2844 void CMTask::setup_for_region(HeapRegion* hr) {
2845 tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2846 "claim_region() should have filtered out continues humongous regions" );
2848 if (_cm->verbose_low())
2849 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2850 _task_id, hr);
2852 _curr_region = hr;
2853 _finger = hr->bottom();
2854 update_region_limit();
2855 }
2857 void CMTask::update_region_limit() {
2858 HeapRegion* hr = _curr_region;
2859 HeapWord* bottom = hr->bottom();
2860 HeapWord* limit = hr->next_top_at_mark_start();
2862 if (limit == bottom) {
2863 if (_cm->verbose_low())
2864 gclog_or_tty->print_cr("[%d] found an empty region "
2865 "["PTR_FORMAT", "PTR_FORMAT")",
2866 _task_id, bottom, limit);
2867 // The region was collected underneath our feet.
2868 // We set the finger to bottom to ensure that the bitmap
2869 // iteration that will follow this will not do anything.
2870 // (this is not a condition that holds when we set the region up,
2871 // as the region is not supposed to be empty in the first place)
2872 _finger = bottom;
2873 } else if (limit >= _region_limit) {
2874 tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2875 } else {
2876 tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2877 // This can happen under some pretty unusual circumstances. An
2878 // evacuation pause empties the region underneath our feet (NTAMS
2879 // at bottom). We then do some allocation in the region (NTAMS
2880 // stays at bottom), followed by the region being used as a GC
2881 // alloc region (NTAMS will move to top() and the objects
2882 // originally below it will be grayed). All objects now marked in
2883 // the region are explicitly grayed, if below the global finger,
2884 // and we do not in fact need to scan anything else. So, we simply
2885 // set _finger to be limit to ensure that the bitmap iteration
2886 // doesn't do anything.
2887 _finger = limit;
2888 }
2890 _region_limit = limit;
2891 }
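// For illustration, one possible timeline for the unusual case handled
// above (addresses made up):
//
//   t0: setup_for_region()    NTAMS == 0x9000, _region_limit = 0x9000
//   t1: evacuation pause      region emptied, NTAMS -> bottom == 0x8000
//   t2: mutator allocation    NTAMS stays at 0x8000
//   t3: used as GC alloc      NTAMS -> new top == 0x8800, survivors grayed
//   t4: update_region_limit() limit == 0x8800 < _region_limit == 0x9000
//
// Everything marked in the region was explicitly grayed at t3, so setting
// _finger = limit turns the pending bitmap iteration into a no-op.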
2893 void CMTask::giveup_current_region() {
2894 tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2895 if (_cm->verbose_low())
2896 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2897 _task_id, _curr_region);
2898 clear_region_fields();
2899 }
2901 void CMTask::clear_region_fields() {
2902 // Values for these three fields that indicate that we're not
2903 // holding on to a region.
2904 _curr_region = NULL;
2905 _finger = NULL;
2906 _region_limit = NULL;
2908 _region_finger = NULL;
2909 }
2911 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2912 guarantee( nextMarkBitMap != NULL, "invariant" );
2914 if (_cm->verbose_low())
2915 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2917 _nextMarkBitMap = nextMarkBitMap;
2918 clear_region_fields();
2920 _calls = 0;
2921 _elapsed_time_ms = 0.0;
2922 _termination_time_ms = 0.0;
2923 _termination_start_time_ms = 0.0;
2925 #if _MARKING_STATS_
2926 _local_pushes = 0;
2927 _local_pops = 0;
2928 _local_max_size = 0;
2929 _objs_scanned = 0;
2930 _global_pushes = 0;
2931 _global_pops = 0;
2932 _global_max_size = 0;
2933 _global_transfers_to = 0;
2934 _global_transfers_from = 0;
2935 _region_stack_pops = 0;
2936 _regions_claimed = 0;
2937 _objs_found_on_bitmap = 0;
2938 _satb_buffers_processed = 0;
2939 _steal_attempts = 0;
2940 _steals = 0;
2941 _aborted = 0;
2942 _aborted_overflow = 0;
2943 _aborted_cm_aborted = 0;
2944 _aborted_yield = 0;
2945 _aborted_timed_out = 0;
2946 _aborted_satb = 0;
2947 _aborted_termination = 0;
2948 #endif // _MARKING_STATS_
2949 }
2951 bool CMTask::should_exit_termination() {
2952 regular_clock_call();
2953 // This is called when we are in the termination protocol. We should
2954 // quit if, for some reason, this task wants to abort or the global
2955 // stack is not empty (this means that we can get work from it).
2956 return !_cm->mark_stack_empty() || has_aborted();
2957 }
2959 // This determines whether the method below will check both the local
2960 // and global fingers when determining whether to push on the stack a
2961 // gray object (value 1) or whether it will only check the global one
2962 // (value 0). The tradeoffs are that the former will be a bit more
2963 // accurate and possibly push fewer entries on the stack, but it might
2964 // also be a little bit slower.
2966 #define _CHECK_BOTH_FINGERS_ 1
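// In summary, deal_with_reference() below decides as follows for a
// freshly marked object, in the order the tests appear (L = local finger,
// R = _region_limit of the current region, G = global finger):
//
//   objAddr <  L             -> push (the task has already passed it)
//   L <= objAddr < R         -> skip (the task's own scan will reach it)
//   objAddr <  G (otherwise) -> push (no task is guaranteed to scan it)
//   objAddr >= G             -> skip (gray by "marked above the finger")
//
// With _CHECK_BOTH_FINGERS_ == 0 only the last two tests run, which is
// cheaper per reference but may push entries the task would have scanned
// anyway.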
2968 void CMTask::deal_with_reference(oop obj) {
2969 if (_cm->verbose_high())
2970 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2971 _task_id, (void*) obj);
2973 ++_refs_reached;
2975 HeapWord* objAddr = (HeapWord*) obj;
2976 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2977 if (_g1h->is_in_g1_reserved(objAddr)) {
2978 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2979 HeapRegion* hr = _g1h->heap_region_containing(obj);
2980 if (_g1h->is_obj_ill(obj, hr)) {
2981 if (_cm->verbose_high())
2982 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2983 _task_id, (void*) obj);
2985 // we need to mark it first
2986 if (_nextMarkBitMap->parMark(objAddr)) {
2987 // No OrderAccess::store_load() is needed. It is implicit in the
2988 // CAS done in parMark(objAddr) above
2989 HeapWord* global_finger = _cm->finger();
2991 #if _CHECK_BOTH_FINGERS_
2992 // we will check both the local and global fingers
2994 if (_finger != NULL && objAddr < _finger) {
2995 if (_cm->verbose_high())
2996 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2997 "pushing it", _task_id, _finger);
2998 push(obj);
2999 } else if (_curr_region != NULL && objAddr < _region_limit) {
3000 // do nothing
3001 } else if (objAddr < global_finger) {
3002 // Notice that the global finger might be moving forward
3003 // concurrently. This is not a problem. In the worst case, we
3004 // mark the object while it is above the global finger and, by
3005 // the time we read the global finger, it has moved forward
3006 // past this object. In this case, the object will probably
3007 // be visited when a task is scanning the region and will also
3008 // be pushed on the stack. So, some duplicate work, but no
3009 // correctness problems.
3011 if (_cm->verbose_high())
3012 gclog_or_tty->print_cr("[%d] below the global finger "
3013 "("PTR_FORMAT"), pushing it",
3014 _task_id, global_finger);
3015 push(obj);
3016 } else {
3017 // do nothing
3018 }
3019 #else // _CHECK_BOTH_FINGERS_
3020 // we will only check the global finger
3022 if (objAddr < global_finger) {
3023 // see long comment above
3025 if (_cm->verbose_high())
3026 gclog_or_tty->print_cr("[%d] below the global finger "
3027 "("PTR_FORMAT"), pushing it",
3028 _task_id, global_finger);
3029 push(obj);
3030 }
3031 #endif // _CHECK_BOTH_FINGERS_
3032 }
3033 }
3034 }
3035 }
3037 void CMTask::push(oop obj) {
3038 HeapWord* objAddr = (HeapWord*) obj;
3039 tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
3040 tmp_guarantee_CM( !_g1h->heap_region_containing(objAddr)->is_on_free_list(), "invariant" );
3041 tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
3042 tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
3044 if (_cm->verbose_high())
3045 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3047 if (!_task_queue->push(obj)) {
3048 // The local task queue looks full. We need to push some entries
3049 // to the global stack.
3051 if (_cm->verbose_medium())
3052 gclog_or_tty->print_cr("[%d] task queue overflow, "
3053 "moving entries to the global stack",
3054 _task_id);
3055 move_entries_to_global_stack();
3057 // this should succeed since, even if we overflow the global
3058 // stack, we should have definitely removed some entries from the
3059 // local queue. So, there must be space on it.
3060 bool success = _task_queue->push(obj);
3061 tmp_guarantee_CM( success, "invariant" );
3062 }
3064 statsOnly( int tmp_size = _task_queue->size();
3065 if (tmp_size > _local_max_size)
3066 _local_max_size = tmp_size;
3067 ++_local_pushes );
3068 }
3070 void CMTask::reached_limit() {
3071 tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3072 _refs_reached >= _refs_reached_limit ,
3073 "shouldn't have been called otherwise" );
3074 regular_clock_call();
3075 }
3077 void CMTask::regular_clock_call() {
3078 if (has_aborted())
3079 return;
3081 // First, we need to recalculate the words scanned and refs reached
3082 // limits for the next clock call.
3083 recalculate_limits();
3085 // During the regular clock call we do the following
3087 // (1) If an overflow has been flagged, then we abort.
3088 if (_cm->has_overflown()) {
3089 set_has_aborted();
3090 return;
3091 }
3093 // If we are not concurrent (i.e. we're doing remark) we don't need
3094 // to check anything else. The other steps are only needed during
3095 // the concurrent marking phase.
3096 if (!concurrent())
3097 return;
3099 // (2) If marking has been aborted for Full GC, then we also abort.
3100 if (_cm->has_aborted()) {
3101 set_has_aborted();
3102 statsOnly( ++_aborted_cm_aborted );
3103 return;
3104 }
3106 double curr_time_ms = os::elapsedVTime() * 1000.0;
3108 // (3) If marking stats are enabled, then we update the step history.
3109 #if _MARKING_STATS_
3110 if (_words_scanned >= _words_scanned_limit)
3111 ++_clock_due_to_scanning;
3112 if (_refs_reached >= _refs_reached_limit)
3113 ++_clock_due_to_marking;
3115 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3116 _interval_start_time_ms = curr_time_ms;
3117 _all_clock_intervals_ms.add(last_interval_ms);
3119 if (_cm->verbose_medium()) {
3120 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3121 "scanned = %d%s, refs reached = %d%s",
3122 _task_id, last_interval_ms,
3123 _words_scanned,
3124 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3125 _refs_reached,
3126 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3127 }
3128 #endif // _MARKING_STATS_
3130 // (4) We check whether we should yield. If we have to, then we abort.
3131 if (_cm->should_yield()) {
3132 // We should yield. To do this we abort the task. The caller is
3133 // responsible for yielding.
3134 set_has_aborted();
3135 statsOnly( ++_aborted_yield );
3136 return;
3137 }
3139 // (5) We check whether we've reached our time quota. If we have,
3140 // then we abort.
3141 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3142 if (elapsed_time_ms > _time_target_ms) {
3143 set_has_aborted();
3144 _has_aborted_timed_out = true;
3145 statsOnly( ++_aborted_timed_out );
3146 return;
3147 }
3149 // (6) Finally, we check whether there are enough completed SATB
3150 // buffers available for processing. If there are, we abort.
3151 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3152 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3153 if (_cm->verbose_low())
3154 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3155 _task_id);
3156 // we do need to process SATB buffers, we'll abort and restart
3157 // the marking task to do so
3158 set_has_aborted();
3159 statsOnly( ++_aborted_satb );
3160 return;
3161 }
3162 }
3164 void CMTask::recalculate_limits() {
3165 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3166 _words_scanned_limit = _real_words_scanned_limit;
3168 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3169 _refs_reached_limit = _real_refs_reached_limit;
3170 }
3172 void CMTask::decrease_limits() {
3173 // This is called when we believe that we're going to do an infrequent
3174 // operation which will increase the per-byte scanned cost (i.e. move
3175 // entries to/from the global stack). It basically tries to decrease the
3176 // scanning limit so that the clock is called earlier.
3178 if (_cm->verbose_medium())
3179 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3181 _words_scanned_limit = _real_words_scanned_limit -
3182 3 * words_scanned_period / 4;
3183 _refs_reached_limit = _real_refs_reached_limit -
3184 3 * refs_reached_period / 4;
3185 }
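// For example: if the scanning period is P words and the task had scanned
// W words at the last recalculate_limits() call, that call set
//
//   _words_scanned_limit = W + P
//
// and decrease_limits() lowers it to W + P - 3P/4 == W + P/4, so the next
// regular_clock_call() fires after only a quarter of the usual period.
// The refs-reached limit is shrunk the same way.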
3187 void CMTask::move_entries_to_global_stack() {
3188 // local array where we'll store the entries that will be popped
3189 // from the local queue
3190 oop buffer[global_stack_transfer_size];
3192 int n = 0;
3193 oop obj;
3194 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3195 buffer[n] = obj;
3196 ++n;
3197 }
3199 if (n > 0) {
3200 // we popped at least one entry from the local queue
3202 statsOnly( ++_global_transfers_to; _local_pops += n );
3204 if (!_cm->mark_stack_push(buffer, n)) {
3205 if (_cm->verbose_low())
3206 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3207 set_has_aborted();
3208 } else {
3209 // the transfer was successful
3211 if (_cm->verbose_medium())
3212 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3213 _task_id, n);
3214 statsOnly( int tmp_size = _cm->mark_stack_size();
3215 if (tmp_size > _global_max_size)
3216 _global_max_size = tmp_size;
3217 _global_pushes += n );
3218 }
3219 }
3221 // this operation was quite expensive, so decrease the limits
3222 decrease_limits();
3223 }
3225 void CMTask::get_entries_from_global_stack() {
3226 // local array where we'll store the entries that will be popped
3227 // from the global stack.
3228 oop buffer[global_stack_transfer_size];
3229 int n;
3230 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3231 tmp_guarantee_CM( n <= global_stack_transfer_size,
3232 "we should not pop more than the given limit" );
3233 if (n > 0) {
3234 // yes, we did actually pop at least one entry
3236 statsOnly( ++_global_transfers_from; _global_pops += n );
3237 if (_cm->verbose_medium())
3238 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3239 _task_id, n);
3240 for (int i = 0; i < n; ++i) {
3241 bool success = _task_queue->push(buffer[i]);
3242 // We only call this when the local queue is empty or under a
3243 // given target limit. So, we do not expect this push to fail.
3244 tmp_guarantee_CM( success, "invariant" );
3245 }
3247 statsOnly( int tmp_size = _task_queue->size();
3248 if (tmp_size > _local_max_size)
3249 _local_max_size = tmp_size;
3250 _local_pushes += n );
3251 }
3253 // this operation was quite expensive, so decrease the limits
3254 decrease_limits();
3255 }
3257 void CMTask::drain_local_queue(bool partially) {
3258 if (has_aborted())
3259 return;
3261 // Decide what the target size is, depending whether we're going to
3262 // drain it partially (so that other tasks can steal if they run out
3263 // of things to do) or totally (at the very end).
3264 size_t target_size;
3265 if (partially)
3266 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3267 else
3268 target_size = 0;
3270 if (_task_queue->size() > target_size) {
3271 if (_cm->verbose_high())
3272 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3273 _task_id, target_size);
3275 oop obj;
3276 bool ret = _task_queue->pop_local(obj);
3277 while (ret) {
3278 statsOnly( ++_local_pops );
3280 if (_cm->verbose_high())
3281 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3282 (void*) obj);
3284 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3285 "invariant" );
3286 tmp_guarantee_CM( !_g1h->heap_region_containing(obj)->is_on_free_list(),
3287 "invariant" );
3289 scan_object(obj);
3291 if (_task_queue->size() <= target_size || has_aborted())
3292 ret = false;
3293 else
3294 ret = _task_queue->pop_local(obj);
3295 }
3297 if (_cm->verbose_high())
3298 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3299 _task_id, _task_queue->size());
3300 }
3301 }
3303 void CMTask::drain_global_stack(bool partially) {
3304 if (has_aborted())
3305 return;
3307 // We have a policy to drain the local queue before we attempt to
3308 // drain the global stack.
3309 tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3311 // Decide what the target size is, depending whether we're going to
3312 // drain it partially (so that other tasks can steal if they run out
3313 // of things to do) or totally (at the very end). Notice that,
3314 // because we move entries from the global stack in chunks or
3315 // because another task might be doing the same, we might in fact
3316 // drop below the target. But, this is not a problem.
3317 size_t target_size;
3318 if (partially)
3319 target_size = _cm->partial_mark_stack_size_target();
3320 else
3321 target_size = 0;
3323 if (_cm->mark_stack_size() > target_size) {
3324 if (_cm->verbose_low())
3325 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3326 _task_id, target_size);
3328 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3329 get_entries_from_global_stack();
3330 drain_local_queue(partially);
3331 }
3333 if (_cm->verbose_low())
3334 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3335 _task_id, _cm->mark_stack_size());
3336 }
3337 }
3339 // SATB Queue has several assumptions on whether to call the par or
3340 // non-par versions of the methods. This is why some of the code is
3341 // replicated. We should really get rid of the single-threaded version
3342 // of the code to simplify things.
3343 void CMTask::drain_satb_buffers() {
3344 if (has_aborted())
3345 return;
3347 // We set this so that the regular clock knows that we're in the
3348 // middle of draining buffers and doesn't set the abort flag when it
3349 // notices that SATB buffers are available for draining. It'd be
3350 // very counterproductive if it did that. :-)
3351 _draining_satb_buffers = true;
3353 CMObjectClosure oc(this);
3354 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3355 if (ParallelGCThreads > 0)
3356 satb_mq_set.set_par_closure(_task_id, &oc);
3357 else
3358 satb_mq_set.set_closure(&oc);
3360 // This keeps claiming and applying the closure to completed buffers
3361 // until we run out of buffers or we need to abort.
3362 if (ParallelGCThreads > 0) {
3363 while (!has_aborted() &&
3364 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3365 if (_cm->verbose_medium())
3366 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3367 statsOnly( ++_satb_buffers_processed );
3368 regular_clock_call();
3369 }
3370 } else {
3371 while (!has_aborted() &&
3372 satb_mq_set.apply_closure_to_completed_buffer()) {
3373 if (_cm->verbose_medium())
3374 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3375 statsOnly( ++_satb_buffers_processed );
3376 regular_clock_call();
3377 }
3378 }
3380 if (!concurrent() && !has_aborted()) {
3381 // We should only do this during remark.
3382 if (ParallelGCThreads > 0)
3383 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3384 else
3385 satb_mq_set.iterate_closure_all_threads();
3386 }
3388 _draining_satb_buffers = false;
3390 tmp_guarantee_CM( has_aborted() ||
3391 concurrent() ||
3392 satb_mq_set.completed_buffers_num() == 0, "invariant" );
3394 if (ParallelGCThreads > 0)
3395 satb_mq_set.set_par_closure(_task_id, NULL);
3396 else
3397 satb_mq_set.set_closure(NULL);
3399 // again, this was a potentially expensive operation, decrease the
3400 // limits to get the regular clock call early
3401 decrease_limits();
3402 }
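
// A minimal sketch of the unification suggested in the comment above
// drain_satb_buffers(): instead of replicating every call site on
// ParallelGCThreads, one hypothetical helper (not part of this file's
// API) could hide the par / non-par choice. It only uses calls that
// already appear in the method above.
static bool apply_closure_to_completed_buffer_sketch(
                          SATBMarkQueueSet& satb_mq_set, int task_id) {
  if (ParallelGCThreads > 0) {
    // parallel case: each task claims buffers under its own id
    return satb_mq_set.par_apply_closure_to_completed_buffer(task_id);
  } else {
    // single-threaded case
    return satb_mq_set.apply_closure_to_completed_buffer();
  }
}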
3404 void CMTask::drain_region_stack(BitMapClosure* bc) {
3405 if (has_aborted())
3406 return;
3408 tmp_guarantee_CM( _region_finger == NULL,
3409 "it should be NULL when we're not scanning a region" );
3411 if (!_cm->region_stack_empty()) {
3412 if (_cm->verbose_low())
3413 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3414 _task_id, _cm->region_stack_size());
3416 MemRegion mr = _cm->region_stack_pop();
3417 // it returns MemRegion() if the pop fails
3418 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3420 while (mr.start() != NULL) {
3421 if (_cm->verbose_medium())
3422 gclog_or_tty->print_cr("[%d] we are scanning region "
3423 "["PTR_FORMAT", "PTR_FORMAT")",
3424 _task_id, mr.start(), mr.end());
3425 tmp_guarantee_CM( mr.end() <= _cm->finger(),
3426 "otherwise the region shouldn't be on the stack" );
3427 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3428 if (_nextMarkBitMap->iterate(bc, mr)) {
3429 tmp_guarantee_CM( !has_aborted(),
3430 "cannot abort the task without aborting the bitmap iteration" );
3432 // We finished iterating over the region without aborting.
3433 regular_clock_call();
3434 if (has_aborted())
3435 mr = MemRegion();
3436 else {
3437 mr = _cm->region_stack_pop();
3438 // it returns MemRegion() if the pop fails
3439 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3440 }
3441 } else {
3442 guarantee( has_aborted(), "currently the only way to do so" );
3444 // The only way to abort the bitmap iteration is to return
3445 // false from the do_bit() method. However, inside the
3446 // do_bit() method we move the _region_finger to point to the
3447 // object currently being looked at. So, if we bail out, we
3448 // have definitely set _region_finger to something non-null.
3449 guarantee( _region_finger != NULL, "invariant" );
3451 // The iteration was actually aborted. So now _region_finger
3452 // points to the address of the object we last scanned. If we
3453 // leave it there, when we restart this task, we will rescan
3454 // the object. It is easy to avoid this. We move the finger by
3455 // enough to point to the next possible object header (the
3456 // bitmap knows by how much we need to move it as it knows its
3457 // granularity); a sketch of this step follows this method.
3458 MemRegion newRegion =
3459 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3461 if (!newRegion.is_empty()) {
3462 if (_cm->verbose_low()) {
3463 gclog_or_tty->print_cr("[%d] pushing unscanned region "
3464 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3465 _task_id,
3466 newRegion.start(), newRegion.end());
3467 }
3468 // Now push the part of the region we didn't scan on the
3469 // region stack to make sure a task scans it later.
3470 _cm->region_stack_push(newRegion);
3471 }
3472 // break from while
3473 mr = MemRegion();
3474 }
3475 _region_finger = NULL;
3476 }
3478 // We only push regions on the region stack during evacuation
3479 // pauses. So if we come out of the above iteration because the
3480 // region stack is empty, it will remain empty until the next yield
3481 // point. So, the guarantee below is safe.
3482 guarantee( has_aborted() || _cm->region_stack_empty(),
3483 "only way to exit the loop" );
3485 if (_cm->verbose_low())
3486 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3487 _task_id, _cm->region_stack_size());
3488 }
3489 }
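
// The "move the finger by one bitmap granule" step above, written as a
// self-contained sketch (hypothetical names; in this file the real
// work is done by _nextMarkBitMap->nextWord()). Assuming one bitmap
// bit covers (1 << shifter) heap words, the next possible object
// header is the start of the next granule.
static inline HeapWord* next_possible_word_sketch(HeapWord* base,
                                                  HeapWord* addr,
                                                  int shifter) {
  size_t bit = ((size_t) (addr - base)) >> shifter; // bit covering addr
  return base + ((bit + 1) << shifter);             // start of next granule
}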
3491 void CMTask::print_stats() {
3492 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3493 _task_id, _calls);
3494 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3495 _elapsed_time_ms, _termination_time_ms);
3496 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3497 _step_times_ms.num(), _step_times_ms.avg(),
3498 _step_times_ms.sd());
3499 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3500 _step_times_ms.maximum(), _step_times_ms.sum());
3502 #if _MARKING_STATS_
3503 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3504 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3505 _all_clock_intervals_ms.sd());
3506 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3507 _all_clock_intervals_ms.maximum(),
3508 _all_clock_intervals_ms.sum());
3509 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3510 _clock_due_to_scanning, _clock_due_to_marking);
3511 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3512 _objs_scanned, _objs_found_on_bitmap);
3513 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3514 _local_pushes, _local_pops, _local_max_size);
3515 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3516 _global_pushes, _global_pops, _global_max_size);
3517 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3518 _global_transfers_to, _global_transfers_from);
3519 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3520 _regions_claimed, _region_stack_pops);
3521 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3522 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3523 _steal_attempts, _steals);
3524 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3525 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3526 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3527 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3528 _aborted_timed_out, _aborted_satb, _aborted_termination);
3529 #endif // _MARKING_STATS_
3530 }
3532 /*****************************************************************************
3534 The do_marking_step(time_target_ms) method is the building block
3535 of the parallel marking framework. It can be called in parallel
3536 with other invocations of do_marking_step() on different tasks
3537 (but only one per task, obviously) and concurrently with the
3538 mutator threads, or during remark, hence it eliminates the need
3539 for two versions of the code. When called during remark, it will
3540 pick up from where the task left off during the concurrent marking
3541 phase. Interestingly, tasks are also claimable during evacuation
3542 pauses, since do_marking_step() ensures that it aborts before
3543 it needs to yield.
3545 The data structures that it uses to do marking work are the
3546 following:
3548 (1) Marking Bitmap. If there are gray objects that appear only
3549 on the bitmap (this happens either when dealing with an overflow
3550 or when the initial marking phase has simply marked the roots
3551 and didn't push them on the stack), then tasks claim heap
3552 regions whose bitmap they then scan to find gray objects. A
3553 global finger indicates where the end of the last claimed region
3554 is. A local finger indicates how far into the region a task has
3555 scanned. The two fingers are used to determine how to gray an
3556 object (i.e. whether simply marking it is OK, as it will be
3557 visited by a task in the future, or whether it also needs to be
3558 pushed on a stack); a sketch of this decision follows this comment.
3560 (2) Local Queue. The local queue of the task which is accessed
3561 reasonably efficiently by the task. Other tasks can steal from
3562 it when they run out of work. Throughout the marking phase, a
3563 task attempts to keep its local queue short but not totally
3564 empty, so that entries are available for stealing by other
3565 tasks. Only when there is no more work will a task totally
3566 drain its local queue.
3568 (3) Global Mark Stack. This handles local queue overflow. During
3569 marking, only sets of entries are moved between it and the local
3570 queues, as access to it requires a mutex and more fine-grained
3571 interaction with it might cause contention. If it
3572 overflows, then the marking phase should restart and iterate
3573 over the bitmap to identify gray objects. Throughout the marking
3574 phase, tasks attempt to keep the global mark stack at a small
3575 length but not totally empty, so that entries are available for
3576 popping by other tasks. Only when there is no more work will
3577 tasks totally drain the global mark stack.
3579 (4) Global Region Stack. Entries on it correspond to areas of
3580 the bitmap that need to be scanned since they contain gray
3581 objects. Pushes on the region stack only happen during
3582 evacuation pauses and typically correspond to areas covered by
3583 GC LABs. If it overflows, then the marking phase should restart
3584 and iterate over the bitmap to identify gray objects. Tasks will
3585 try to totally drain the region stack as soon as possible.
3587 (5) SATB Buffer Queue. This is where completed SATB buffers are
3588 made available. Buffers are regularly removed from this queue
3589 and scanned for roots, so that the queue doesn't get too
3590 long. During remark, all completed buffers are processed, as
3591 well as the filled-in parts of any uncompleted buffers.
3593 The do_marking_step() method tries to abort when the time target
3594 has been reached. There are a few other cases when the
3595 do_marking_step() method also aborts:
3597 (1) When the marking phase has been aborted (after a Full GC).
3599 (2) When a global overflow (either on the global stack or the
3600 region stack) has been triggered. Before the task aborts, it
3601 will actually sync up with the other tasks to ensure that all
3602 the marking data structures (local queues, stacks, fingers etc.)
3603 are re-initialised so that when do_marking_step() completes,
3604 the marking phase can immediately restart.
3606 (3) When enough completed SATB buffers are available. The
3607 do_marking_step() method only tries to drain SATB buffers right
3608 at the beginning. So, if enough buffers are available, the
3609 marking step aborts and the SATB buffers are processed at
3610 the beginning of the next invocation.
3612 (4) To yield. When we have to yield, we abort and yield
3613 right at the end of do_marking_step(). This saves us a lot
3614 of hassle as, by yielding, we might allow a Full GC. If this
3615 happens then objects will be compacted underneath our feet, the
3616 heap might shrink, etc. We save checking for this by just
3617 aborting and doing the yield right at the end.
3619 From the above it follows that the do_marking_step() method should
3620 be called in a loop (or, otherwise, regularly) until it completes
3621 (an illustrative driver sketch follows the method below).
3622 If a marking step completes without its has_aborted() flag being
3623 true, it means it has completed the current marking phase (and
3624 also all other marking tasks have done so and have all synced up).
3626 A method called regular_clock_call() is invoked "regularly" (in
3627 sub ms intervals) throughout marking. It is this clock method that
3628 checks all the abort conditions which were mentioned above and
3629 decides when the task should abort. A work-based scheme is used to
3630 trigger this clock method: it is called when the number of object
3631 words the marking phase has scanned, or the number of references it
3632 has visited, reaches a given limit (see the sketch after this
3633 comment). Additional invocations of the clock method have been
3634 planted in a few other strategic places too. The initial reason for
3635 the clock method was to avoid calling vtime too regularly, as it is
3636 quite expensive. So, once it was in place, it was natural to
3637 piggy-back all the other conditions on it too and not constantly
3638 check them throughout the code.
3639 *****************************************************************************/
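
// Two illustrative sketches for the comment above; the names are
// hypothetical and neither function is part of this file's API.
//
// (a) The graying decision from item (1), simplified to the global
// finger only (the real code also consults the local finger of the
// task scanning the object's region): an object at or above the
// global finger lies in a region that will still be claimed and
// bitmap-scanned, so marking it on the bitmap is enough; an object
// below the finger has already been passed over, so it must also be
// pushed on a queue to guarantee that some task visits it.
static inline bool needs_push_sketch(HeapWord* obj_addr,
                                     HeapWord* global_finger) {
  return obj_addr < global_finger;
}

// (b) The work-based clock trigger: the work counters are bumped as
// object words are scanned and references are visited, and the clock
// method only runs once one of them reaches its limit. This keeps the
// relatively expensive time check off the per-object fast path.
static inline bool should_call_clock_sketch(size_t words_scanned,
                                            size_t words_scanned_limit,
                                            size_t refs_reached,
                                            size_t refs_reached_limit) {
  return words_scanned >= words_scanned_limit ||
         refs_reached  >= refs_reached_limit;
}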
3641 void CMTask::do_marking_step(double time_target_ms) {
3642 guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3643 guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3645 guarantee( concurrent() || _cm->region_stack_empty(),
3646 "the region stack should have been cleared before remark" );
3647 guarantee( _region_finger == NULL,
3648 "this should be non-null only when a region is being scanned" );
3650 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3651 guarantee( _task_queues != NULL, "invariant" );
3652 guarantee( _task_queue != NULL, "invariant" );
3653 guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3655 guarantee( !_claimed,
3656 "only one thread should claim this task at any one time" );
3658 // OK, this doesn't safeguard against all possible scenarios, as it is
3659 // possible for two threads to set the _claimed flag at the same
3660 // time. But it is only for debugging purposes anyway and it will
3661 // catch most problems.
3662 _claimed = true;
3664 _start_time_ms = os::elapsedVTime() * 1000.0;
3665 statsOnly( _interval_start_time_ms = _start_time_ms );
3667 double diff_prediction_ms =
3668 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3669 _time_target_ms = time_target_ms - diff_prediction_ms;
3671 // set up the variables that are used in the work-based scheme to
3672 // call the regular clock method
3673 _words_scanned = 0;
3674 _refs_reached = 0;
3675 recalculate_limits();
3677 // clear all flags
3678 clear_has_aborted();
3679 _has_aborted_timed_out = false;
3680 _draining_satb_buffers = false;
3682 ++_calls;
3684 if (_cm->verbose_low())
3685 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3686 "target = %1.2lfms >>>>>>>>>>",
3687 _task_id, _calls, _time_target_ms);
3689 // Set up the bitmap and oop closures. Anything that uses them is
3690 // eventually called from this method, so it is OK to allocate these
3691 // statically.
3692 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3693 CMOopClosure oop_closure(_g1h, _cm, this);
3694 set_oop_closure(&oop_closure);
3696 if (_cm->has_overflown()) {
3697 // This can happen if the region stack or the mark stack overflows
3698 // during a GC pause and this task, after a yield point,
3699 // restarts. We have to abort as we need to get into the overflow
3700 // protocol which happens right at the end of this task.
3701 set_has_aborted();
3702 }
3704 // First drain any available SATB buffers. After this, we will not
3705 // look at SATB buffers before the next invocation of this method.
3706 // If enough completed SATB buffers are queued up, the regular clock
3707 // will abort this task so that it restarts.
3708 drain_satb_buffers();
3709 // ...then partially drain the local queue and the global stack
3710 drain_local_queue(true);
3711 drain_global_stack(true);
3713 // Then totally drain the region stack. We will not look at
3714 // it again before the next invocation of this method. Entries on
3715 // the region stack are only added during evacuation pauses, for
3716 // which we have to yield. When we do, we abort the task anyway so
3717 // it will look at the region stack again when it restarts.
3718 bitmap_closure.set_scanning_heap_region(false);
3719 drain_region_stack(&bitmap_closure);
3720 // ...then partially drain the local queue and the global stack
3721 drain_local_queue(true);
3722 drain_global_stack(true);
3724 do {
3725 if (!has_aborted() && _curr_region != NULL) {
3726 // This means that we're already holding on to a region.
3727 tmp_guarantee_CM( _finger != NULL,
3728 "if region is not NULL, then the finger "
3729 "should not be NULL either" );
3731 // We might have restarted this task after an evacuation pause
3732 // which might have evacuated the region we're holding on to
3733 // underneath our feet. Let's read its limit again to make sure
3734 // that we do not iterate over a region of the heap that
3735 // contains garbage (update_region_limit() will also move
3736 // _finger to the start of the region if it is found empty).
3737 update_region_limit();
3738 // We will start from _finger not from the start of the region,
3739 // as we might be restarting this task after aborting half-way
3740 // through scanning this region. In this case, _finger points to
3741 // the address where we last found a marked object. If this is a
3742 // fresh region, _finger points to start().
3743 MemRegion mr = MemRegion(_finger, _region_limit);
3745 if (_cm->verbose_low())
3746 gclog_or_tty->print_cr("[%d] we're scanning part "
3747 "["PTR_FORMAT", "PTR_FORMAT") "
3748 "of region "PTR_FORMAT,
3749 _task_id, _finger, _region_limit, _curr_region);
3751 // Let's iterate over the bitmap of the part of the
3752 // region that is left.
3753 bitmap_closure.set_scanning_heap_region(true);
3754 if (mr.is_empty() ||
3755 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3756 // We successfully completed iterating over the region. Now,
3757 // let's give up the region.
3758 giveup_current_region();
3759 regular_clock_call();
3760 } else {
3761 guarantee( has_aborted(), "currently the only way to do so" );
3762 // The only way to abort the bitmap iteration is to return
3763 // false from the do_bit() method. However, inside the
3764 // do_bit() method we move the _finger to point to the
3765 // object currently being looked at. So, if we bail out, we
3766 // have definitely set _finger to something non-null.
3767 guarantee( _finger != NULL, "invariant" );
3769 // Region iteration was actually aborted. So now _finger
3770 // points to the address of the object we last scanned. If we
3771 // leave it there, when we restart this task, we will rescan
3772 // the object. It is easy to avoid this. We move the finger by
3773 // enough to point to the next possible object header (the
3774 // bitmap knows by how much we need to move it as it knows its
3775 // granularity).
3776 move_finger_to(_nextMarkBitMap->nextWord(_finger));
3777 }
3778 }
3779 // At this point we have either completed iterating over the
3780 // region we were holding on to, or we have aborted.
3782 // We then partially drain the local queue and the global stack.
3783 // (Do we really need this?)
3784 drain_local_queue(true);
3785 drain_global_stack(true);
3787 // Read the note on the claim_region() method on why it might
3788 // return NULL with potentially more regions available for
3789 // claiming and why we have to check out_of_regions() to determine
3790 // whether we're done or not.
3791 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3792 // We are going to try to claim a new region. We should have
3793 // given up on the previous one.
3794 tmp_guarantee_CM( _curr_region == NULL &&
3795 _finger == NULL &&
3796 _region_limit == NULL, "invariant" );
3797 if (_cm->verbose_low())
3798 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3799 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3800 if (claimed_region != NULL) {
3801 // Yes, we managed to claim one
3802 statsOnly( ++_regions_claimed );
3804 if (_cm->verbose_low())
3805 gclog_or_tty->print_cr("[%d] we successfully claimed "
3806 "region "PTR_FORMAT,
3807 _task_id, claimed_region);
3809 setup_for_region(claimed_region);
3810 tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3811 }
3812 // It is important to call the regular clock here. It might take
3813 // a while to claim a region if, for example, we hit a large
3814 // block of empty regions. So we need to call the regular clock
3815 // method once round the loop to make sure it's called
3816 // frequently enough.
3817 regular_clock_call();
3818 }
3820 if (!has_aborted() && _curr_region == NULL) {
3821 tmp_guarantee_CM( _cm->out_of_regions(),
3822 "at this point we should be out of regions" );
3823 }
3824 } while ( _curr_region != NULL && !has_aborted());
3826 if (!has_aborted()) {
3827 // We cannot check whether the global stack is empty, since other
3828 // tasks might be pushing objects to it concurrently. We also cannot
3829 // check if the region stack is empty because if a thread is aborting
3830 // it can push a partially done region back.
3831 tmp_guarantee_CM( _cm->out_of_regions(),
3832 "at this point we should be out of regions" );
3834 if (_cm->verbose_low())
3835 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3837 // Try to reduce the number of available SATB buffers so that
3838 // remark has less work to do.
3839 drain_satb_buffers();
3840 }
3842 // Since we've done everything else, we can now totally drain the
3843 // local queue and global stack.
3844 drain_local_queue(false);
3845 drain_global_stack(false);
3847 // Attempt at work stealing from other tasks' queues.
3848 if (!has_aborted()) {
3849 // We have not aborted. This means that we have finished all that
3850 // we could. Let's try to do some stealing...
3852 // We cannot check whether the global stack is empty, since other
3853 // tasks might be pushing objects to it concurrently. We also cannot
3854 // check if the region stack is empty because if a thread is aborting
3855 // it can push a partially done region back.
3856 guarantee( _cm->out_of_regions() &&
3857 _task_queue->size() == 0, "only way to reach here" );
3859 if (_cm->verbose_low())
3860 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3862 while (!has_aborted()) {
3863 oop obj;
3864 statsOnly( ++_steal_attempts );
3866 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3867 if (_cm->verbose_medium())
3868 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3869 _task_id, (void*) obj);
3871 statsOnly( ++_steals );
3873 tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3874 "any stolen object should be marked" );
3875 scan_object(obj);
3877 // And since we're towards the end, let's totally drain the
3878 // local queue and global stack.
3879 drain_local_queue(false);
3880 drain_global_stack(false);
3881 } else {
3882 break;
3883 }
3884 }
3885 }
3887 // We still haven't aborted. Now, let's try to get into the
3888 // termination protocol.
3889 if (!has_aborted()) {
3890 // We cannot check whether the global stack is empty, since other
3891 // tasks might be concurrently pushing objects on it. We also cannot
3892 // check if the region stack is empty because if a thread is aborting
3893 // it can push a partially done region back.
3894 guarantee( _cm->out_of_regions() &&
3895 _task_queue->size() == 0, "only way to reach here" );
3897 if (_cm->verbose_low())
3898 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3900 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3901 // The CMTask class also extends the TerminatorTerminator class,
3902 // hence its should_exit_termination() method will also decide
3903 // whether to exit the termination protocol or not.
3904 bool finished = _cm->terminator()->offer_termination(this);
3905 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3906 _termination_time_ms +=
3907 termination_end_time_ms - _termination_start_time_ms;
3909 if (finished) {
3910 // We're all done.
3912 if (_task_id == 0) {
3913 // let's allow task 0 to do this
3914 if (concurrent()) {
3915 guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3916 // we need to set this to false before the next
3917 // safepoint. This way we ensure that the marking phase
3918 // doesn't observe any more heap expansions.
3919 _cm->clear_concurrent_marking_in_progress();
3920 }
3921 }
3923 // We can now guarantee that the global stack is empty, since
3924 // all other tasks have finished.
3925 guarantee( _cm->out_of_regions() &&
3926 _cm->region_stack_empty() &&
3927 _cm->mark_stack_empty() &&
3928 _task_queue->size() == 0 &&
3929 !_cm->has_overflown() &&
3930 !_cm->mark_stack_overflow() &&
3931 !_cm->region_stack_overflow(),
3932 "only way to reach here" );
3934 if (_cm->verbose_low())
3935 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3936 } else {
3937 // Apparently there's more work to do. Let's abort this task. Our
3938 // caller will restart it and we can hopefully find more things to do.
3940 if (_cm->verbose_low())
3941 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3943 set_has_aborted();
3944 statsOnly( ++_aborted_termination );
3945 }
3946 }
3948 // Mainly for debugging purposes to make sure that a pointer to the
3949 // closure which was statically allocated in this frame doesn't
3950 // escape it by accident.
3951 set_oop_closure(NULL);
3952 double end_time_ms = os::elapsedVTime() * 1000.0;
3953 double elapsed_time_ms = end_time_ms - _start_time_ms;
3954 // Update the step history.
3955 _step_times_ms.add(elapsed_time_ms);
3957 if (has_aborted()) {
3958 // The task was aborted for some reason.
3960 statsOnly( ++_aborted );
3962 if (_has_aborted_timed_out) {
3963 double diff_ms = elapsed_time_ms - _time_target_ms;
3964 // Keep statistics of how well we did with respect to hitting
3965 // our target only if we actually timed out (if we aborted for
3966 // other reasons, then the results might get skewed).
3967 _marking_step_diffs_ms.add(diff_ms);
3968 }
3970 if (_cm->has_overflown()) {
3971 // This is the interesting one. We aborted because a global
3972 // overflow was raised. This means we have to restart the
3973 // marking phase and start iterating over regions. However, in
3974 // order to do this we have to make sure that all tasks stop
3975 // what they are doing and re-initialise in a safe manner. We
3976 // will achieve this with the use of two barrier sync points.
3978 if (_cm->verbose_low())
3979 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3981 _cm->enter_first_sync_barrier(_task_id);
3982 // When we exit this sync barrier we know that all tasks have
3983 // stopped doing marking work. So, it's now safe to
3984 // re-initialise our data structures. At the end of this method,
3985 // task 0 will clear the global data structures.
3987 statsOnly( ++_aborted_overflow );
3989 // We clear the local state of this task...
3990 clear_region_fields();
3992 // ...and enter the second barrier.
3993 _cm->enter_second_sync_barrier(_task_id);
3994 // At this point everything has been re-initialised and we're
3995 // ready to restart.
3996 }
3998 if (_cm->verbose_low()) {
3999 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4000 "elapsed = %1.2lfms <<<<<<<<<<",
4001 _task_id, _time_target_ms, elapsed_time_ms);
4002 if (_cm->has_aborted())
4003 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4004 _task_id);
4005 }
4006 } else {
4007 if (_cm->verbose_low())
4008 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4009 "elapsed = %1.2lfms <<<<<<<<<<",
4010 _task_id, _time_target_ms, elapsed_time_ms);
4011 }
4013 _claimed = false;
4014 }
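
// An illustrative driver for the calling convention described in the
// big comment above do_marking_step(): keep re-invoking the step
// until it completes without aborting. This is a sketch only, under
// assumed names; the actual callers (the concurrent marking threads
// and the remark code) also interleave yielding, termination and
// time-slicing around the step.
static void do_marking_step_driver_sketch(CMTask* task,
                                          double time_target_ms) {
  do {
    task->do_marking_step(time_target_ms);
    // if the step aborted (overflow, SATB buffers available, yield
    // request, time-out), it has already synced up with the other
    // tasks where necessary and can simply be called again
  } while (task->has_aborted());
}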
4016 CMTask::CMTask(int task_id,
4017 ConcurrentMark* cm,
4018 CMTaskQueue* task_queue,
4019 CMTaskQueueSet* task_queues)
4020 : _g1h(G1CollectedHeap::heap()),
4021 _co_tracker(G1CMGroup),
4022 _task_id(task_id), _cm(cm),
4023 _claimed(false),
4024 _nextMarkBitMap(NULL), _hash_seed(17),
4025 _task_queue(task_queue),
4026 _task_queues(task_queues),
4027 _oop_closure(NULL) {
4028 guarantee( task_queue != NULL, "invariant" );
4029 guarantee( task_queues != NULL, "invariant" );
4031 statsOnly( _clock_due_to_scanning = 0;
4032 _clock_due_to_marking = 0 );
4034 _marking_step_diffs_ms.add(0.5);
4035 }