Tue, 05 May 2009 22:15:35 -0700
6833576: G1: assert illegal index, growableArray.hpp:186
Summary: The code that calculates the heap region index for an object address incorrectly used signed arithmetic.
Reviewed-by: jcoomes, ysr
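(For context, a minimal sketch of the failure mode the summary describes. This is illustrative only; the identifiers below are hypothetical and this is not the actual patch.)

    HeapWord* addr;       // some object address
    HeapWord* heap_base;  // bottom of the reserved heap
    // Signed arithmetic: the shifted difference can truncate or go
    // negative for addresses high in the address space, producing the
    // illegal GrowableArray index the assert catches.
    int    bad_index  = (int) (((intptr_t) addr - (intptr_t) heap_base)
                               >> HeapRegion::LogOfHRGrainBytes);
    // Unsigned arithmetic stays in range for any in-heap address.
    size_t good_index = pointer_delta(addr, heap_base, 1)
                        >> HeapRegion::LogOfHRGrainBytes;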
1 /*
2 * Copyright 2001-2008 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
25 #include "incls/_precompiled.incl"
26 #include "incls/_concurrentMark.cpp.incl"
28 //
29 // CMS Bit Map Wrapper
31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
32 _bm((uintptr_t*)NULL,0),
33 _shifter(shifter) {
34 _bmStartWord = (HeapWord*)(rs.base());
35 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
36 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
37 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
39 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
40 // For now we'll just commit all of the bit map up front.
41 // Later on we'll try to be more parsimonious with swap.
42 guarantee(_virtual_space.initialize(brs, brs.size()),
43 "couldn't reseve backing store for CMS bit map");
44 assert(_virtual_space.committed_size() == brs.size(),
45 "didn't reserve backing store for all of CMS bit map?");
46 _bm.set_map((uintptr_t*)_virtual_space.low());
47 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
48 _bmWordSize, "inconsistency in bit map sizing");
49 _bm.set_size(_bmWordSize >> _shifter);
50 }
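// Illustrative sizing, assuming a 64-bit build (HeapWordSize == 8,
// LogBitsPerByte == 3) and _shifter == 0: a 1 GB reservation gives
// _bmWordSize == 128M words, so the backing store reserved above is
// (128M >> 3) + 1 bytes -- one bit per heap word, roughly 16 MB.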
52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
53 HeapWord* limit) const {
54 // First we must round addr *up* to a possible object boundary.
55 addr = (HeapWord*)align_size_up((intptr_t)addr,
56 HeapWordSize << _shifter);
57 size_t addrOffset = heapWordToOffset(addr);
58 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
59 size_t limitOffset = heapWordToOffset(limit);
60 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
61 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
62 assert(nextAddr >= addr, "get_next_one postcondition");
63 assert(nextAddr == limit || isMarked(nextAddr),
64 "get_next_one postcondition");
65 return nextAddr;
66 }
68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
69 HeapWord* limit) const {
70 size_t addrOffset = heapWordToOffset(addr);
71 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
72 size_t limitOffset = heapWordToOffset(limit);
73 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
74 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
75 assert(nextAddr >= addr, "get_next_zero postcondition");
76 assert(nextAddr == limit || !isMarked(nextAddr),
77 "get_next_zero postcondition");
78 return nextAddr;
79 }
81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
82 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
83 return (int) (diff >> _shifter);
84 }
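// A hedged usage sketch, assuming the usual definitions
// heapWordToOffset(addr) == pointer_delta(addr, _bmStartWord) >> _shifter
// and its inverse offsetToHeapWord(): with _shifter == 0, the word
// 4096 words above _bmStartWord maps to bit offset 4096, and
// offsetToHeapWord(4096) maps back to the same address.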
86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
87 HeapWord* left = MAX2(_bmStartWord, mr.start());
88 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
89 if (right > left) {
90 // Right-open interval [leftOffset, rightOffset).
91 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
92 } else {
93 return true;
94 }
95 }
97 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
98 size_t from_start_index,
99 HeapWord* to_start_word,
100 size_t word_num) {
101 _bm.mostly_disjoint_range_union(from_bitmap,
102 from_start_index,
103 heapWordToOffset(to_start_word),
104 word_num);
105 }
107 #ifndef PRODUCT
108 bool CMBitMapRO::covers(ReservedSpace rs) const {
109 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
110 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
111 "size inconsistency");
112 return _bmStartWord == (HeapWord*)(rs.base()) &&
113 _bmWordSize == rs.size()>>LogHeapWordSize;
114 }
115 #endif
117 void CMBitMap::clearAll() {
118 _bm.clear();
119 return;
120 }
122 void CMBitMap::markRange(MemRegion mr) {
123 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
124 assert(!mr.is_empty(), "unexpected empty region");
125 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
126 ((HeapWord *) mr.end())),
127 "markRange memory region end is not card aligned");
128 // convert address range into offset range
129 _bm.at_put_range(heapWordToOffset(mr.start()),
130 heapWordToOffset(mr.end()), true);
131 }
133 void CMBitMap::clearRange(MemRegion mr) {
134 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
135 assert(!mr.is_empty(), "unexpected empty region");
136 // convert address range into offset range
137 _bm.at_put_range(heapWordToOffset(mr.start()),
138 heapWordToOffset(mr.end()), false);
139 }
141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
142 HeapWord* end_addr) {
143 HeapWord* start = getNextMarkedWordAddress(addr);
144 start = MIN2(start, end_addr);
145 HeapWord* end = getNextUnmarkedWordAddress(start);
146 end = MIN2(end, end_addr);
147 assert(start <= end, "Consistency check");
148 MemRegion mr(start, end);
149 if (!mr.is_empty()) {
150 clearRange(mr);
151 }
152 return mr;
153 }
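// A minimal usage sketch (hypothetical caller, not from this file):
// walk and clear every marked run in [bottom, top):
//
//   for (MemRegion mr = bm->getAndClearMarkedRegion(bottom, top);
//        !mr.is_empty();
//        mr = bm->getAndClearMarkedRegion(mr.end(), top)) {
//     process(mr);  // process() is a placeholder
//   }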
155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
156 _base(NULL), _cm(cm)
157 #ifdef ASSERT
158 , _drain_in_progress(false)
159 , _drain_in_progress_yields(false)
160 #endif
161 {}
163 void CMMarkStack::allocate(size_t size) {
164 _base = NEW_C_HEAP_ARRAY(oop, size);
165 if (_base == NULL)
166 vm_exit_during_initialization("Failed to allocate "
167 "CM region mark stack");
168 _index = 0;
169 // QQQQ cast ...
170 _capacity = (jint) size;
171 _oops_do_bound = -1;
172 NOT_PRODUCT(_max_depth = 0);
173 }
175 CMMarkStack::~CMMarkStack() {
176 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
177 }
179 void CMMarkStack::par_push(oop ptr) {
180 while (true) {
181 if (isFull()) {
182 _overflow = true;
183 return;
184 }
185 // Otherwise...
186 jint index = _index;
187 jint next_index = index+1;
188 jint res = Atomic::cmpxchg(next_index, &_index, index);
189 if (res == index) {
190 _base[index] = ptr;
191 // Note that we don't maintain this atomically. We could, but it
192 // doesn't seem necessary.
193 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
194 return;
195 }
196 // Otherwise, we need to try again.
197 }
198 }
200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
201 while (true) {
202 if (isFull()) {
203 _overflow = true;
204 return;
205 }
206 // Otherwise...
207 jint index = _index;
208 jint next_index = index + n;
209 if (next_index > _capacity) {
210 _overflow = true;
211 return;
212 }
213 jint res = Atomic::cmpxchg(next_index, &_index, index);
214 if (res == index) {
215 for (int i = 0; i < n; i++) {
216 int ind = index + i;
217 assert(ind < _capacity, "By overflow test above.");
218 _base[ind] = ptr_arr[i];
219 }
220 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
221 return;
222 }
223 // Otherwise, we need to try again.
224 }
225 }
228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
229 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
230 jint start = _index;
231 jint next_index = start + n;
232 if (next_index > _capacity) {
233 _overflow = true;
234 return;
235 }
236 // Otherwise.
237 _index = next_index;
238 for (int i = 0; i < n; i++) {
239 int ind = start + i;
240 guarantee(ind < _capacity, "By overflow test above.");
241 _base[ind] = ptr_arr[i];
242 }
243 }
246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
247 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
248 jint index = _index;
249 if (index == 0) {
250 *n = 0;
251 return false;
252 } else {
253 int k = MIN2(max, index);
254 jint new_ind = index - k;
255 for (int j = 0; j < k; j++) {
256 ptr_arr[j] = _base[new_ind + j];
257 }
258 _index = new_ind;
259 *n = k;
260 return true;
261 }
262 }
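// A hedged usage sketch (the buffer size is illustrative): drain the
// stack in chunks under the locking protocol above:
//
//   oop buffer[64];
//   int n;
//   while (stack->par_pop_arr(buffer, 64, &n)) {
//     for (int i = 0; i < n; i++) {
//       // ... process buffer[i] ...
//     }
//   }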
265 CMRegionStack::CMRegionStack() : _base(NULL) {}
267 void CMRegionStack::allocate(size_t size) {
268 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
269 if (_base == NULL)
270 vm_exit_during_initialization("Failed to allocate "
271 "CM region mark stack");
272 _index = 0;
273 // QQQQ cast ...
274 _capacity = (jint) size;
275 }
277 CMRegionStack::~CMRegionStack() {
278 if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
279 }
281 void CMRegionStack::push(MemRegion mr) {
282 assert(mr.word_size() > 0, "Precondition");
283 while (true) {
284 if (isFull()) {
285 _overflow = true;
286 return;
287 }
288 // Otherwise...
289 jint index = _index;
290 jint next_index = index+1;
291 jint res = Atomic::cmpxchg(next_index, &_index, index);
292 if (res == index) {
293 _base[index] = mr;
294 return;
295 }
296 // Otherwise, we need to try again.
297 }
298 }
300 MemRegion CMRegionStack::pop() {
301 while (true) {
303 jint index = _index;
305 if (index == 0) {
306 return MemRegion();
307 }
308 jint next_index = index-1;
309 jint res = Atomic::cmpxchg(next_index, &_index, index);
310 if (res == index) {
311 MemRegion mr = _base[next_index];
312 if (mr.start() != NULL) {
313 tmp_guarantee_CM( mr.end() != NULL, "invariant" );
314 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
315 return mr;
316 } else {
317 // that entry was invalidated... let's skip it
318 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
319 }
320 }
321 // Otherwise, we need to try again.
322 }
323 }
325 bool CMRegionStack::invalidate_entries_into_cset() {
326 bool result = false;
327 G1CollectedHeap* g1h = G1CollectedHeap::heap();
328 for (int i = 0; i < _oops_do_bound; ++i) {
329 MemRegion mr = _base[i];
330 if (mr.start() != NULL) {
331 tmp_guarantee_CM( mr.end() != NULL, "invariant");
332 tmp_guarantee_CM( mr.word_size() > 0, "invariant" );
333 HeapRegion* hr = g1h->heap_region_containing(mr.start());
334 tmp_guarantee_CM( hr != NULL, "invariant" );
335 if (hr->in_collection_set()) {
336 // The region points into the collection set
337 _base[i] = MemRegion();
338 result = true;
339 }
340 } else {
341 // that entry was invalidated... let's skip it
342 tmp_guarantee_CM( mr.end() == NULL, "invariant" );
343 }
344 }
345 return result;
346 }
348 template<class OopClosureClass>
349 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
350 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
351 || SafepointSynchronize::is_at_safepoint(),
352 "Drain recursion must be yield-safe.");
353 bool res = true;
354 debug_only(_drain_in_progress = true);
355 debug_only(_drain_in_progress_yields = yield_after);
356 while (!isEmpty()) {
357 oop newOop = pop();
358 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
359 assert(newOop->is_oop(), "Expected an oop");
360 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
361 "only grey objects on this stack");
362 // iterate over the oops in this oop, marking and pushing
363 // the ones in CMS generation.
364 newOop->oop_iterate(cl);
365 if (yield_after && _cm->do_yield_check()) {
366 res = false; break;
367 }
368 }
369 debug_only(_drain_in_progress = false);
370 return res;
371 }
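// A minimal sketch of a drain() call site (the closure name is
// illustrative); drain() returns false if it yielded before the
// stack was emptied:
//
//   bool done = _markStack.drain(&my_oop_closure, _nextMarkBitMap,
//                                true /* yield_after */);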
373 void CMMarkStack::oops_do(OopClosure* f) {
374 if (_index == 0) return;
375 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
376 "Bound must be set.");
377 for (int i = 0; i < _oops_do_bound; i++) {
378 f->do_oop(&_base[i]);
379 }
380 _oops_do_bound = -1;
381 }
383 bool ConcurrentMark::not_yet_marked(oop obj) const {
384 return (_g1h->is_obj_ill(obj)
385 || (_g1h->is_in_permanent(obj)
386 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
387 }
389 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
390 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
391 #endif // _MSC_VER
393 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
394 int max_regions) :
395 _markBitMap1(rs, MinObjAlignment - 1),
396 _markBitMap2(rs, MinObjAlignment - 1),
398 _parallel_marking_threads(0),
399 _sleep_factor(0.0),
400 _marking_task_overhead(1.0),
401 _cleanup_sleep_factor(0.0),
402 _cleanup_task_overhead(1.0),
403 _region_bm(max_regions, false /* in_resource_area*/),
404 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
405 CardTableModRefBS::card_shift,
406 false /* in_resource_area*/),
407 _prevMarkBitMap(&_markBitMap1),
408 _nextMarkBitMap(&_markBitMap2),
409 _at_least_one_mark_complete(false),
411 _markStack(this),
412 _regionStack(),
413 // _finger set in set_non_marking_state
415 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
416 // _active_tasks set in set_non_marking_state
417 // _tasks set inside the constructor
418 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
419 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
421 _has_overflown(false),
422 _concurrent(false),
423 _has_aborted(false),
424 _restart_for_overflow(false),
425 _concurrent_marking_in_progress(false),
426 _should_gray_objects(false),
428 // _verbose_level set below
430 _init_times(),
431 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
432 _cleanup_times(),
433 _total_counting_time(0.0),
434 _total_rs_scrub_time(0.0),
436 _parallel_workers(NULL),
437 _cleanup_co_tracker(G1CLGroup)
438 {
439 CMVerboseLevel verbose_level =
440 (CMVerboseLevel) G1MarkingVerboseLevel;
441 if (verbose_level < no_verbose)
442 verbose_level = no_verbose;
443 if (verbose_level > high_verbose)
444 verbose_level = high_verbose;
445 _verbose_level = verbose_level;
447 if (verbose_low())
448 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
449 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
451 _markStack.allocate(G1MarkStackSize);
452 _regionStack.allocate(G1MarkRegionStackSize);
454 // Create & start a ConcurrentMark thread.
455 if (G1ConcMark) {
456 _cmThread = new ConcurrentMarkThread(this);
457 assert(cmThread() != NULL, "CM Thread should have been created");
458 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
459 } else {
460 _cmThread = NULL;
461 }
462 _g1h = G1CollectedHeap::heap();
463 assert(CGC_lock != NULL, "Where's the CGC_lock?");
464 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
465 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
467 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
468 satb_qs.set_buffer_size(G1SATBLogBufferSize);
470 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
471 _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
472 for (int i = 0 ; i < size; i++) {
473 _par_cleanup_thread_state[i] = new ParCleanupThreadState;
474 }
476 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
477 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
479 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
480 _active_tasks = _max_task_num;
481 for (int i = 0; i < (int) _max_task_num; ++i) {
482 CMTaskQueue* task_queue = new CMTaskQueue();
483 task_queue->initialize();
484 _task_queues->register_queue(i, task_queue);
486 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
487 _accum_task_vtime[i] = 0.0;
488 }
490 if (ParallelMarkingThreads > ParallelGCThreads) {
491 vm_exit_during_initialization("Can't have more ParallelMarkingThreads "
492 "than ParallelGCThreads.");
493 }
494 if (ParallelGCThreads == 0) {
495 // if we are not running with any parallel GC threads we will not
496 // spawn any marking threads either
497 _parallel_marking_threads = 0;
498 _sleep_factor = 0.0;
499 _marking_task_overhead = 1.0;
500 } else {
501 if (ParallelMarkingThreads > 0) {
502 // notice that ParallelMarkingThreads overrides G1MarkingOverheadPercent
503 // if both are set
505 _parallel_marking_threads = ParallelMarkingThreads;
506 _sleep_factor = 0.0;
507 _marking_task_overhead = 1.0;
508 } else if (G1MarkingOverheadPercent > 0) {
509 // we will calculate the number of parallel marking threads
510 // based on a target overhead with respect to the soft real-time
511 // goal
513 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
514 double overall_cm_overhead =
515 (double) MaxGCPauseMillis * marking_overhead /
516 (double) GCPauseIntervalMillis;
517 double cpu_ratio = 1.0 / (double) os::processor_count();
518 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
519 double marking_task_overhead =
520 overall_cm_overhead / marking_thread_num *
521 (double) os::processor_count();
522 double sleep_factor =
523 (1.0 - marking_task_overhead) / marking_task_overhead;
525 _parallel_marking_threads = (size_t) marking_thread_num;
526 _sleep_factor = sleep_factor;
527 _marking_task_overhead = marking_task_overhead;
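// Worked example with illustrative values (not defaults): with
// G1MarkingOverheadPercent == 50, MaxGCPauseMillis == 200 and
// GCPauseIntervalMillis == 400, overall_cm_overhead == 0.25. On an
// 8-way machine cpu_ratio == 0.125, so marking_thread_num ==
// ceil(0.25 / 0.125) == 2, marking_task_overhead ==
// 0.25 / 2 * 8 == 1.0, and sleep_factor == (1.0 - 1.0) / 1.0 == 0.0.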
528 } else {
529 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
530 _sleep_factor = 0.0;
531 _marking_task_overhead = 1.0;
532 }
534 if (parallel_marking_threads() > 1)
535 _cleanup_task_overhead = 1.0;
536 else
537 _cleanup_task_overhead = marking_task_overhead();
538 _cleanup_sleep_factor =
539 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
541 #if 0
542 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
543 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
544 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
545 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
546 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
547 #endif
549 guarantee( parallel_marking_threads() > 0, "peace of mind" );
550 _parallel_workers = new WorkGang("Parallel Marking Threads",
551 (int) parallel_marking_threads(), false, true);
552 if (_parallel_workers == NULL)
553 vm_exit_during_initialization("Failed necessary allocation.");
554 }
556 // so that the call below can read a sensible value
557 _heap_start = (HeapWord*) rs.base();
558 set_non_marking_state();
559 }
561 void ConcurrentMark::update_g1_committed(bool force) {
562 // If concurrent marking is not in progress, then we do not need to
563 // update _heap_end. This has a subtle and important
564 // side-effect. Imagine that two evacuation pauses happen between
565 // marking completion and remark. The first one can grow the
566 // heap (hence now the finger is below the heap end). Then, the
567 // second one could unnecessarily push regions on the region
568 // stack. This causes the invariant that the region stack is empty
569 // at the beginning of remark to be false. By ensuring that we do
570 // not observe heap expansions after marking is complete, then we do
571 // not have this problem.
572 if (!concurrent_marking_in_progress() && !force)
573 return;
575 MemRegion committed = _g1h->g1_committed();
576 tmp_guarantee_CM( committed.start() == _heap_start,
577 "start shouldn't change" );
578 HeapWord* new_end = committed.end();
579 if (new_end > _heap_end) {
580 // The heap has been expanded.
582 _heap_end = new_end;
583 }
584 // Notice that the heap can also shrink. However, this only happens
585 // during a Full GC (at least currently) and the entire marking
586 // phase will bail out and the task will not be restarted. So, let's
587 // do nothing.
588 }
590 void ConcurrentMark::reset() {
591 // Starting values for these two. This should be called in a STW
592 // phase. CM will be notified of any future g1_committed
593 // expansions at the end of evacuation pauses, when tasks are
594 // inactive.
595 MemRegion committed = _g1h->g1_committed();
596 _heap_start = committed.start();
597 _heap_end = committed.end();
599 guarantee( _heap_start != NULL &&
600 _heap_end != NULL &&
601 _heap_start < _heap_end, "heap bounds should look ok" );
603 // reset all the marking data structures and any necessary flags
604 clear_marking_state();
606 if (verbose_low())
607 gclog_or_tty->print_cr("[global] resetting");
609 // We do reset all of them, since different phases will use
610 // a different number of active threads. So, it's easiest to have all
611 // of them ready.
612 for (int i = 0; i < (int) _max_task_num; ++i)
613 _tasks[i]->reset(_nextMarkBitMap);
615 // we need this to make sure that the flag is on during the evac
616 // pause with initial mark piggy-backed
617 set_concurrent_marking_in_progress();
618 }
620 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
621 guarantee( active_tasks <= _max_task_num, "we should not have more" );
623 _active_tasks = active_tasks;
624 // Need to update the three data structures below according to the
625 // number of active threads for this phase.
626 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
627 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
628 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
630 _concurrent = concurrent;
631 // We propagate this to all tasks, not just the active ones.
632 for (int i = 0; i < (int) _max_task_num; ++i)
633 _tasks[i]->set_concurrent(concurrent);
635 if (concurrent) {
636 set_concurrent_marking_in_progress();
637 } else {
638 // We currently assume that the concurrent flag has been set to
639 // false before we start remark. At this point we should also be
640 // in a STW phase.
641 guarantee( !concurrent_marking_in_progress(), "invariant" );
642 guarantee( _finger == _heap_end, "only way to get here" );
643 update_g1_committed(true);
644 }
645 }
647 void ConcurrentMark::set_non_marking_state() {
648 // We set the global marking state to some default values when we're
649 // not doing marking.
650 clear_marking_state();
651 _active_tasks = 0;
652 clear_concurrent_marking_in_progress();
653 }
655 ConcurrentMark::~ConcurrentMark() {
656 int size = (int) MAX2(ParallelGCThreads, (size_t)1);
657 for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
658 FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
659 _par_cleanup_thread_state);
661 for (int i = 0; i < (int) _max_task_num; ++i) {
662 delete _task_queues->queue(i);
663 delete _tasks[i];
664 }
665 delete _task_queues;
666 FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
667 }
669 // This closure is used to mark refs into the g1 generation
670 // from external roots in the CMS bit map.
671 // Called at the first checkpoint.
672 //
674 #define PRINT_REACHABLE_AT_INITIAL_MARK 0
675 #if PRINT_REACHABLE_AT_INITIAL_MARK
676 static FILE* reachable_file = NULL;
678 class PrintReachableClosure: public OopsInGenClosure {
679 CMBitMap* _bm;
680 int _level;
681 public:
682 PrintReachableClosure(CMBitMap* bm) :
683 _bm(bm), _level(0) {
684 guarantee(reachable_file != NULL, "pre-condition");
685 }
686 void do_oop(oop* p) {
687 oop obj = *p;
688 HeapWord* obj_addr = (HeapWord*)obj;
689 if (obj == NULL) return;
690 fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n",
691 _level, p, (void*) obj, _bm->isMarked(obj_addr));
692 if (!_bm->isMarked(obj_addr)) {
693 _bm->mark(obj_addr);
694 _level++;
695 obj->oop_iterate(this);
696 _level--;
697 }
698 }
699 };
700 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
702 #define SEND_HEAP_DUMP_TO_FILE 0
703 #if SEND_HEAP_DUMP_TO_FILE
704 static FILE* heap_dump_file = NULL;
705 #endif // SEND_HEAP_DUMP_TO_FILE
707 void ConcurrentMark::clearNextBitmap() {
708 guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition.");
710 // clear the mark bitmap (no grey objects to start with).
711 // We need to do this in chunks and offer to yield in between
712 // each chunk.
713 HeapWord* start = _nextMarkBitMap->startWord();
714 HeapWord* end = _nextMarkBitMap->endWord();
715 HeapWord* cur = start;
716 size_t chunkSize = M;
717 while (cur < end) {
718 HeapWord* next = cur + chunkSize;
719 if (next > end)
720 next = end;
721 MemRegion mr(cur,next);
722 _nextMarkBitMap->clearRange(mr);
723 cur = next;
724 do_yield_check();
725 }
726 }
728 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
729 public:
730 bool doHeapRegion(HeapRegion* r) {
731 if (!r->continuesHumongous()) {
732 r->note_start_of_marking(true);
733 }
734 return false;
735 }
736 };
738 void ConcurrentMark::checkpointRootsInitialPre() {
739 G1CollectedHeap* g1h = G1CollectedHeap::heap();
740 G1CollectorPolicy* g1p = g1h->g1_policy();
742 _has_aborted = false;
744 // Find all the reachable objects...
745 #if PRINT_REACHABLE_AT_INITIAL_MARK
746 guarantee(reachable_file == NULL, "Protocol");
747 char fn_buf[100];
748 sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id());
749 reachable_file = fopen(fn_buf, "w");
750 // clear the mark bitmap (no grey objects to start with)
751 _nextMarkBitMap->clearAll();
752 PrintReachableClosure prcl(_nextMarkBitMap);
753 g1h->process_strong_roots(
754 false, // fake perm gen collection
755 SharedHeap::SO_AllClasses,
756 &prcl, // Regular roots
757 &prcl // Perm Gen Roots
758 );
759 // The root iteration above "consumed" dirty cards in the perm gen.
760 // Therefore, as a shortcut, we dirty all such cards.
761 g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false);
762 fclose(reachable_file);
763 reachable_file = NULL;
764 // clear the mark bitmap again.
765 _nextMarkBitMap->clearAll();
766 COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
767 COMPILER2_PRESENT(DerivedPointerTable::clear());
768 #endif // PRINT_REACHABLE_AT_INITIAL_MARK
770 // Initialise marking structures. This has to be done in a STW phase.
771 reset();
772 }
774 class CMMarkRootsClosure: public OopsInGenClosure {
775 private:
776 ConcurrentMark* _cm;
777 G1CollectedHeap* _g1h;
778 bool _do_barrier;
780 public:
781 CMMarkRootsClosure(ConcurrentMark* cm,
782 G1CollectedHeap* g1h,
783 bool do_barrier) : _cm(cm), _g1h(g1h),
784 _do_barrier(do_barrier) { }
786 virtual void do_oop(narrowOop* p) {
787 guarantee(false, "NYI");
788 }
790 virtual void do_oop(oop* p) {
791 oop thisOop = *p;
792 if (thisOop != NULL) {
793 assert(thisOop->is_oop() || thisOop->mark() == NULL,
794 "expected an oop, possibly with mark word displaced");
795 HeapWord* addr = (HeapWord*)thisOop;
796 if (_g1h->is_in_g1_reserved(addr)) {
797 _cm->grayRoot(thisOop);
798 }
799 }
800 if (_do_barrier) {
801 assert(!_g1h->is_in_g1_reserved(p),
802 "Should be called on external roots");
803 do_barrier(p);
804 }
805 }
806 };
808 void ConcurrentMark::checkpointRootsInitialPost() {
809 G1CollectedHeap* g1h = G1CollectedHeap::heap();
811 // For each region note start of marking.
812 NoteStartOfMarkHRClosure startcl;
813 g1h->heap_region_iterate(&startcl);
815 // Start weak-reference discovery.
816 ReferenceProcessor* rp = g1h->ref_processor();
817 rp->verify_no_references_recorded();
818 rp->enable_discovery(); // enable ("weak") refs discovery
819 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
821 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
822 satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold);
823 satb_mq_set.set_active_all_threads(true);
825 // update_g1_committed() will be called at the end of an evac pause
826 // when marking is on. So, it's also called at the end of the
827 // initial-mark pause to update the heap end, if the heap expands
828 // during it. No need to call it here.
830 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
832 size_t max_marking_threads =
833 MAX2((size_t) 1, parallel_marking_threads());
834 for (int i = 0; i < (int)_max_task_num; ++i) {
835 _tasks[i]->enable_co_tracker();
836 if (i < (int) max_marking_threads)
837 _tasks[i]->reset_co_tracker(marking_task_overhead());
838 else
839 _tasks[i]->reset_co_tracker(0.0);
840 }
841 }
843 // Checkpoint the roots into this generation from outside
844 // this generation. [Note this initial checkpoint need only
845 // be approximate -- we'll do a catch up phase subsequently.]
846 void ConcurrentMark::checkpointRootsInitial() {
847 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
848 G1CollectedHeap* g1h = G1CollectedHeap::heap();
850 double start = os::elapsedTime();
851 GCOverheadReporter::recordSTWStart(start);
853 // If there has not been a GC[n-1] since last GC[n] cycle completed,
854 // precede our marking with a collection of all
855 // younger generations to keep floating garbage to a minimum.
856 // YSR: we won't do this for now -- it's an optimization to be
857 // done post-beta.
859 // YSR: ignoring weak refs for now; will do at bug fixing stage
860 // EVM: assert(discoveredRefsAreClear());
863 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
864 g1p->record_concurrent_mark_init_start();
865 checkpointRootsInitialPre();
867 // YSR: when concurrent precleaning is in place, we'll
868 // need to clear the cached card table here
870 ResourceMark rm;
871 HandleMark hm;
873 g1h->ensure_parsability(false);
874 g1h->perm_gen()->save_marks();
876 CMMarkRootsClosure notOlder(this, g1h, false);
877 CMMarkRootsClosure older(this, g1h, true);
879 g1h->set_marking_started();
880 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
882 g1h->process_strong_roots(false, // fake perm gen collection
883 SharedHeap::SO_AllClasses,
884 &notOlder, // Regular roots
885 &older // Perm Gen Roots
886 );
887 checkpointRootsInitialPost();
889 // Statistics.
890 double end = os::elapsedTime();
891 _init_times.add((end - start) * 1000.0);
892 GCOverheadReporter::recordSTWEnd(end);
894 g1p->record_concurrent_mark_init_end();
895 }
897 /*
898 Notice that in the next two methods, we actually leave the STS
899 during the barrier sync and join it immediately afterwards. If we
900 do not do this, then the following deadlock can occur: one
901 thread could be in the barrier sync code, waiting for the other
902 thread to also sync up, whereas another one could be trying to
903 yield, while also waiting for the other threads to sync up too.
905 Because the thread that does the sync barrier has left the STS, it
906 is possible that a Full GC or an evacuation pause occurs while it
907 is suspended. This is actually safe, since entering the sync
908 barrier is one of the last things do_marking_step() does, and it
909 doesn't manipulate any data structures afterwards.
910 */
912 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
913 if (verbose_low())
914 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
916 ConcurrentGCThread::stsLeave();
917 _first_overflow_barrier_sync.enter();
918 ConcurrentGCThread::stsJoin();
919 // at this point everyone should have synced up and not be doing any
920 // more work
922 if (verbose_low())
923 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
925 // let task 0 do this
926 if (task_num == 0) {
927 // task 0 is responsible for clearing the global data structures
928 clear_marking_state();
930 if (PrintGC) {
931 gclog_or_tty->date_stamp(PrintGCDateStamps);
932 gclog_or_tty->stamp(PrintGCTimeStamps);
933 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
934 }
935 }
937 // after this, each task should reset its own data structures then
938 // then go into the second barrier
939 }
941 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
942 if (verbose_low())
943 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
945 ConcurrentGCThread::stsLeave();
946 _second_overflow_barrier_sync.enter();
947 ConcurrentGCThread::stsJoin();
948 // at this point everything should be re-initialised and ready to go
950 if (verbose_low())
951 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
952 }
954 void ConcurrentMark::grayRoot(oop p) {
955 HeapWord* addr = (HeapWord*) p;
956 // We can't really check against _heap_start and _heap_end, since it
957 // is possible during an evacuation pause with piggy-backed
958 // initial-mark that the committed space is expanded during the
959 // pause without CM observing this change. So the assertion below
960 // is a bit conservative, but better than nothing.
961 tmp_guarantee_CM( _g1h->g1_committed().contains(addr),
962 "address should be within the heap bounds" );
964 if (!_nextMarkBitMap->isMarked(addr))
965 _nextMarkBitMap->parMark(addr);
966 }
968 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
969 // The objects on the region have already been marked "in bulk" by
970 // the caller. We only need to decide whether to push the region on
971 // the region stack or not.
973 if (!concurrent_marking_in_progress() || !_should_gray_objects)
974 // We're done with marking and waiting for remark. We do not need to
975 // push anything else on the region stack.
976 return;
978 HeapWord* finger = _finger;
980 if (verbose_low())
981 gclog_or_tty->print_cr("[global] attempting to push "
982 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
983 PTR_FORMAT, mr.start(), mr.end(), finger);
985 if (mr.start() < finger) {
986 // The finger is always heap region aligned and it is not possible
987 // for mr to span heap regions.
988 tmp_guarantee_CM( mr.end() <= finger, "invariant" );
990 tmp_guarantee_CM( mr.start() <= mr.end() &&
991 _heap_start <= mr.start() &&
992 mr.end() <= _heap_end,
993 "region boundaries should fall within the committed space" );
994 if (verbose_low())
995 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
996 "below the finger, pushing it",
997 mr.start(), mr.end());
999 if (!region_stack_push(mr)) {
1000 if (verbose_low())
1001 gclog_or_tty->print_cr("[global] region stack has overflown.");
1002 }
1003 }
1004 }
1006 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1007 // The object is not marked by the caller. We need to at least mark
1008 // it and maybe push it on the stack.
1010 HeapWord* addr = (HeapWord*)p;
1011 if (!_nextMarkBitMap->isMarked(addr)) {
1012 // We definitely need to mark it, irrespective of whether we bail out
1013 // because we're done with marking.
1014 if (_nextMarkBitMap->parMark(addr)) {
1015 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1016 // If we're done with concurrent marking and we're waiting for
1017 // remark, then we're not pushing anything on the stack.
1018 return;
1020 // No OrderAccess::store_load() is needed. It is implicit in the
1021 // CAS done in parMark(addr) above
1022 HeapWord* finger = _finger;
1024 if (addr < finger) {
1025 if (!mark_stack_push(oop(addr))) {
1026 if (verbose_low())
1027 gclog_or_tty->print_cr("[global] global stack overflow "
1028 "during parMark");
1029 }
1030 }
1031 }
1032 }
1033 }
1035 class CMConcurrentMarkingTask: public AbstractGangTask {
1036 private:
1037 ConcurrentMark* _cm;
1038 ConcurrentMarkThread* _cmt;
1040 public:
1041 void work(int worker_i) {
1042 guarantee( Thread::current()->is_ConcurrentGC_thread(),
1043 "this should only be done by a conc GC thread" );
1045 double start_vtime = os::elapsedVTime();
1047 ConcurrentGCThread::stsJoin();
1049 guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" );
1050 CMTask* the_task = _cm->task(worker_i);
1051 the_task->start_co_tracker();
1052 the_task->record_start_time();
1053 if (!_cm->has_aborted()) {
1054 do {
1055 double start_vtime_sec = os::elapsedVTime();
1056 double start_time_sec = os::elapsedTime();
1057 the_task->do_marking_step(10.0);
1058 double end_time_sec = os::elapsedTime();
1059 double end_vtime_sec = os::elapsedVTime();
1060 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1061 double elapsed_time_sec = end_time_sec - start_time_sec;
1062 _cm->clear_has_overflown();
1064 bool ret = _cm->do_yield_check(worker_i);
1066 jlong sleep_time_ms;
1067 if (!_cm->has_aborted() && the_task->has_aborted()) {
1068 sleep_time_ms =
1069 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1070 ConcurrentGCThread::stsLeave();
1071 os::sleep(Thread::current(), sleep_time_ms, false);
1072 ConcurrentGCThread::stsJoin();
1073 }
1074 double end_time2_sec = os::elapsedTime();
1075 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1077 the_task->update_co_tracker();
1079 #if 0
1080 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1081 "overhead %1.4lf",
1082 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1083 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1084 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1085 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1086 #endif
1087 } while (!_cm->has_aborted() && the_task->has_aborted());
1088 }
1089 the_task->record_end_time();
1090 guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" );
1092 ConcurrentGCThread::stsLeave();
1094 double end_vtime = os::elapsedVTime();
1095 the_task->update_co_tracker(true);
1096 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1097 }
1099 CMConcurrentMarkingTask(ConcurrentMark* cm,
1100 ConcurrentMarkThread* cmt) :
1101 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1103 ~CMConcurrentMarkingTask() { }
1104 };
1106 void ConcurrentMark::markFromRoots() {
1107 // we might be tempted to assert that:
1108 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1109 // "inconsistent argument?");
1110 // However that wouldn't be right, because it's possible that
1111 // a safepoint is indeed in progress as a younger generation
1112 // stop-the-world GC happens even as we mark in this generation.
1114 _restart_for_overflow = false;
1116 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1118 CMConcurrentMarkingTask markingTask(this, cmThread());
1119 if (parallel_marking_threads() > 0)
1120 _parallel_workers->run_task(&markingTask);
1121 else
1122 markingTask.work(0);
1123 print_stats();
1124 }
1126 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1127 // world is stopped at this checkpoint
1128 assert(SafepointSynchronize::is_at_safepoint(),
1129 "world should be stopped");
1130 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1132 // If a full collection has happened, we shouldn't do this.
1133 if (has_aborted()) {
1134 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1135 return;
1136 }
1138 G1CollectorPolicy* g1p = g1h->g1_policy();
1139 g1p->record_concurrent_mark_remark_start();
1141 double start = os::elapsedTime();
1142 GCOverheadReporter::recordSTWStart(start);
1144 checkpointRootsFinalWork();
1146 double mark_work_end = os::elapsedTime();
1148 weakRefsWork(clear_all_soft_refs);
1150 if (has_overflown()) {
1151 // Oops. We overflowed. Restart concurrent marking.
1152 _restart_for_overflow = true;
1153 // Clear the flag. We do not need it any more.
1154 clear_has_overflown();
1155 if (G1TraceMarkStackOverflow)
1156 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1157 } else {
1158 // We're done with marking.
1159 JavaThread::satb_mark_queue_set().set_active_all_threads(false);
1160 }
1162 #if VERIFY_OBJS_PROCESSED
1163 _scan_obj_cl.objs_processed = 0;
1164 ThreadLocalObjQueue::objs_enqueued = 0;
1165 #endif
1167 // Statistics
1168 double now = os::elapsedTime();
1169 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1170 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1171 _remark_times.add((now - start) * 1000.0);
1173 GCOverheadReporter::recordSTWEnd(now);
1174 for (int i = 0; i < (int)_max_task_num; ++i)
1175 _tasks[i]->disable_co_tracker();
1176 _cleanup_co_tracker.enable();
1177 _cleanup_co_tracker.reset(cleanup_task_overhead());
1178 g1p->record_concurrent_mark_remark_end();
1179 }
1182 #define CARD_BM_TEST_MODE 0
1184 class CalcLiveObjectsClosure: public HeapRegionClosure {
1186 CMBitMapRO* _bm;
1187 ConcurrentMark* _cm;
1188 COTracker* _co_tracker;
1189 bool _changed;
1190 bool _yield;
1191 size_t _words_done;
1192 size_t _tot_live;
1193 size_t _tot_used;
1194 size_t _regions_done;
1195 double _start_vtime_sec;
1197 BitMap* _region_bm;
1198 BitMap* _card_bm;
1199 intptr_t _bottom_card_num;
1200 bool _final;
1202 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1203 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1204 #if CARD_BM_TEST_MODE
1205 guarantee(_card_bm->at(i - _bottom_card_num),
1206 "Should already be set.");
1207 #else
1208 _card_bm->par_at_put(i - _bottom_card_num, 1);
1209 #endif
1210 }
1211 }
1213 public:
1214 CalcLiveObjectsClosure(bool final,
1215 CMBitMapRO *bm, ConcurrentMark *cm,
1216 BitMap* region_bm, BitMap* card_bm,
1217 COTracker* co_tracker) :
1218 _bm(bm), _cm(cm), _changed(false), _yield(true),
1219 _words_done(0), _tot_live(0), _tot_used(0),
1220 _region_bm(region_bm), _card_bm(card_bm),
1221 _final(final), _co_tracker(co_tracker),
1222 _regions_done(0), _start_vtime_sec(0.0)
1223 {
1224 _bottom_card_num =
1225 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1226 CardTableModRefBS::card_shift);
1227 }
1229 bool doHeapRegion(HeapRegion* hr) {
1230 if (_co_tracker != NULL)
1231 _co_tracker->update();
1233 if (!_final && _regions_done == 0)
1234 _start_vtime_sec = os::elapsedVTime();
1236 if (hr->continuesHumongous()) {
1237 HeapRegion* hum_start = hr->humongous_start_region();
1238 // If the head region of the humongous region has been determined
1239 // to be alive, then all the tail regions should be marked
1240 // as well.
1241 if (_region_bm->at(hum_start->hrs_index())) {
1242 _region_bm->par_at_put(hr->hrs_index(), 1);
1243 }
1244 return false;
1245 }
1247 HeapWord* nextTop = hr->next_top_at_mark_start();
1248 HeapWord* start = hr->top_at_conc_mark_count();
1249 assert(hr->bottom() <= start && start <= hr->end() &&
1250 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1251 start <= nextTop,
1252 "Preconditions.");
1253 // Otherwise, record the number of words we'll examine.
1254 size_t words_done = (nextTop - start);
1255 // Find the first marked object at or after "start".
1256 start = _bm->getNextMarkedWordAddress(start, nextTop);
1257 size_t marked_bytes = 0;
1259 // Below, the term "card num" means the result of shifting an address
1260 // by the card shift -- address 0 corresponds to card number 0. One
1261 // must subtract the card num of the bottom of the heap to obtain a
1262 // card table index.
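// Illustrative example (values assumed, not from a real run): with
// card_shift == 9 (512-byte cards), an object at address 0x2000 has
// card num 0x2000 >> 9 == 16; if the heap bottom is 0x1000,
// _bottom_card_num == 8, so the object's card table index is
// 16 - 8 == 8.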
1263 // The first card num of the sequence of live cards currently being
1264 // constructed. -1 ==> no sequence.
1265 intptr_t start_card_num = -1;
1266 // The last card num of the sequence of live cards currently being
1267 // constructed. -1 ==> no sequence.
1268 intptr_t last_card_num = -1;
1270 while (start < nextTop) {
1271 if (_yield && _cm->do_yield_check()) {
1272 // We yielded. It might be for a full collection, in which case
1273 // all bets are off; terminate the traversal.
1274 if (_cm->has_aborted()) {
1275 _changed = false;
1276 return true;
1277 } else {
1278 // Otherwise, it might be a collection pause, and the region
1279 // we're looking at might be in the collection set. We'll
1280 // abandon this region.
1281 return false;
1282 }
1283 }
1284 oop obj = oop(start);
1285 int obj_sz = obj->size();
1286 // The card num of the start of the current object.
1287 intptr_t obj_card_num =
1288 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1290 HeapWord* obj_last = start + obj_sz - 1;
1291 intptr_t obj_last_card_num =
1292 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1294 if (obj_card_num != last_card_num) {
1295 if (start_card_num == -1) {
1296 assert(last_card_num == -1, "Both or neither.");
1297 start_card_num = obj_card_num;
1298 } else {
1299 assert(last_card_num != -1, "Both or neither.");
1300 assert(obj_card_num >= last_card_num, "Inv");
1301 if ((obj_card_num - last_card_num) > 1) {
1302 // Mark the last run, and start a new one.
1303 mark_card_num_range(start_card_num, last_card_num);
1304 start_card_num = obj_card_num;
1305 }
1306 }
1307 #if CARD_BM_TEST_MODE
1308 /*
1309 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1310 obj_card_num - _bottom_card_num,
1311 obj_last_card_num - _bottom_card_num);
1312 */
1313 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1314 _card_bm->par_at_put(j - _bottom_card_num, 1);
1315 }
1316 #endif
1317 }
1318 // In any case, we set the last card num.
1319 last_card_num = obj_last_card_num;
1321 marked_bytes += obj_sz * HeapWordSize;
1322 // Find the next marked object after this one.
1323 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1324 _changed = true;
1325 }
1326 // Handle the last range, if any.
1327 if (start_card_num != -1)
1328 mark_card_num_range(start_card_num, last_card_num);
1329 if (_final) {
1330 // Mark the allocated-since-marking portion...
1331 HeapWord* tp = hr->top();
1332 if (nextTop < tp) {
1333 start_card_num =
1334 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1335 last_card_num =
1336 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1337 mark_card_num_range(start_card_num, last_card_num);
1338 // This definitely means the region has live objects.
1339 _region_bm->par_at_put(hr->hrs_index(), 1);
1340 }
1341 }
1343 hr->add_to_marked_bytes(marked_bytes);
1344 // Update the live region bitmap.
1345 if (marked_bytes > 0) {
1346 _region_bm->par_at_put(hr->hrs_index(), 1);
1347 }
1348 hr->set_top_at_conc_mark_count(nextTop);
1349 _tot_live += hr->next_live_bytes();
1350 _tot_used += hr->used();
1351 _words_done = words_done;
1353 if (!_final) {
1354 ++_regions_done;
1355 if (_regions_done % 10 == 0) {
1356 double end_vtime_sec = os::elapsedVTime();
1357 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1358 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1359 jlong sleep_time_ms =
1360 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1361 #if 0
1362 gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, "
1363 "overhead %1.4lf",
1364 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1365 _co_tracker->concOverhead(os::elapsedTime()));
1366 #endif
1367 os::sleep(Thread::current(), sleep_time_ms, false);
1368 _start_vtime_sec = end_vtime_sec;
1369 }
1370 }
1371 }
1373 return false;
1374 }
1376 bool changed() { return _changed; }
1377 void reset() { _changed = false; _words_done = 0; }
1378 void no_yield() { _yield = false; }
1379 size_t words_done() { return _words_done; }
1380 size_t tot_live() { return _tot_live; }
1381 size_t tot_used() { return _tot_used; }
1382 };
1385 void ConcurrentMark::calcDesiredRegions() {
1386 guarantee( _cleanup_co_tracker.enabled(), "invariant" );
1387 _cleanup_co_tracker.start();
1389 _region_bm.clear();
1390 _card_bm.clear();
1391 CalcLiveObjectsClosure calccl(false /*final*/,
1392 nextMarkBitMap(), this,
1393 &_region_bm, &_card_bm,
1394 &_cleanup_co_tracker);
1395 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1396 g1h->heap_region_iterate(&calccl);
1398 do {
1399 calccl.reset();
1400 g1h->heap_region_iterate(&calccl);
1401 } while (calccl.changed());
1403 _cleanup_co_tracker.update(true);
1404 }
1406 class G1ParFinalCountTask: public AbstractGangTask {
1407 protected:
1408 G1CollectedHeap* _g1h;
1409 CMBitMap* _bm;
1410 size_t _n_workers;
1411 size_t *_live_bytes;
1412 size_t *_used_bytes;
1413 BitMap* _region_bm;
1414 BitMap* _card_bm;
1415 public:
1416 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1417 BitMap* region_bm, BitMap* card_bm) :
1418 AbstractGangTask("G1 final counting"), _g1h(g1h),
1419 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1420 {
1421 if (ParallelGCThreads > 0)
1422 _n_workers = _g1h->workers()->total_workers();
1423 else
1424 _n_workers = 1;
1425 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1426 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1427 }
1429 ~G1ParFinalCountTask() {
1430 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1431 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1432 }
1434 void work(int i) {
1435 CalcLiveObjectsClosure calccl(true /*final*/,
1436 _bm, _g1h->concurrent_mark(),
1437 _region_bm, _card_bm,
1438 NULL /* CO tracker */);
1439 calccl.no_yield();
1440 if (ParallelGCThreads > 0) {
1441 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1442 HeapRegion::FinalCountClaimValue);
1443 } else {
1444 _g1h->heap_region_iterate(&calccl);
1445 }
1446 assert(calccl.complete(), "Shouldn't have yielded!");
1448 guarantee( (size_t)i < _n_workers, "invariant" );
1449 _live_bytes[i] = calccl.tot_live();
1450 _used_bytes[i] = calccl.tot_used();
1451 }
1452 size_t live_bytes() {
1453 size_t live_bytes = 0;
1454 for (size_t i = 0; i < _n_workers; ++i)
1455 live_bytes += _live_bytes[i];
1456 return live_bytes;
1457 }
1458 size_t used_bytes() {
1459 size_t used_bytes = 0;
1460 for (size_t i = 0; i < _n_workers; ++i)
1461 used_bytes += _used_bytes[i];
1462 return used_bytes;
1463 }
1464 };
1466 class G1ParNoteEndTask;
1468 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1469 G1CollectedHeap* _g1;
1470 int _worker_num;
1471 size_t _max_live_bytes;
1472 size_t _regions_claimed;
1473 size_t _freed_bytes;
1474 size_t _cleared_h_regions;
1475 size_t _freed_regions;
1476 UncleanRegionList* _unclean_region_list;
1477 double _claimed_region_time;
1478 double _max_region_time;
1480 public:
1481 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1482 UncleanRegionList* list,
1483 int worker_num);
1484 size_t freed_bytes() { return _freed_bytes; }
1485 size_t cleared_h_regions() { return _cleared_h_regions; }
1486 size_t freed_regions() { return _freed_regions; }
1487 UncleanRegionList* unclean_region_list() {
1488 return _unclean_region_list;
1489 }
1491 bool doHeapRegion(HeapRegion *r);
1493 size_t max_live_bytes() { return _max_live_bytes; }
1494 size_t regions_claimed() { return _regions_claimed; }
1495 double claimed_region_time_sec() { return _claimed_region_time; }
1496 double max_region_time_sec() { return _max_region_time; }
1497 };
1499 class G1ParNoteEndTask: public AbstractGangTask {
1500 friend class G1NoteEndOfConcMarkClosure;
1501 protected:
1502 G1CollectedHeap* _g1h;
1503 size_t _max_live_bytes;
1504 size_t _freed_bytes;
1505 ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1506 public:
1507 G1ParNoteEndTask(G1CollectedHeap* g1h,
1508 ConcurrentMark::ParCleanupThreadState**
1509 par_cleanup_thread_state) :
1510 AbstractGangTask("G1 note end"), _g1h(g1h),
1511 _max_live_bytes(0), _freed_bytes(0),
1512 _par_cleanup_thread_state(par_cleanup_thread_state)
1513 {}
1515 void work(int i) {
1516 double start = os::elapsedTime();
1517 G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1518 &_par_cleanup_thread_state[i]->list,
1519 i);
1520 if (ParallelGCThreads > 0) {
1521 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1522 HeapRegion::NoteEndClaimValue);
1523 } else {
1524 _g1h->heap_region_iterate(&g1_note_end);
1525 }
1526 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1528 // Now finish up freeing the current thread's regions.
1529 _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1530 g1_note_end.cleared_h_regions(),
1531 0, NULL);
1532 {
1533 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1534 _max_live_bytes += g1_note_end.max_live_bytes();
1535 _freed_bytes += g1_note_end.freed_bytes();
1536 }
1537 double end = os::elapsedTime();
1538 if (G1PrintParCleanupStats) {
1539 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1540 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1541 i, start, end, (end-start)*1000.0,
1542 g1_note_end.regions_claimed(),
1543 g1_note_end.claimed_region_time_sec()*1000.0,
1544 g1_note_end.max_region_time_sec()*1000.0);
1545 }
1546 }
1547 size_t max_live_bytes() { return _max_live_bytes; }
1548 size_t freed_bytes() { return _freed_bytes; }
1549 };
1551 class G1ParScrubRemSetTask: public AbstractGangTask {
1552 protected:
1553 G1RemSet* _g1rs;
1554 BitMap* _region_bm;
1555 BitMap* _card_bm;
1556 public:
1557 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1558 BitMap* region_bm, BitMap* card_bm) :
1559 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1560 _region_bm(region_bm), _card_bm(card_bm)
1561 {}
1563 void work(int i) {
1564 if (ParallelGCThreads > 0) {
1565 _g1rs->scrub_par(_region_bm, _card_bm, i,
1566 HeapRegion::ScrubRemSetClaimValue);
1567 } else {
1568 _g1rs->scrub(_region_bm, _card_bm);
1569 }
1570 }
1572 };
1574 G1NoteEndOfConcMarkClosure::
1575 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1576 UncleanRegionList* list,
1577 int worker_num)
1578 : _g1(g1), _worker_num(worker_num),
1579 _max_live_bytes(0), _regions_claimed(0),
1580 _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1581 _claimed_region_time(0.0), _max_region_time(0.0),
1582 _unclean_region_list(list)
1583 {}
1585 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1586 // We use a claim value of zero here because all regions
1587 // were claimed with value 1 in the FinalCount task.
1588 r->reset_gc_time_stamp();
1589 if (!r->continuesHumongous()) {
1590 double start = os::elapsedTime();
1591 _regions_claimed++;
1592 r->note_end_of_marking();
1593 _max_live_bytes += r->max_live_bytes();
1594 _g1->free_region_if_totally_empty_work(r,
1595 _freed_bytes,
1596 _cleared_h_regions,
1597 _freed_regions,
1598 _unclean_region_list,
1599 true /*par*/);
1600 double region_time = (os::elapsedTime() - start);
1601 _claimed_region_time += region_time;
1602 if (region_time > _max_region_time) _max_region_time = region_time;
1603 }
1604 return false;
1605 }
1607 void ConcurrentMark::cleanup() {
1608 // world is stopped at this checkpoint
1609 assert(SafepointSynchronize::is_at_safepoint(),
1610 "world should be stopped");
1611 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1613 // If a full collection has happened, we shouldn't do this.
1614 if (has_aborted()) {
1615 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1616 return;
1617 }
1619 _cleanup_co_tracker.disable();
1621 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1622 g1p->record_concurrent_mark_cleanup_start();
1624 double start = os::elapsedTime();
1625 GCOverheadReporter::recordSTWStart(start);
1627 // Do counting once more with the world stopped for good measure.
1628 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1629 &_region_bm, &_card_bm);
1630 if (ParallelGCThreads > 0) {
1631 assert(g1h->check_heap_region_claim_values(
1632 HeapRegion::InitialClaimValue),
1633 "sanity check");
1635 int n_workers = g1h->workers()->total_workers();
1636 g1h->set_par_threads(n_workers);
1637 g1h->workers()->run_task(&g1_par_count_task);
1638 g1h->set_par_threads(0);
1640 assert(g1h->check_heap_region_claim_values(
1641 HeapRegion::FinalCountClaimValue),
1642 "sanity check");
1643 } else {
1644 g1_par_count_task.work(0);
1645 }
1647 size_t known_garbage_bytes =
1648 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1649 #if 0
1650 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1651 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1652 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1653 (double) known_garbage_bytes / (double) (1024 * 1024));
1654 #endif // 0
1655 g1p->set_known_garbage_bytes(known_garbage_bytes);
1657 size_t start_used_bytes = g1h->used();
1658 _at_least_one_mark_complete = true;
1659 g1h->set_marking_complete();
1661 double count_end = os::elapsedTime();
1662 double this_final_counting_time = (count_end - start);
1663 if (G1PrintParCleanupStats) {
1664 gclog_or_tty->print_cr("Cleanup:");
1665 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1666 this_final_counting_time*1000.0);
1667 }
1668 _total_counting_time += this_final_counting_time;
1670 // Install newly created mark bitMap as "prev".
1671 swapMarkBitMaps();
1673 g1h->reset_gc_time_stamp();
1675 // Note end of marking in all heap regions.
1676 double note_end_start = os::elapsedTime();
1677 G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1678 if (ParallelGCThreads > 0) {
1679 int n_workers = g1h->workers()->total_workers();
1680 g1h->set_par_threads(n_workers);
1681 g1h->workers()->run_task(&g1_par_note_end_task);
1682 g1h->set_par_threads(0);
1684 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1685 "sanity check");
1686 } else {
1687 g1_par_note_end_task.work(0);
1688 }
1689 g1h->set_unclean_regions_coming(true);
1690 double note_end_end = os::elapsedTime();
1691 // Tell the mutators that there might be unclean regions coming...
1692 if (G1PrintParCleanupStats) {
1693 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1694 (note_end_end - note_end_start)*1000.0);
1695 }
1697 // Note: we must scrub the rem sets BEFORE the
1698 // call below, since it affects the metric by which we sort the heap
1699 // regions.
1700 if (G1ScrubRemSets) {
1701 double rs_scrub_start = os::elapsedTime();
1702 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1703 if (ParallelGCThreads > 0) {
1704 int n_workers = g1h->workers()->total_workers();
1705 g1h->set_par_threads(n_workers);
1706 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1707 g1h->set_par_threads(0);
1709 assert(g1h->check_heap_region_claim_values(
1710 HeapRegion::ScrubRemSetClaimValue),
1711 "sanity check");
1712 } else {
1713 g1_par_scrub_rs_task.work(0);
1714 }
1716 double rs_scrub_end = os::elapsedTime();
1717 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1718 _total_rs_scrub_time += this_rs_scrub_time;
1719 }
1721 // this will also free any regions totally full of garbage objects,
1722 // and sort the regions.
1723 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1724 g1_par_note_end_task.freed_bytes(),
1725 g1_par_note_end_task.max_live_bytes());
1727 // Statistics.
1728 double end = os::elapsedTime();
1729 _cleanup_times.add((end - start) * 1000.0);
1730 GCOverheadReporter::recordSTWEnd(end);
1732 // G1CollectedHeap::heap()->print();
1733 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1734 // G1CollectedHeap::heap()->get_gc_time_stamp());
1736 if (PrintGC || PrintGCDetails) {
1737 g1h->print_size_transition(gclog_or_tty,
1738 start_used_bytes,
1739 g1h->used(),
1740 g1h->capacity());
1741 }
1743 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1744 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1746 // We need to make this be a "collection" so any collection pause that
1747 // races with it goes around and waits for completeCleanup to finish.
1748 g1h->increment_total_collections();
1750 #ifndef PRODUCT
1751 if (VerifyDuringGC) {
1752 G1CollectedHeap::heap()->prepare_for_verify();
1753 G1CollectedHeap::heap()->verify(true, false);
1754 }
1755 #endif
1756 }
1758 void ConcurrentMark::completeCleanup() {
1759 // A full collection intervened.
1760 if (has_aborted()) return;
1762 int first = 0;
1763 int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1764 for (int t = 0; t < last; t++) {
1765 UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1766 assert(list->well_formed(), "Inv");
1767 HeapRegion* hd = list->hd();
1768 while (hd != NULL) {
1769 // Now finish up the other stuff.
1770 hd->rem_set()->clear();
1771 HeapRegion* next_hd = hd->next_from_unclean_list();
1772 (void)list->pop();
1773 guarantee(list->hd() == next_hd, "how not?");
1774 _g1h->put_region_on_unclean_list(hd);
1775 if (!hd->isHumongous()) {
1776 // Increment the _free_regions count by 1.
1777 _g1h->finish_free_region_work(0, 0, 1, NULL);
1778 }
1779 hd = list->hd();
1780 guarantee(hd == next_hd, "how not?");
1781 }
1782 }
1783 }
1786 class G1CMIsAliveClosure: public BoolObjectClosure {
1787 G1CollectedHeap* _g1;
1788 public:
1789 G1CMIsAliveClosure(G1CollectedHeap* g1) :
1790 _g1(g1)
1791 {}
1793 void do_object(oop obj) {
1794 assert(false, "not to be invoked");
1795 }
1796 bool do_object_b(oop obj) {
1797 HeapWord* addr = (HeapWord*)obj;
1798 return addr != NULL &&
1799 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1800 }
1801 };
1803 class G1CMKeepAliveClosure: public OopClosure {
1804 G1CollectedHeap* _g1;
1805 ConcurrentMark* _cm;
1806 CMBitMap* _bitMap;
1807 public:
1808 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1809 CMBitMap* bitMap) :
1810 _g1(g1), _cm(cm),
1811 _bitMap(bitMap) {}
1813 void do_oop(narrowOop* p) {
1814 guarantee(false, "NYI");
1815 }
1817 void do_oop(oop* p) {
1818 oop thisOop = *p;
1819 HeapWord* addr = (HeapWord*)thisOop;
1820 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1821 _bitMap->mark(addr);
1822 _cm->mark_stack_push(thisOop);
1823 }
1824 }
1825 };
1827 class G1CMDrainMarkingStackClosure: public VoidClosure {
1828 CMMarkStack* _markStack;
1829 CMBitMap* _bitMap;
1830 G1CMKeepAliveClosure* _oopClosure;
1831 public:
1832 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1833 G1CMKeepAliveClosure* oopClosure) :
1834 _bitMap(bitMap),
1835 _markStack(markStack),
1836 _oopClosure(oopClosure)
1837 {}
1839 void do_void() {
1840 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1841 }
1842 };
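// How the three closures above cooperate in weakRefsWork() below: the
// is-alive closure decides whether a referent is already reachable, the
// keep-alive closure marks a referent that must be kept live and pushes
// it on the mark stack, and the drain closure transitively closes over
// whatever the keep-alive closure pushed. A sketch of the call pattern
// a reference processor might follow (illustrative only, not new code):
#if 0
  if (!is_alive.do_object_b(referent)) {  // referent not yet marked
    keep_alive.do_oop(referent_addr);     // mark it and push it
    drain.do_void();                      // scan everything just pushed
  }
#endif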
1844 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1845 ResourceMark rm;
1846 HandleMark hm;
1847 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1848 ReferenceProcessor* rp = g1h->ref_processor();
1850 // Process weak references.
1851 rp->setup_policy(clear_all_soft_refs);
1852 assert(_markStack.isEmpty(), "mark stack should be empty");
1854 G1CMIsAliveClosure g1IsAliveClosure (g1h);
1855 G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1856 G1CMDrainMarkingStackClosure
1857 g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1858 &g1KeepAliveClosure);
1860 // XXXYYY Also: copy the parallel ref processing code from CMS.
1861 rp->process_discovered_references(&g1IsAliveClosure,
1862 &g1KeepAliveClosure,
1863 &g1DrainMarkingStackClosure,
1864 NULL);
1865 assert(_markStack.overflow() || _markStack.isEmpty(),
1866 "mark stack should be empty (unless it overflowed)");
1867 if (_markStack.overflow()) {
1868 set_has_overflown();
1869 }
1871 rp->enqueue_discovered_references();
1872 rp->verify_no_references_recorded();
1873 assert(!rp->discovery_enabled(), "should have been disabled");
1875 // Now clean up stale oops in SymbolTable and StringTable
1876 SymbolTable::unlink(&g1IsAliveClosure);
1877 StringTable::unlink(&g1IsAliveClosure);
1878 }
1880 void ConcurrentMark::swapMarkBitMaps() {
1881 CMBitMapRO* temp = _prevMarkBitMap;
1882 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1883 _nextMarkBitMap = (CMBitMap*) temp;
1884 }
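// Note on the swap above: "prev" always denotes the bitmap of the last
// completed marking (treated as read-only from here on, hence the
// CMBitMapRO type), while "next" is the bitmap under construction. The
// swap is safe at this point in cleanup() because the next bitmap has
// just been finalized; the bitmap that becomes "next" is expected to be
// cleared before the following marking cycle starts using it.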
1886 class CMRemarkTask: public AbstractGangTask {
1887 private:
1888 ConcurrentMark *_cm;
1890 public:
1891 void work(int worker_i) {
1892 // Since all available tasks are actually started, we should
1893 // only proceed if we're supposed to be active.
1894 if ((size_t)worker_i < _cm->active_tasks()) {
1895 CMTask* task = _cm->task(worker_i);
1896 task->record_start_time();
1897 do {
1898 task->do_marking_step(1000000000.0 /* something very large */);
1899 } while (task->has_aborted() && !_cm->has_overflown());
1900 // If we overflow, then we do not want to restart. We instead
1901 // want to abort remark and do concurrent marking again.
1902 task->record_end_time();
1903 }
1904 }
1906 CMRemarkTask(ConcurrentMark* cm) :
1907 AbstractGangTask("Par Remark"), _cm(cm) { }
1908 };
1910 void ConcurrentMark::checkpointRootsFinalWork() {
1911 ResourceMark rm;
1912 HandleMark hm;
1913 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1915 g1h->ensure_parsability(false);
1917 if (ParallelGCThreads > 0) {
1918 g1h->change_strong_roots_parity();
1919 // this is remark, so we'll use up all available threads
1920 int active_workers = ParallelGCThreads;
1921 set_phase(active_workers, false);
1923 CMRemarkTask remarkTask(this);
1924 // We will start all available threads, even if we decide that the
1925 // active_workers will be fewer. The extra ones will just bail out
1926 // immediately.
1927 int n_workers = g1h->workers()->total_workers();
1928 g1h->set_par_threads(n_workers);
1929 g1h->workers()->run_task(&remarkTask);
1930 g1h->set_par_threads(0);
1932 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1933 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1934 } else {
1935 g1h->change_strong_roots_parity();
1936 // this is remark, so we'll use up all available threads
1937 int active_workers = 1;
1938 set_phase(active_workers, false);
1940 CMRemarkTask remarkTask(this);
1941 // We will start all available threads, even if we decide that the
1942 // active_workers will be fewer. The extra ones will just bail out
1943 // immediately.
1944 remarkTask.work(0);
1946 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1947 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
1948 }
1950 print_stats();
1952 if (!restart_for_overflow())
1953 set_non_marking_state();
1955 #if VERIFY_OBJS_PROCESSED
1956 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1957 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1958 _scan_obj_cl.objs_processed,
1959 ThreadLocalObjQueue::objs_enqueued);
1960 guarantee(_scan_obj_cl.objs_processed ==
1961 ThreadLocalObjQueue::objs_enqueued,
1962 "Different number of objs processed and enqueued.");
1963 }
1964 #endif
1965 }
1967 class ReachablePrinterOopClosure: public OopClosure {
1968 private:
1969 G1CollectedHeap* _g1h;
1970 CMBitMapRO* _bitmap;
1971 outputStream* _out;
1973 public:
1974 ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
1975 _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }
1977 void do_oop(narrowOop* p) {
1978 guarantee(false, "NYI");
1979 }
1981 void do_oop(oop* p) {
1982 oop obj = *p;
1983 const char* str = NULL;
1984 const char* str2 = "";
1986 if (!_g1h->is_in_g1_reserved(obj))
1987 str = "outside G1 reserved";
1988 else {
1989 HeapRegion* hr = _g1h->heap_region_containing(obj);
1990 guarantee( hr != NULL, "invariant" );
1991 if (hr->obj_allocated_since_prev_marking(obj)) {
1992 str = "over TAMS";
1993 if (_bitmap->isMarked((HeapWord*) obj))
1994 str2 = " AND MARKED";
1995 } else if (_bitmap->isMarked((HeapWord*) obj))
1996 str = "marked";
1997 else
1998 str = "#### NOT MARKED ####";
1999 }
2001 _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
2002 p, (void*) obj, str, str2);
2003 }
2004 };
2006 class ReachablePrinterClosure: public BitMapClosure {
2007 private:
2008 CMBitMapRO* _bitmap;
2009 outputStream* _out;
2011 public:
2012 ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2013 _bitmap(bitmap), _out(out) { }
2015 bool do_bit(size_t offset) {
2016 HeapWord* addr = _bitmap->offsetToHeapWord(offset);
2017 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2019 _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
2020 oop(addr)->oop_iterate(&oopCl);
2021 _out->print_cr("");
2023 return true;
2024 }
2025 };
2027 class ObjInRegionReachablePrinterClosure : public ObjectClosure {
2028 private:
2029 CMBitMapRO* _bitmap;
2030 outputStream* _out;
2032 public:
2033 void do_object(oop o) {
2034 ReachablePrinterOopClosure oopCl(_bitmap, _out);
2036 _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
2037 o->oop_iterate(&oopCl);
2038 _out->print_cr("");
2039 }
2041 ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
2042 _bitmap(bitmap), _out(out) { }
2043 };
2045 class RegionReachablePrinterClosure : public HeapRegionClosure {
2046 private:
2047 CMBitMapRO* _bitmap;
2048 outputStream* _out;
2050 public:
2051 bool doHeapRegion(HeapRegion* hr) {
2052 HeapWord* b = hr->bottom();
2053 HeapWord* e = hr->end();
2054 HeapWord* t = hr->top();
2055 HeapWord* p = hr->prev_top_at_mark_start();
2056 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2057 "PTAMS: "PTR_FORMAT, b, e, t, p);
2058 _out->print_cr("");
2060 ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
2061 hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);
2063 return false;
2064 }
2066 RegionReachablePrinterClosure(CMBitMapRO* bitmap,
2067 outputStream* out) :
2068 _bitmap(bitmap), _out(out) { }
2069 };
2071 void ConcurrentMark::print_prev_bitmap_reachable() {
2072 outputStream* out = gclog_or_tty;
2074 #if SEND_HEAP_DUMP_TO_FILE
2075 guarantee(heap_dump_file == NULL, "Protocol");
2076 char fn_buf[100];
2077 sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
2078 heap_dump_file = fopen(fn_buf, "w");
2079 fileStream fstream(heap_dump_file);
2080 out = &fstream;
2081 #endif // SEND_HEAP_DUMP_TO_FILE
2083 RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
2084 out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
2085 _g1h->heap_region_iterate(&rcl);
2086 out->print_cr("");
2088 ReachablePrinterClosure cl(_prevMarkBitMap, out);
2089 out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
2090 _prevMarkBitMap->iterate(&cl);
2091 out->print_cr("");
2093 #if SEND_HEAP_DUMP_TO_FILE
2094 fclose(heap_dump_file);
2095 heap_dump_file = NULL;
2096 #endif // SEND_HEAP_DUMP_TO_FILE
2097 }
2099 // This note is for drainAllSATBBuffers and the code in between.
2100 // In the future we could reuse a task to do this work during an
2101 // evacuation pause (since now tasks are not active and can be claimed
2102 // during an evacuation pause). This was a late change to the code and
2103 // is currently not being taken advantage of.
2105 class CMGlobalObjectClosure : public ObjectClosure {
2106 private:
2107 ConcurrentMark* _cm;
2109 public:
2110 void do_object(oop obj) {
2111 _cm->deal_with_reference(obj);
2112 }
2114 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2115 };
2117 void ConcurrentMark::deal_with_reference(oop obj) {
2118 if (verbose_high())
2119 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2120 (void*) obj);
2123 HeapWord* objAddr = (HeapWord*) obj;
2124 if (_g1h->is_in_g1_reserved(objAddr)) {
2125 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2126 HeapRegion* hr = _g1h->heap_region_containing(obj);
2127 if (_g1h->is_obj_ill(obj, hr)) {
2128 if (verbose_high())
2129 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2130 "marked", (void*) obj);
2132 // we need to mark it first
2133 if (_nextMarkBitMap->parMark(objAddr)) {
2134 // No OrderAccess::store_load() is needed. It is implicit in the
2135 // CAS done in parMark(objAddr) above
2136 HeapWord* finger = _finger;
2137 if (objAddr < finger) {
2138 if (verbose_high())
2139 gclog_or_tty->print_cr("[global] below the global finger "
2140 "("PTR_FORMAT"), pushing it", finger);
2141 if (!mark_stack_push(obj)) {
2142 if (verbose_low())
2143 gclog_or_tty->print_cr("[global] global stack overflow during "
2144 "deal_with_reference");
2145 }
2146 }
2147 }
2148 }
2149 }
2150 }
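// The method above is the global (non-task) variant, used for example
// when draining SATB buffers during an evacuation pause (see
// drainAllSATBBuffers() below). The invariant it maintains: an object
// below the global finger is gray only if it is marked AND on the mark
// stack; an object above the finger need not be pushed, since the
// finger will eventually sweep past it and it will be scanned then.
// CMTask::deal_with_reference() further down refines this with a check
// against the task-local finger as well.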
2152 void ConcurrentMark::drainAllSATBBuffers() {
2153 CMGlobalObjectClosure oc(this);
2154 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2155 satb_mq_set.set_closure(&oc);
2157 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2158 if (verbose_medium())
2159 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2160 }
2162 // no need to check whether we should do this, as this is only
2163 // called during an evacuation pause
2164 satb_mq_set.iterate_closure_all_threads();
2166 satb_mq_set.set_closure(NULL);
2167 guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
2168 }
2170 void ConcurrentMark::markPrev(oop p) {
2171 // Note we are overriding the read-only view of the prev map here, via
2172 // the cast.
2173 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2174 }
2176 void ConcurrentMark::clear(oop p) {
2177 assert(p != NULL && p->is_oop(), "expected an oop");
2178 HeapWord* addr = (HeapWord*)p;
2179 assert(addr >= _nextMarkBitMap->startWord() &&
2180 addr < _nextMarkBitMap->endWord(), "in a region");
2182 _nextMarkBitMap->clear(addr);
2183 }
2185 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2186 // Note we are overriding the read-only view of the prev map here, via
2187 // the cast.
2188 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2189 _nextMarkBitMap->clearRange(mr);
2190 }
2192 HeapRegion*
2193 ConcurrentMark::claim_region(int task_num) {
2194 // "checkpoint" the finger
2195 HeapWord* finger = _finger;
2197 // _heap_end will not change underneath our feet; it only changes at
2198 // yield points.
2199 while (finger < _heap_end) {
2200 tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
2202 // is the gap between reading the finger and doing the CAS too long?
2204 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2205 HeapWord* bottom = curr_region->bottom();
2206 HeapWord* end = curr_region->end();
2207 HeapWord* limit = curr_region->next_top_at_mark_start();
2209 if (verbose_low())
2210 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2211 "["PTR_FORMAT", "PTR_FORMAT"), "
2212 "limit = "PTR_FORMAT,
2213 task_num, curr_region, bottom, end, limit);
2215 HeapWord* res =
2216 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2217 if (res == finger) {
2218 // we succeeded
2220 // notice that _finger == end cannot be guaranteed here since
2221 // someone else might have moved the finger even further
2222 guarantee( _finger >= end, "the finger should have moved forward" );
2224 if (verbose_low())
2225 gclog_or_tty->print_cr("[%d] we were successful with region = "
2226 PTR_FORMAT, task_num, curr_region);
2228 if (limit > bottom) {
2229 if (verbose_low())
2230 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2231 "returning it ", task_num, curr_region);
2232 return curr_region;
2233 } else {
2234 tmp_guarantee_CM( limit == bottom,
2235 "the region limit should be at bottom" );
2236 if (verbose_low())
2237 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2238 "returning NULL", task_num, curr_region);
2239 // we return NULL and the caller should try calling
2240 // claim_region() again.
2241 return NULL;
2242 }
2243 } else {
2244 guarantee( _finger > finger, "the finger should have moved forward" );
2245 if (verbose_low())
2246 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2247 "global finger = "PTR_FORMAT", "
2248 "our finger = "PTR_FORMAT,
2249 task_num, _finger, finger);
2251 // read it again
2252 finger = _finger;
2253 }
2254 }
2256 return NULL;
2257 }
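// A sketch of the retry protocol a marking task might follow (the
// surrounding loop and the heap_end name are illustrative; the real
// caller is CMTask::do_marking_step()):
#if 0
  HeapRegion* hr = _cm->claim_region(_task_id);
  while (hr == NULL && _cm->finger() < heap_end) {
    // NULL means we claimed an empty region or lost the CAS race;
    // just try to claim the next region
    hr = _cm->claim_region(_task_id);
  }
  if (hr != NULL) setup_for_region(hr);
#endif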
2259 void ConcurrentMark::oops_do(OopClosure* cl) {
2260 if (_markStack.size() > 0 && verbose_low())
2261 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2262 "size = %d", _markStack.size());
2263 // we first iterate over the contents of the mark stack...
2264 _markStack.oops_do(cl);
2266 for (int i = 0; i < (int)_max_task_num; ++i) {
2267 OopTaskQueue* queue = _task_queues->queue((int)i);
2269 if (queue->size() > 0 && verbose_low())
2270 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2271 "size = %d", i, queue->size());
2273 // ...then over the contents of the all the task queues.
2274 queue->oops_do(cl);
2275 }
2277 // finally, invalidate any entries in the region stack that
2278 // point into the collection set
2279 if (_regionStack.invalidate_entries_into_cset()) {
2280 // otherwise, any gray objects copied during the evacuation pause
2281 // might not be visited.
2282 guarantee( _should_gray_objects, "invariant" );
2283 }
2284 }
2286 void ConcurrentMark::clear_marking_state() {
2287 _markStack.setEmpty();
2288 _markStack.clear_overflow();
2289 _regionStack.setEmpty();
2290 _regionStack.clear_overflow();
2291 clear_has_overflown();
2292 _finger = _heap_start;
2294 for (int i = 0; i < (int)_max_task_num; ++i) {
2295 OopTaskQueue* queue = _task_queues->queue(i);
2296 queue->set_empty();
2297 }
2298 }
2300 void ConcurrentMark::print_stats() {
2301 if (verbose_stats()) {
2302 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2303 for (size_t i = 0; i < _active_tasks; ++i) {
2304 _tasks[i]->print_stats();
2305 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2306 }
2307 }
2308 }
2310 class CSMarkOopClosure: public OopClosure {
2311 friend class CSMarkBitMapClosure;
2313 G1CollectedHeap* _g1h;
2314 CMBitMap* _bm;
2315 ConcurrentMark* _cm;
2316 oop* _ms;
2317 jint* _array_ind_stack;
2318 int _ms_size;
2319 int _ms_ind;
2320 int _array_increment;
2322 bool push(oop obj, int arr_ind = 0) {
2323 if (_ms_ind == _ms_size) {
2324 gclog_or_tty->print_cr("Mark stack is full.");
2325 return false;
2326 }
2327 _ms[_ms_ind] = obj;
2328 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2329 _ms_ind++;
2330 return true;
2331 }
2333 oop pop() {
2334 if (_ms_ind == 0) return NULL;
2335 else {
2336 _ms_ind--;
2337 return _ms[_ms_ind];
2338 }
2339 }
2341 bool drain() {
2342 while (_ms_ind > 0) {
2343 oop obj = pop();
2344 assert(obj != NULL, "Since index was non-zero.");
2345 if (obj->is_objArray()) {
2346 jint arr_ind = _array_ind_stack[_ms_ind];
2347 objArrayOop aobj = objArrayOop(obj);
2348 jint len = aobj->length();
2349 jint next_arr_ind = arr_ind + _array_increment;
2350 if (next_arr_ind < len) {
2351 push(obj, next_arr_ind);
2352 }
2353 // Now process this portion of this one.
2354 int lim = MIN2(next_arr_ind, len);
2355 assert(!UseCompressedOops, "This needs to be fixed");
2356 for (int j = arr_ind; j < lim; j++) {
2357 do_oop(aobj->obj_at_addr<oop>(j));
2358 }
2360 } else {
2361 obj->oop_iterate(this);
2362 }
2363 if (abort()) return false;
2364 }
2365 return true;
2366 }
2368 public:
2369 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2370 _g1h(G1CollectedHeap::heap()),
2371 _cm(cm),
2372 _bm(cm->nextMarkBitMap()),
2373 _ms_size(ms_size), _ms_ind(0),
2374 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2375 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2376 _array_increment(MAX2(ms_size/8, 16))
2377 {}
2379 ~CSMarkOopClosure() {
2380 FREE_C_HEAP_ARRAY(oop, _ms);
2381 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2382 }
2384 void do_oop(narrowOop* p) {
2385 guarantee(false, "NYI");
2386 }
2388 void do_oop(oop* p) {
2389 oop obj = *p;
2390 if (obj == NULL) return;
2391 if (obj->is_forwarded()) {
2392 // If the object has already been forwarded, we have to make sure
2393 // that it's marked. So follow the forwarding pointer. Note that
2394 // this does the right thing for self-forwarding pointers in the
2395 // evacuation failure case.
2396 obj = obj->forwardee();
2397 }
2398 HeapRegion* hr = _g1h->heap_region_containing(obj);
2399 if (hr != NULL) {
2400 if (hr->in_collection_set()) {
2401 if (_g1h->is_obj_ill(obj)) {
2402 _bm->mark((HeapWord*)obj);
2403 if (!push(obj)) {
2404 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2405 set_abort();
2406 }
2407 }
2408 } else {
2409 // Outside the collection set; we need to gray it
2410 _cm->deal_with_reference(obj);
2411 }
2412 }
2413 }
2414 };
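// Note on the objArray chunking in drain() above: arrays are scanned
// _array_increment elements at a time, with the remainder re-pushed
// before the current chunk is processed. Trace for an array a of
// length 40 with _array_increment == 16 (illustrative values):
//   push(a, 0) -> pop, push(a, 16), scan a[0..16)
//              -> pop, push(a, 32), scan a[16..32)
//              -> pop, scan a[32..40)
// This bounds both the work done per drain step and the stack space a
// large array can consume.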
2416 class CSMarkBitMapClosure: public BitMapClosure {
2417 G1CollectedHeap* _g1h;
2418 CMBitMap* _bitMap;
2419 ConcurrentMark* _cm;
2420 CSMarkOopClosure _oop_cl;
2421 public:
2422 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2423 _g1h(G1CollectedHeap::heap()),
2424 _bitMap(cm->nextMarkBitMap()),
2425 _oop_cl(cm, ms_size)
2426 {}
2428 ~CSMarkBitMapClosure() {}
2430 bool do_bit(size_t offset) {
2431 // convert offset into a HeapWord*
2432 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2433 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2434 "address out of range");
2435 assert(_bitMap->isMarked(addr), "tautology");
2436 oop obj = oop(addr);
2437 if (!obj->is_forwarded()) {
2438 if (!_oop_cl.push(obj)) return false;
2439 if (!_oop_cl.drain()) return false;
2440 }
2441 // Otherwise...
2442 return true;
2443 }
2444 };
2447 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2448 CMBitMap* _bm;
2449 CSMarkBitMapClosure _bit_cl;
2450 enum SomePrivateConstants {
2451 MSSize = 1000
2452 };
2453 bool _completed;
2454 public:
2455 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2456 _bm(cm->nextMarkBitMap()),
2457 _bit_cl(cm, MSSize),
2458 _completed(true)
2459 {}
2461 ~CompleteMarkingInCSHRClosure() {}
2463 bool doHeapRegion(HeapRegion* r) {
2464 if (!r->evacuation_failed()) {
2465 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2466 if (!mr.is_empty()) {
2467 if (!_bm->iterate(&_bit_cl, mr)) {
2468 _completed = false;
2469 return true;
2470 }
2471 }
2472 }
2473 return false;
2474 }
2476 bool completed() { return _completed; }
2477 };
2479 class ClearMarksInHRClosure: public HeapRegionClosure {
2480 CMBitMap* _bm;
2481 public:
2482 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2484 bool doHeapRegion(HeapRegion* r) {
2485 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2486 MemRegion usedMR = r->used_region();
2487 _bm->clearRange(usedMR);
2488 }
2489 return false;
2490 }
2491 };
2493 void ConcurrentMark::complete_marking_in_collection_set() {
2494 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2496 if (!g1h->mark_in_progress()) {
2497 g1h->g1_policy()->record_mark_closure_time(0.0);
2498 return;
2499 }
2501 int i = 1;
2502 double start = os::elapsedTime();
2503 while (true) {
2504 i++;
2505 CompleteMarkingInCSHRClosure cmplt(this);
2506 g1h->collection_set_iterate(&cmplt);
2507 if (cmplt.completed()) break;
2508 }
2509 double end_time = os::elapsedTime();
2510 double elapsed_time_ms = (end_time - start) * 1000.0;
2511 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2512 if (PrintGCDetails) {
2513 gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
2514 }
2516 ClearMarksInHRClosure clr(nextMarkBitMap());
2517 g1h->collection_set_iterate(&clr);
2518 }
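// Why the while (true) retry loop above terminates: the closure reports
// !completed() when CSMarkBitMapClosure had to abort a bitmap iteration
// (the fixed-size local mark stack in CSMarkOopClosure filled up). Each
// pass still makes progress, since every object marked so far stays
// marked, so the loop ends once a full pass over the collection set
// finishes without an abort.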
2520 // The next two methods deal with the following optimisation. Some
2521 // objects are gray by being marked and located above the finger. If
2522 // they are copied, during an evacuation pause, below the finger then
2523 // they need to be pushed on the stack. The observation is that, if
2524 // there are no regions in the collection set located above the
2525 // finger, then the above cannot happen, hence we do not need to
2526 // explicitly gray any objects when copying them to below the
2527 // finger. The global stack will be scanned to ensure that, if it
2528 // points to objects being copied, it will update their
2529 // location. There is a tricky situation with the gray objects in
2530 // region stack that are being copied, however. See the comment in
2531 // newCSet().
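// Pictorially (a sketch of the invariant described above):
//
//   bottom                    _min_finger                         end
//     |--------------------------|------------------------------->|
//     marked objects here were      marked objects here are gray
//     already scanned or pushed,    "by position"; if one is copied
//     so copying them below the     below the finger during a pause
//     finger needs no graying       it must be explicitly pushed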
2533 void ConcurrentMark::newCSet() {
2534 if (!concurrent_marking_in_progress())
2535 // nothing to do if marking is not in progress
2536 return;
2538 // find what the lowest finger is among the global and local fingers
2539 _min_finger = _finger;
2540 for (int i = 0; i < (int)_max_task_num; ++i) {
2541 CMTask* task = _tasks[i];
2542 HeapWord* task_finger = task->finger();
2543 if (task_finger != NULL && task_finger < _min_finger)
2544 _min_finger = task_finger;
2545 }
2547 _should_gray_objects = false;
2549 // This fixes a very subtle and frustrating bug. It might be the case
2550 // that, during an evacuation pause, heap regions that contain
2551 // objects that are gray (by being in regions contained in the
2552 // region stack) are included in the collection set. Since such gray
2553 // objects will be moved, and because it's not easy to redirect
2554 // region stack entries to point to a new location (because objects
2555 // in one region might be scattered to multiple regions after they
2556 // are copied), one option is to ensure that all marked objects
2557 // copied during a pause are pushed on the stack. Notice, however,
2558 // that this problem can only happen when the region stack is not
2559 // empty during an evacuation pause. So, we make the fix a bit less
2560 // conservative and ensure that regions are pushed on the stack,
2561 // irrespective of whether all collection set regions are below the
2562 // finger, if the region stack is not empty. This is expected to be
2563 // a rare case, so I don't think it's necessary to be smarter about it.
2564 if (!region_stack_empty())
2565 _should_gray_objects = true;
2566 }
2568 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2569 if (!concurrent_marking_in_progress())
2570 return;
2572 HeapWord* region_end = hr->end();
2573 if (region_end > _min_finger)
2574 _should_gray_objects = true;
2575 }
2577 void ConcurrentMark::disable_co_trackers() {
2578 if (has_aborted()) {
2579 if (_cleanup_co_tracker.enabled())
2580 _cleanup_co_tracker.disable();
2581 for (int i = 0; i < (int)_max_task_num; ++i) {
2582 CMTask* task = _tasks[i];
2583 if (task->co_tracker_enabled())
2584 task->disable_co_tracker();
2585 }
2586 } else {
2587 guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
2588 for (int i = 0; i < (int)_max_task_num; ++i) {
2589 CMTask* task = _tasks[i];
2590 guarantee( !task->co_tracker_enabled(), "invariant" );
2591 }
2592 }
2593 }
2595 // abandon current marking iteration due to a Full GC
2596 void ConcurrentMark::abort() {
2597 // If we're not marking, nothing to do.
2598 if (!G1ConcMark) return;
2600 // Clear all marks to force marking thread to do nothing
2601 _nextMarkBitMap->clearAll();
2602 // Empty mark stack
2603 clear_marking_state();
2604 for (int i = 0; i < (int)_max_task_num; ++i)
2605 _tasks[i]->clear_region_fields();
2606 _has_aborted = true;
2608 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2609 satb_mq_set.abandon_partial_marking();
2610 satb_mq_set.set_active_all_threads(false);
2611 }
2613 static void print_ms_time_info(const char* prefix, const char* name,
2614 NumberSeq& ns) {
2615 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2616 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2617 if (ns.num() > 0) {
2618 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2619 prefix, ns.sd(), ns.maximum());
2620 }
2621 }
2623 void ConcurrentMark::print_summary_info() {
2624 gclog_or_tty->print_cr(" Concurrent marking:");
2625 print_ms_time_info(" ", "init marks", _init_times);
2626 print_ms_time_info(" ", "remarks", _remark_times);
2627 {
2628 print_ms_time_info(" ", "final marks", _remark_mark_times);
2629 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2631 }
2632 print_ms_time_info(" ", "cleanups", _cleanup_times);
2633 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2634 _total_counting_time,
2635 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2636 (double)_cleanup_times.num()
2637 : 0.0));
2638 if (G1ScrubRemSets) {
2639 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2640 _total_rs_scrub_time,
2641 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2642 (double)_cleanup_times.num()
2643 : 0.0));
2644 }
2645 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2646 (_init_times.sum() + _remark_times.sum() +
2647 _cleanup_times.sum())/1000.0);
2648 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2649 "(%8.2f s marking, %8.2f s counting).",
2650 cmThread()->vtime_accum(),
2651 cmThread()->vtime_mark_accum(),
2652 cmThread()->vtime_count_accum());
2653 }
2655 // Closures
2656 // XXX: there seems to be a lot of code duplication here;
2657 // should refactor and consolidate the shared code.
2659 // This closure is used to mark refs into the CMS generation in
2660 // the CMS bit map. Called at the first checkpoint.
2662 // We take a break if someone is trying to stop the world.
2663 bool ConcurrentMark::do_yield_check(int worker_i) {
2664 if (should_yield()) {
2665 if (worker_i == 0)
2666 _g1h->g1_policy()->record_concurrent_pause();
2667 cmThread()->yield();
2668 if (worker_i == 0)
2669 _g1h->g1_policy()->record_concurrent_pause_end();
2670 return true;
2671 } else {
2672 return false;
2673 }
2674 }
2676 bool ConcurrentMark::should_yield() {
2677 return cmThread()->should_yield();
2678 }
2680 bool ConcurrentMark::containing_card_is_marked(void* p) {
2681 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2682 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2683 }
2685 bool ConcurrentMark::containing_cards_are_marked(void* start,
2686 void* last) {
2687 return
2688 containing_card_is_marked(start) &&
2689 containing_card_is_marked(last);
2690 }
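// Worked example for the card arithmetic above: cards span
// 2^card_shift bytes (512 bytes with the usual card_shift of 9), so a
// pointer 4096 bytes past the start of the reserved region has
// offset 4096 and lands in card index 4096 >> 9 == 8 of _card_bm.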
2692 #ifndef PRODUCT
2693 // for debugging purposes
2694 void ConcurrentMark::print_finger() {
2695 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2696 _heap_start, _heap_end, _finger);
2697 for (int i = 0; i < (int) _max_task_num; ++i) {
2698 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2699 }
2700 gclog_or_tty->print_cr("");
2701 }
2702 #endif
2704 // Closure for iteration over bitmaps
2705 class CMBitMapClosure : public BitMapClosure {
2706 private:
2707 // the bitmap that is being iterated over
2708 CMBitMap* _nextMarkBitMap;
2709 ConcurrentMark* _cm;
2710 CMTask* _task;
2711 // true if we're scanning a heap region claimed by the task (so that
2712 // we move the finger along), false if we're not, i.e. currently when
2713 // scanning a heap region popped from the region stack (so that we
2714 // do not move the task finger along; it'd be a mistake if we did so).
2715 bool _scanning_heap_region;
2717 public:
2718 CMBitMapClosure(CMTask *task,
2719 ConcurrentMark* cm,
2720 CMBitMap* nextMarkBitMap)
2721 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2723 void set_scanning_heap_region(bool scanning_heap_region) {
2724 _scanning_heap_region = scanning_heap_region;
2725 }
2727 bool do_bit(size_t offset) {
2728 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2729 tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" );
2730 tmp_guarantee_CM( addr < _cm->finger(), "invariant" );
2732 if (_scanning_heap_region) {
2733 statsOnly( _task->increase_objs_found_on_bitmap() );
2734 tmp_guarantee_CM( addr >= _task->finger(), "invariant" );
2735 // We move that task's local finger along.
2736 _task->move_finger_to(addr);
2737 } else {
2738 // We move the task's region finger along.
2739 _task->move_region_finger_to(addr);
2740 }
2742 _task->scan_object(oop(addr));
2743 // we only partially drain the local queue and global stack
2744 _task->drain_local_queue(true);
2745 _task->drain_global_stack(true);
2747 // if the has_aborted flag has been raised, we need to bail out of
2748 // the iteration
2749 return !_task->has_aborted();
2750 }
2751 };
2753 // Closure for iterating over objects, currently only used for
2754 // processing SATB buffers.
2755 class CMObjectClosure : public ObjectClosure {
2756 private:
2757 CMTask* _task;
2759 public:
2760 void do_object(oop obj) {
2761 _task->deal_with_reference(obj);
2762 }
2764 CMObjectClosure(CMTask* task) : _task(task) { }
2765 };
2767 // Closure for iterating over object fields
2768 class CMOopClosure : public OopClosure {
2769 private:
2770 G1CollectedHeap* _g1h;
2771 ConcurrentMark* _cm;
2772 CMTask* _task;
2774 public:
2775 void do_oop(narrowOop* p) {
2776 guarantee(false, "NYI");
2777 }
2779 void do_oop(oop* p) {
2780 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" );
2782 oop obj = *p;
2783 if (_cm->verbose_high())
2784 gclog_or_tty->print_cr("[%d] we're looking at location "
2785 "*"PTR_FORMAT" = "PTR_FORMAT,
2786 _task->task_id(), p, (void*) obj);
2787 _task->deal_with_reference(obj);
2788 }
2790 CMOopClosure(G1CollectedHeap* g1h,
2791 ConcurrentMark* cm,
2792 CMTask* task)
2793 : _g1h(g1h), _cm(cm), _task(task) { }
2794 };
2796 void CMTask::setup_for_region(HeapRegion* hr) {
2797 tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(),
2798 "claim_region() should have filtered out continues humongous regions" );
2800 if (_cm->verbose_low())
2801 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2802 _task_id, hr);
2804 _curr_region = hr;
2805 _finger = hr->bottom();
2806 update_region_limit();
2807 }
2809 void CMTask::update_region_limit() {
2810 HeapRegion* hr = _curr_region;
2811 HeapWord* bottom = hr->bottom();
2812 HeapWord* limit = hr->next_top_at_mark_start();
2814 if (limit == bottom) {
2815 if (_cm->verbose_low())
2816 gclog_or_tty->print_cr("[%d] found an empty region "
2817 "["PTR_FORMAT", "PTR_FORMAT")",
2818 _task_id, bottom, limit);
2819 // The region was collected underneath our feet.
2820 // We set the finger to bottom to ensure that the bitmap
2821 // iteration that will follow this will not do anything.
2822 // (this is not a condition that holds when we set the region up,
2823 // as the region is not supposed to be empty in the first place)
2824 _finger = bottom;
2825 } else if (limit >= _region_limit) {
2826 tmp_guarantee_CM( limit >= _finger, "peace of mind" );
2827 } else {
2828 tmp_guarantee_CM( limit < _region_limit, "only way to get here" );
2829 // This can happen under some pretty unusual circumstances. An
2830 // evacuation pause empties the region underneath our feet (NTAMS
2831 // at bottom). We then do some allocation in the region (NTAMS
2832 // stays at bottom), followed by the region being used as a GC
2833 // alloc region (NTAMS will move to top() and the objects
2834 // originally below it will be grayed). All objects now marked in
2835 // the region are explicitly grayed, if below the global finger,
2836 // and we do not need in fact to scan anything else. So, we simply
2837 // set _finger to be limit to ensure that the bitmap iteration
2838 // doesn't do anything.
2839 _finger = limit;
2840 }
2842 _region_limit = limit;
2843 }
2845 void CMTask::giveup_current_region() {
2846 tmp_guarantee_CM( _curr_region != NULL, "invariant" );
2847 if (_cm->verbose_low())
2848 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2849 _task_id, _curr_region);
2850 clear_region_fields();
2851 }
2853 void CMTask::clear_region_fields() {
2854 // Values for these three fields that indicate that we're not
2855 // holding on to a region.
2856 _curr_region = NULL;
2857 _finger = NULL;
2858 _region_limit = NULL;
2860 _region_finger = NULL;
2861 }
2863 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2864 guarantee( nextMarkBitMap != NULL, "invariant" );
2866 if (_cm->verbose_low())
2867 gclog_or_tty->print_cr("[%d] resetting", _task_id);
2869 _nextMarkBitMap = nextMarkBitMap;
2870 clear_region_fields();
2872 _calls = 0;
2873 _elapsed_time_ms = 0.0;
2874 _termination_time_ms = 0.0;
2875 _termination_start_time_ms = 0.0;
2877 #if _MARKING_STATS_
2878 _local_pushes = 0;
2879 _local_pops = 0;
2880 _local_max_size = 0;
2881 _objs_scanned = 0;
2882 _global_pushes = 0;
2883 _global_pops = 0;
2884 _global_max_size = 0;
2885 _global_transfers_to = 0;
2886 _global_transfers_from = 0;
2887 _region_stack_pops = 0;
2888 _regions_claimed = 0;
2889 _objs_found_on_bitmap = 0;
2890 _satb_buffers_processed = 0;
2891 _steal_attempts = 0;
2892 _steals = 0;
2893 _aborted = 0;
2894 _aborted_overflow = 0;
2895 _aborted_cm_aborted = 0;
2896 _aborted_yield = 0;
2897 _aborted_timed_out = 0;
2898 _aborted_satb = 0;
2899 _aborted_termination = 0;
2900 #endif // _MARKING_STATS_
2901 }
2903 bool CMTask::should_exit_termination() {
2904 regular_clock_call();
2905 // This is called when we are in the termination protocol. We should
2906 // quit if, for some reason, this task wants to abort or the global
2907 // stack is not empty (this means that we can get work from it).
2908 return !_cm->mark_stack_empty() || has_aborted();
2909 }
2911 // This determines whether the method below will check both the local
2912 // and global fingers when determining whether to push on the stack a
2913 // gray object (value 1) or whether it will only check the global one
2914 // (value 0). The tradeoffs are that the former will be a bit more
2915 // accurate and possibly push less on the stack, but it might also be
2916 // a little bit slower.
2918 #define _CHECK_BOTH_FINGERS_ 1
2920 void CMTask::deal_with_reference(oop obj) {
2921 if (_cm->verbose_high())
2922 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
2923 _task_id, (void*) obj);
2925 ++_refs_reached;
2927 HeapWord* objAddr = (HeapWord*) obj;
2928 if (_g1h->is_in_g1_reserved(objAddr)) {
2929 tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
2930 HeapRegion* hr = _g1h->heap_region_containing(obj);
2931 if (_g1h->is_obj_ill(obj, hr)) {
2932 if (_cm->verbose_high())
2933 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
2934 _task_id, (void*) obj);
2936 // we need to mark it first
2937 if (_nextMarkBitMap->parMark(objAddr)) {
2938 // No OrderAccess::store_load() is needed. It is implicit in the
2939 // CAS done in parMark(objAddr) above
2940 HeapWord* global_finger = _cm->finger();
2942 #if _CHECK_BOTH_FINGERS_
2943 // we will check both the local and global fingers
2945 if (_finger != NULL && objAddr < _finger) {
2946 if (_cm->verbose_high())
2947 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
2948 "pushing it", _task_id, _finger);
2949 push(obj);
2950 } else if (_curr_region != NULL && objAddr < _region_limit) {
2951 // do nothing
2952 } else if (objAddr < global_finger) {
2953 // Notice that the global finger might be moving forward
2954 // concurrently. This is not a problem. In the worst case, we
2955 // mark the object while it is above the global finger and, by
2956 // the time we read the global finger, it has moved forward
2957 // past this object. In this case, the object will probably
2958 // be visited when a task is scanning the region and will also
2959 // be pushed on the stack. So, some duplicate work, but no
2960 // correctness problems.
2962 if (_cm->verbose_high())
2963 gclog_or_tty->print_cr("[%d] below the global finger "
2964 "("PTR_FORMAT"), pushing it",
2965 _task_id, global_finger);
2966 push(obj);
2967 } else {
2968 // do nothing
2969 }
2970 #else // _CHECK_BOTH_FINGERS_
2971 // we will only check the global finger
2973 if (objAddr < global_finger) {
2974 // see long comment above
2976 if (_cm->verbose_high())
2977 gclog_or_tty->print_cr("[%d] below the global finger "
2978 "("PTR_FORMAT"), pushing it",
2979 _task_id, global_finger);
2980 push(obj);
2981 }
2982 #endif // _CHECK_BOTH_FINGERS_
2983 }
2984 }
2985 }
2986 }
2988 void CMTask::push(oop obj) {
2989 HeapWord* objAddr = (HeapWord*) obj;
2990 tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" );
2991 tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" );
2992 tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" );
2994 if (_cm->verbose_high())
2995 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
2997 if (!_task_queue->push(obj)) {
2998 // The local task queue looks full. We need to push some entries
2999 // to the global stack.
3001 if (_cm->verbose_medium())
3002 gclog_or_tty->print_cr("[%d] task queue overflow, "
3003 "moving entries to the global stack",
3004 _task_id);
3005 move_entries_to_global_stack();
3007 // this should succeed since, even if we overflow the global
3008 // stack, we should have definitely removed some entries from the
3009 // local queue. So, there must be space on it.
3010 bool success = _task_queue->push(obj);
3011 tmp_guarantee_CM( success, "invariant" );
3012 }
3014 statsOnly( int tmp_size = _task_queue->size();
3015 if (tmp_size > _local_max_size)
3016 _local_max_size = tmp_size;
3017 ++_local_pushes );
3018 }
3020 void CMTask::reached_limit() {
3021 tmp_guarantee_CM( _words_scanned >= _words_scanned_limit ||
3022 _refs_reached >= _refs_reached_limit ,
3023 "shouldn't have been called otherwise" );
3024 regular_clock_call();
3025 }
3027 void CMTask::regular_clock_call() {
3028 if (has_aborted())
3029 return;
3031 // First, we need to recalculate the words scanned and refs reached
3032 // limits for the next clock call.
3033 recalculate_limits();
3035 // During the regular clock call we do the following
3037 // (1) If an overflow has been flagged, then we abort.
3038 if (_cm->has_overflown()) {
3039 set_has_aborted();
3040 return;
3041 }
3043 // If we are not concurrent (i.e. we're doing remark) we don't need
3044 // to check anything else. The other steps are only needed during
3045 // the concurrent marking phase.
3046 if (!concurrent())
3047 return;
3049 // (2) If marking has been aborted for Full GC, then we also abort.
3050 if (_cm->has_aborted()) {
3051 set_has_aborted();
3052 statsOnly( ++_aborted_cm_aborted );
3053 return;
3054 }
3056 double curr_time_ms = os::elapsedVTime() * 1000.0;
3058 // (3) If marking stats are enabled, then we update the step history.
3059 #if _MARKING_STATS_
3060 if (_words_scanned >= _words_scanned_limit)
3061 ++_clock_due_to_scanning;
3062 if (_refs_reached >= _refs_reached_limit)
3063 ++_clock_due_to_marking;
3065 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3066 _interval_start_time_ms = curr_time_ms;
3067 _all_clock_intervals_ms.add(last_interval_ms);
3069 if (_cm->verbose_medium()) {
3070 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3071 "scanned = %d%s, refs reached = %d%s",
3072 _task_id, last_interval_ms,
3073 _words_scanned,
3074 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3075 _refs_reached,
3076 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3077 }
3078 #endif // _MARKING_STATS_
3080 // (4) We check whether we should yield. If we have to, then we abort.
3081 if (_cm->should_yield()) {
3082 // We should yield. To do this we abort the task. The caller is
3083 // responsible for yielding.
3084 set_has_aborted();
3085 statsOnly( ++_aborted_yield );
3086 return;
3087 }
3089 // (5) We check whether we've reached our time quota. If we have,
3090 // then we abort.
3091 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3092 if (elapsed_time_ms > _time_target_ms) {
3093 set_has_aborted();
3094 _has_aborted_timed_out = true;
3095 statsOnly( ++_aborted_timed_out );
3096 return;
3097 }
3099 // (6) Finally, we check whether there are enough completed SATB
3100 // buffers available for processing. If there are, we abort.
3101 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3102 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3103 if (_cm->verbose_low())
3104 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3105 _task_id);
3106 // we do need to process SATB buffers, we'll abort and restart
3107 // the marking task to do so
3108 set_has_aborted();
3109 statsOnly( ++_aborted_satb );
3110 return;
3111 }
3112 }
3114 void CMTask::recalculate_limits() {
3115 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3116 _words_scanned_limit = _real_words_scanned_limit;
3118 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3119 _refs_reached_limit = _real_refs_reached_limit;
3120 }
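// For instance, if words_scanned_period were 12*1024 words (value
// illustrative), a task that has scanned W words so far would call the
// regular clock again once it reaches W + 12K scanned words; the refs
// limit works the same way with its own period. The "real" limits are
// kept separately so that decrease_limits() below can pull the
// effective limits in without losing the period boundaries.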
3122 void CMTask::decrease_limits() {
3123 // This is called when we believe that we're going to do an infrequent
3124 // operation which will increase the per byte scanned cost (i.e. move
3125 // entries to/from the global stack). It basically tries to decrease the
3126 // scanning limit so that the clock is called earlier.
3128 if (_cm->verbose_medium())
3129 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3131 _words_scanned_limit = _real_words_scanned_limit -
3132 3 * words_scanned_period / 4;
3133 _refs_reached_limit = _real_refs_reached_limit -
3134 3 * refs_reached_period / 4;
3135 }
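// Arithmetic sketch: with a period P, the effective limit becomes
// real_limit - 3P/4, so at most P/4 of the current period remains
// before regular_clock_call() is due. E.g. P == 12*1024 words
// (illustrative) leaves at most 3K words of scanning before the next
// clock check.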
3137 void CMTask::move_entries_to_global_stack() {
3138 // local array where we'll store the entries that will be popped
3139 // from the local queue
3140 oop buffer[global_stack_transfer_size];
3142 int n = 0;
3143 oop obj;
3144 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3145 buffer[n] = obj;
3146 ++n;
3147 }
3149 if (n > 0) {
3150 // we popped at least one entry from the local queue
3152 statsOnly( ++_global_transfers_to; _local_pops += n );
3154 if (!_cm->mark_stack_push(buffer, n)) {
3155 if (_cm->verbose_low())
3156 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3157 set_has_aborted();
3158 } else {
3159 // the transfer was successful
3161 if (_cm->verbose_medium())
3162 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3163 _task_id, n);
3164 statsOnly( int tmp_size = _cm->mark_stack_size();
3165 if (tmp_size > _global_max_size)
3166 _global_max_size = tmp_size;
3167 _global_pushes += n );
3168 }
3169 }
3171 // this operation was quite expensive, so decrease the limits
3172 decrease_limits();
3173 }
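// Moving entries in chunks of global_stack_transfer_size (rather than
// one at a time) amortizes the synchronization cost of pushing onto the
// shared global stack over many entries; the matching bulk pop is in
// get_entries_from_global_stack() below.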
3175 void CMTask::get_entries_from_global_stack() {
3176 // local array where we'll store the entries that will be popped
3177 // from the global stack.
3178 oop buffer[global_stack_transfer_size];
3179 int n;
3180 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3181 tmp_guarantee_CM( n <= global_stack_transfer_size,
3182 "we should not pop more than the given limit" );
3183 if (n > 0) {
3184 // yes, we did actually pop at least one entry
3186 statsOnly( ++_global_transfers_from; _global_pops += n );
3187 if (_cm->verbose_medium())
3188 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3189 _task_id, n);
3190 for (int i = 0; i < n; ++i) {
3191 bool success = _task_queue->push(buffer[i]);
3192 // We only call this when the local queue is empty or under a
3193 // given target limit. So, we do not expect this push to fail.
3194 tmp_guarantee_CM( success, "invariant" );
3195 }
3197 statsOnly( int tmp_size = _task_queue->size();
3198 if (tmp_size > _local_max_size)
3199 _local_max_size = tmp_size;
3200 _local_pushes += n );
3201 }
3203 // this operation was quite expensive, so decrease the limits
3204 decrease_limits();
3205 }
3207 void CMTask::drain_local_queue(bool partially) {
3208 if (has_aborted())
3209 return;
3211 // Decide what the target size is, depending whether we're going to
3212 // drain it partially (so that other tasks can steal if they run out
3213 // of things to do) or totally (at the very end).
3214 size_t target_size;
3215 if (partially)
3216 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3217 else
3218 target_size = 0;
3220 if (_task_queue->size() > target_size) {
3221 if (_cm->verbose_high())
3222 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3223 _task_id, target_size);
3225 oop obj;
3226 bool ret = _task_queue->pop_local(obj);
3227 while (ret) {
3228 statsOnly( ++_local_pops );
3230 if (_cm->verbose_high())
3231 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3232 (void*) obj);
3234 tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj),
3235 "invariant" );
3237 scan_object(obj);
3239 if (_task_queue->size() <= target_size || has_aborted())
3240 ret = false;
3241 else
3242 ret = _task_queue->pop_local(obj);
3243 }
3245 if (_cm->verbose_high())
3246 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3247 _task_id, _task_queue->size());
3248 }
3249 }
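// Example of the partial target computation above: with a task queue
// whose max_elems() is 16K and a GCDrainStackTargetSize of 64 (both
// values illustrative), the partial target is MIN2(16K/3, 64) == 64,
// so the queue is drained down to 64 entries and the remainder is left
// available for other tasks to steal.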
3251 void CMTask::drain_global_stack(bool partially) {
3252 if (has_aborted())
3253 return;
3255 // We have a policy to drain the local queue before we attempt to
3256 // drain the global stack.
3257 tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" );
3259 // Decide what the target size is, depending whether we're going to
3260 // drain it partially (so that other tasks can steal if they run out
3261 // of things to do) or totally (at the very end). Notice that,
3262 // because we move entries from the global stack in chunks or
3263 // because another task might be doing the same, we might in fact
3264 // drop below the target. But, this is not a problem.
3265 size_t target_size;
3266 if (partially)
3267 target_size = _cm->partial_mark_stack_size_target();
3268 else
3269 target_size = 0;
3271 if (_cm->mark_stack_size() > target_size) {
3272 if (_cm->verbose_low())
3273 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3274 _task_id, target_size);
3276 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3277 get_entries_from_global_stack();
3278 drain_local_queue(partially);
3279 }
3281 if (_cm->verbose_low())
3282 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3283 _task_id, _cm->mark_stack_size());
3284 }
3285 }
3287 // SATB Queue has several assumptions on whether to call the par or
3288 // non-par versions of the methods. This is why some of the code is
3289 // replicated. We should really get rid of the single-threaded version
3290 // of the code to simplify things.
3291 void CMTask::drain_satb_buffers() {
3292 if (has_aborted())
3293 return;
3295 // We set this so that the regular clock knows that we're in the
3296 // middle of draining buffers and doesn't set the abort flag when it
3297 // notices that SATB buffers are available for draining. It'd be
3298 // very counter productive if it did that. :-)
3299 _draining_satb_buffers = true;
3301 CMObjectClosure oc(this);
3302 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3303 if (ParallelGCThreads > 0)
3304 satb_mq_set.set_par_closure(_task_id, &oc);
3305 else
3306 satb_mq_set.set_closure(&oc);
3308 // This keeps claiming and applying the closure to completed buffers
3309 // until we run out of buffers or we need to abort.
3310 if (ParallelGCThreads > 0) {
3311 while (!has_aborted() &&
3312 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3313 if (_cm->verbose_medium())
3314 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3315 statsOnly( ++_satb_buffers_processed );
3316 regular_clock_call();
3317 }
3318 } else {
3319 while (!has_aborted() &&
3320 satb_mq_set.apply_closure_to_completed_buffer()) {
3321 if (_cm->verbose_medium())
3322 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3323 statsOnly( ++_satb_buffers_processed );
3324 regular_clock_call();
3325 }
3326 }
3328 if (!concurrent() && !has_aborted()) {
3329 // We should only do this during remark.
3330 if (ParallelGCThreads > 0)
3331 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3332 else
3333 satb_mq_set.iterate_closure_all_threads();
3334 }
3336 _draining_satb_buffers = false;
3338 tmp_guarantee_CM( has_aborted() ||
3339 concurrent() ||
3340 satb_mq_set.completed_buffers_num() == 0, "invariant" );
3342 if (ParallelGCThreads > 0)
3343 satb_mq_set.set_par_closure(_task_id, NULL);
3344 else
3345 satb_mq_set.set_closure(NULL);
3347 // again, this was a potentially expensive operation, decrease the
3348 // limits to get the regular clock call early
3349 decrease_limits();
3350 }
3352 void CMTask::drain_region_stack(BitMapClosure* bc) {
3353 if (has_aborted())
3354 return;
3356 tmp_guarantee_CM( _region_finger == NULL,
3357 "it should be NULL when we're not scanning a region" );
3359 if (!_cm->region_stack_empty()) {
3360 if (_cm->verbose_low())
3361 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3362 _task_id, _cm->region_stack_size());
3364 MemRegion mr = _cm->region_stack_pop();
3365 // it returns MemRegion() if the pop fails
3366 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3368 while (mr.start() != NULL) {
3369 if (_cm->verbose_medium())
3370 gclog_or_tty->print_cr("[%d] we are scanning region "
3371 "["PTR_FORMAT", "PTR_FORMAT")",
3372 _task_id, mr.start(), mr.end());
3373 tmp_guarantee_CM( mr.end() <= _cm->finger(),
3374 "otherwise the region shouldn't be on the stack" );
3375 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3376 if (_nextMarkBitMap->iterate(bc, mr)) {
3377 tmp_guarantee_CM( !has_aborted(),
3378 "cannot abort the task without aborting the bitmap iteration" );
3380 // We finished iterating over the region without aborting.
3381 regular_clock_call();
3382 if (has_aborted())
3383 mr = MemRegion();
3384 else {
3385 mr = _cm->region_stack_pop();
3386 // it returns MemRegion() if the pop fails
3387 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3388 }
3389 } else {
3390 guarantee( has_aborted(), "currently the only way to do so" );
3392 // The only way to abort the bitmap iteration is to return
3393 // false from the do_bit() method. However, inside the
3394 // do_bit() method we move the _region_finger to point to the
3395 // object currently being looked at. So, if we bail out, we
3396 // have definitely set _region_finger to something non-null.
3397 guarantee( _region_finger != NULL, "invariant" );
3399 // The iteration was actually aborted. So now _region_finger
3400 // points to the address of the object we last scanned. If we
3401 // leave it there, when we restart this task, we will rescan
3402 // the object. It is easy to avoid this. We move the finger by
3403 // enough to point to the next possible object header (the
3404 // bitmap knows by how much we need to move it as it knows its
3405 // granularity).
3406 MemRegion newRegion =
3407 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3409 if (!newRegion.is_empty()) {
3410 if (_cm->verbose_low()) {
3411 gclog_or_tty->print_cr("[%d] pushing unscanned region"
3412 "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3413 _task_id,
3414 newRegion.start(), newRegion.end());
3415 }
3416 // Now push the part of the region we didn't scan on the
3417 // region stack to make sure a task scans it later.
3418 _cm->region_stack_push(newRegion);
3419 }
3420 // break from while
3421 mr = MemRegion();
3422 }
3423 _region_finger = NULL;
3424 }
3426 // We only push regions on the region stack during evacuation
3427 // pauses. So if we come out of the above iteration because the
3428 // region stack is empty, it will remain empty until the next yield
3429 // point. So, the guarantee below is safe.
3430 guarantee( has_aborted() || _cm->region_stack_empty(),
3431 "only way to exit the loop" );
3433 if (_cm->verbose_low())
3434 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3435 _task_id, _cm->region_stack_size());
3436 }
3437 }
3439 void CMTask::print_stats() {
3440 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3441 _task_id, _calls);
3442 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3443 _elapsed_time_ms, _termination_time_ms);
3444 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3445 _step_times_ms.num(), _step_times_ms.avg(),
3446 _step_times_ms.sd());
3447 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3448 _step_times_ms.maximum(), _step_times_ms.sum());
3450 #if _MARKING_STATS_
3451 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3452 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3453 _all_clock_intervals_ms.sd());
3454 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3455 _all_clock_intervals_ms.maximum(),
3456 _all_clock_intervals_ms.sum());
3457 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3458 _clock_due_to_scanning, _clock_due_to_marking);
3459 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3460 _objs_scanned, _objs_found_on_bitmap);
3461 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3462 _local_pushes, _local_pops, _local_max_size);
3463 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3464 _global_pushes, _global_pops, _global_max_size);
3465 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3466 _global_transfers_to,_global_transfers_from);
3467 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3468 _regions_claimed, _region_stack_pops);
3469 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3470 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3471 _steal_attempts, _steals);
3472 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3473 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3474 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3475 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3476 _aborted_timed_out, _aborted_satb, _aborted_termination);
3477 #endif // _MARKING_STATS_
3478 }
3480 /*****************************************************************************
3482 The do_marking_step(time_target_ms) method is the building block
3483 of the parallel marking framework. It can be called in parallel
3484 with other invocations of do_marking_step() on different tasks
3485 (but only one per task, obviously) and concurrently with the
3486 mutator threads, or during remark, hence it eliminates the need
3487 for two versions of the code. When called during remark, it will
3488 pick up from where the task left off during the concurrent marking
3489 phase. Interestingly, tasks are also claimable during evacuation
3490 pauses, since do_marking_step() ensures that it aborts before
3491 it needs to yield.
3493 The data structures that it uses to do marking work are the
3494 following:
3496 (1) Marking Bitmap. If there are gray objects that appear only
3497 on the bitmap (this happens either when dealing with an overflow
3498 or when the initial marking phase has simply marked the roots
3499 and didn't push them on the stack), then tasks claim heap
3500 regions whose bitmap they then scan to find gray objects. A
3501 global finger indicates where the end of the last claimed region
3502 is. A local finger indicates how far into the region a task has
3503 scanned. The two fingers are used to determine how to gray an
3504 object (i.e. whether simply marking it is OK, as it will be
3505 visited by a task in the future, or whether it also needs to be
3506 pushed on a stack); see the sketch after this list.
3508 (2) Local Queue. Each task has a local queue, which it can access
3509 reasonably efficiently. Other tasks can steal from
3510 it when they run out of work. Throughout the marking phase, a
3511 task attempts to keep its local queue short but not totally
3512 empty, so that entries are available for stealing by other
3513 tasks. Only when there is no more work will a task totally
3514 drain its local queue.
3516 (3) Global Mark Stack. This handles local queue overflow. During
3517 marking, only sets of entries are moved between it and the local
3518 queues, as access to it requires a mutex and finer-grained
3519 interaction with it might cause contention. If it
3520 overflows, then the marking phase should restart and iterate
3521 over the bitmap to identify gray objects. Throughout the marking
3522 phase, tasks attempt to keep the global mark stack at a small
3523 length but not totally empty, so that entries are available for
3524 popping by other tasks. Only when there is no more work will
3525 tasks totally drain the global mark stack.
3527 (4) Global Region Stack. Entries on it correspond to areas of
3528 the bitmap that need to be scanned since they contain gray
3529 objects. Pushes on the region stack only happen during
3530 evacuation pauses and typically correspond to areas covered by
3531 GC LABs. If it overflows, then the marking phase should restart
3532 and iterate over the bitmap to identify gray objects. Tasks will
3533 try to totally drain the region stack as soon as possible.
3535 (5) SATB Buffer Queue. This is where completed SATB buffers are
3536 made available. Buffers are regularly removed from this queue
3537 and scanned for roots, so that the queue doesn't get too
3538 long. During remark, all completed buffers are processed, as
3539 well as the filled-in parts of any uncompleted buffers.
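
As a hedged illustration of the graying decision described in (1)
above (this is a sketch only, not the exact code in this file; the
real decision is made where individual references are processed,
and push() stands in for pushing on the local queue):

      // obj was just marked on the next mark bitmap
      if ((HeapWord*) obj < _cm->finger()) {
        // Below the global finger: no task will revisit this part
        // of the bitmap by claiming regions, so the object must
        // also be pushed on a queue/stack to be visited later.
        push(obj);
      } else {
        // At or above the global finger: the object will be
        // visited when some task claims the region covering it,
        // so marking the bitmap is sufficient.
      }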
3541 The do_marking_step() method tries to abort when the time target
3542 has been reached. There are a few other cases when the
3543 do_marking_step() method also aborts:
3545 (1) When the marking phase has been aborted (after a Full GC).
3547 (2) When a global overflow (either on the global stack or the
3548 region stack) has been triggered. Before the task aborts, it
3549 will actually sync up with the other tasks to ensure that all
3550 the marking data structures (local queues, stacks, fingers etc.)
3551 are re-initialised so that when do_marking_step() completes,
3552 the marking phase can immediately restart.
3554 (3) When enough completed SATB buffers are available. The
3555 do_marking_step() method only tries to drain SATB buffers right
3556 at the beginning. So, if enough buffers are available, the
3557 marking step aborts and the SATB buffers are processed at
3558 the beginning of the next invocation.
3560 (4) To yield. When we have to yield, we abort and yield right
3561 at the end of do_marking_step(). This saves us a lot of hassle
3562 as, by yielding, we might allow a Full GC. If this happens then
3563 objects will be compacted underneath our feet, the heap might
3564 shrink, etc. We avoid checking for this mid-step by just
3565 aborting and doing the yield right at the end.
3567 From the above it follows that the do_marking_step() method should
3568 be called in a loop (or, otherwise, regularly) until it completes.
3570 If a marking step completes without its has_aborted() flag being
3571 true, it means it has completed the current marking phase (and
3572 also all other marking tasks have done so and have all synced up).
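
For instance, a driver could look like this (a minimal sketch; the
actual callers of do_marking_step() live elsewhere in this file and
also deal with yielding and overflow synchronisation between steps):

      do {
        task->do_marking_step(10.0 /* time target in ms */);
        // ...yield to a pending safepoint / handle overflow...
      } while (task->has_aborted() && !cm->has_aborted());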
3574 A method called regular_clock_call() is invoked "regularly" (in
3575 sub-millisecond intervals) throughout marking. It is this clock
3576 method that checks all the abort conditions mentioned above and
3577 decides when the task should abort. A work-based scheme is used to
3578 trigger this clock method: it is called when the number of object
3579 words the marking phase has scanned or the number of references it
3580 has visited reaches a given limit. Additional invocations of the
3581 clock method have been planted in a few other strategic places
3582 too. The initial reason for the clock method was to avoid calling
3583 vtime too frequently, as it is quite expensive. So, once it was in
3584 place, it was natural to piggy-back all the other abort conditions
3585 on it rather than checking them constantly throughout the code.
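
A hedged sketch of the work-based trigger (the counters are the ones
reset in do_marking_step() below; the two limit fields are an
assumption about the surrounding header, set by recalculate_limits()):

      _words_scanned += obj->size();   // while scanning an object
      _refs_reached++;                 // per reference visited
      if (_words_scanned >= _words_scanned_limit ||
          _refs_reached >= _refs_reached_limit)
        regular_clock_call();          // checks the abort conditions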
3587 *****************************************************************************/
3589 void CMTask::do_marking_step(double time_target_ms) {
3590 guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
3591 guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
3593 guarantee( concurrent() || _cm->region_stack_empty(),
3594 "the region stack should have been cleared before remark" );
3595 guarantee( _region_finger == NULL,
3596 "this should be non-null only when a region is being scanned" );
3598 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3599 guarantee( _task_queues != NULL, "invariant" );
3600 guarantee( _task_queue != NULL, "invariant" );
3601 guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
3603 guarantee( !_claimed,
3604 "only one thread should claim this task at any one time" );
3606 // OK, this doesn't safeguard against all possible scenarios, as it is
3607 // possible for two threads to set the _claimed flag at the same
3608 // time. But it is only for debugging purposes anyway and it will
3609 // catch most problems.
3610 _claimed = true;
3612 _start_time_ms = os::elapsedVTime() * 1000.0;
3613 statsOnly( _interval_start_time_ms = _start_time_ms );
3615 double diff_prediction_ms =
3616 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3617 _time_target_ms = time_target_ms - diff_prediction_ms;
3619 // set up the variables that are used in the work-based scheme to
3620 // call the regular clock method
3621 _words_scanned = 0;
3622 _refs_reached = 0;
3623 recalculate_limits();
3625 // clear all flags
3626 clear_has_aborted();
3627 _has_aborted_timed_out = false;
3628 _draining_satb_buffers = false;
3630 ++_calls;
3632 if (_cm->verbose_low())
3633 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3634 "target = %1.2lfms >>>>>>>>>>",
3635 _task_id, _calls, _time_target_ms);
3637 // Set up the bitmap and oop closures. Anything that uses them is
3638 // eventually called from this method, so it is OK to allocate them
3639 // locally, on this frame's stack.
3640 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3641 CMOopClosure oop_closure(_g1h, _cm, this);
3642 set_oop_closure(&oop_closure);
3644 if (_cm->has_overflown()) {
3645 // This can happen if the region stack or the mark stack overflows
3646 // during a GC pause and this task, after a yield point,
3647 // restarts. We have to abort as we need to get into the overflow
3648 // protocol which happens right at the end of this task.
3649 set_has_aborted();
3650 }
3652 // First drain any available SATB buffers. After this, we will not
3653 // look at SATB buffers before the next invocation of this method.
3654 // If enough completed SATB buffers are queued up, the regular clock
3655 // will abort this task so that it restarts.
3656 drain_satb_buffers();
3657 // ...then partially drain the local queue and the global stack
3658 drain_local_queue(true);
3659 drain_global_stack(true);
3661 // Then totally drain the region stack. We will not look at
3662 // it again before the next invocation of this method. Entries on
3663 // the region stack are only added during evacuation pauses, for
3664 // which we have to yield. When we do, we abort the task anyway so
3665 // it will look at the region stack again when it restarts.
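// While draining the region stack, the bitmap closure's do_bit()
// advances _region_finger rather than _finger (see the aborting
// logic in drain_region_stack() above).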
3666 bitmap_closure.set_scanning_heap_region(false);
3667 drain_region_stack(&bitmap_closure);
3668 // ...then partially drain the local queue and the global stack
3669 drain_local_queue(true);
3670 drain_global_stack(true);
3672 do {
3673 if (!has_aborted() && _curr_region != NULL) {
3674 // This means that we're already holding on to a region.
3675 tmp_guarantee_CM( _finger != NULL,
3676 "if region is not NULL, then the finger "
3677 "should not be NULL either" );
3679 // We might have restarted this task after an evacuation pause
3680 // which might have evacuated the region we're holding on to
3681 // underneath our feet. Let's read its limit again to make sure
3682 // that we do not iterate over a region of the heap that
3683 // contains garbage (update_region_limit() will also move
3684 // _finger to the start of the region if it is found empty).
3685 update_region_limit();
3686 // We will start from _finger not from the start of the region,
3687 // as we might be restarting this task after aborting half-way
3688 // through scanning this region. In this case, _finger points to
3689 // the address where we last found a marked object. If this is a
3690 // fresh region, _finger points to start().
3691 MemRegion mr = MemRegion(_finger, _region_limit);
3693 if (_cm->verbose_low())
3694 gclog_or_tty->print_cr("[%d] we're scanning part "
3695 "["PTR_FORMAT", "PTR_FORMAT") "
3696 "of region "PTR_FORMAT,
3697 _task_id, _finger, _region_limit, _curr_region);
3699 // Let's iterate over the bitmap of the part of the
3700 // region that is left.
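// (From here on the closure's do_bit() advances _finger, the
// local finger for the claimed region, rather than _region_finger.)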
3701 bitmap_closure.set_scanning_heap_region(true);
3702 if (mr.is_empty() ||
3703 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3704 // We successfully completed iterating over the region. Now,
3705 // let's give up the region.
3706 giveup_current_region();
3707 regular_clock_call();
3708 } else {
3709 guarantee( has_aborted(), "currently the only way to do so" );
3710 // The only way to abort the bitmap iteration is to return
3711 // false from the do_bit() method. However, inside the
3712 // do_bit() method we move the _finger to point to the
3713 // object currently being looked at. So, if we bail out, we
3714 // have definitely set _finger to something non-null.
3715 guarantee( _finger != NULL, "invariant" );
3717 // Region iteration was actually aborted. So now _finger
3718 // points to the address of the object we last scanned. If we
3719 // leave it there, when we restart this task, we will rescan
3720 // the object. It is easy to avoid this. We move the finger by
3721 // enough to point to the next possible object header (the
3722 // bitmap knows by how much we need to move it as it knows its
3723 // granularity).
3724 move_finger_to(_nextMarkBitMap->nextWord(_finger));
3725 }
3726 }
3727 // At this point we have either completed iterating over the
3728 // region we were holding on to, or we have aborted.
3730 // We then partially drain the local queue and the global stack.
3731 // (Do we really need this?)
3732 drain_local_queue(true);
3733 drain_global_stack(true);
3735 // Read the note on the claim_region() method about why it might
3736 // return NULL with potentially more regions available for
3737 // claiming and why we have to check out_of_regions() to determine
3738 // whether we're done or not.
3739 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3740 // We are going to try to claim a new region. We should have
3741 // given up on the previous one.
3742 tmp_guarantee_CM( _curr_region == NULL &&
3743 _finger == NULL &&
3744 _region_limit == NULL, "invariant" );
3745 if (_cm->verbose_low())
3746 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3747 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3748 if (claimed_region != NULL) {
3749 // Yes, we managed to claim one
3750 statsOnly( ++_regions_claimed );
3752 if (_cm->verbose_low())
3753 gclog_or_tty->print_cr("[%d] we successfully claimed "
3754 "region "PTR_FORMAT,
3755 _task_id, claimed_region);
3757 setup_for_region(claimed_region);
3758 tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
3759 }
3760 // It is important to call the regular clock here. It might take
3761 // a while to claim a region if, for example, we hit a large
3762 // block of empty regions. So we need to call the regular clock
3763 // method once round the loop to make sure it's called
3764 // frequently enough.
3765 regular_clock_call();
3766 }
3768 if (!has_aborted() && _curr_region == NULL) {
3769 tmp_guarantee_CM( _cm->out_of_regions(),
3770 "at this point we should be out of regions" );
3771 }
3772 } while ( _curr_region != NULL && !has_aborted());
3774 if (!has_aborted()) {
3775 // We cannot check whether the global stack is empty, since other
3776 // tasks might be pushing objects to it concurrently. We also cannot
3777 // check if the region stack is empty because if a thread is aborting
3778 // it can push a partially done region back.
3779 tmp_guarantee_CM( _cm->out_of_regions(),
3780 "at this point we should be out of regions" );
3782 if (_cm->verbose_low())
3783 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3785 // Try to reduce the number of available SATB buffers so that
3786 // remark has less work to do.
3787 drain_satb_buffers();
3788 }
3790 // Since we've done everything else, we can now totally drain the
3791 // local queue and global stack.
3792 drain_local_queue(false);
3793 drain_global_stack(false);
3795 // Attempt work stealing from other tasks' queues.
3796 if (!has_aborted()) {
3797 // We have not aborted. This means that we have finished all that
3798 // we could. Let's try to do some stealing...
3800 // We cannot check whether the global stack is empty, since other
3801 // tasks might be pushing objects to it concurrently. We also cannot
3802 // check if the region stack is empty because if a thread is aborting
3803 // it can push a partially done region back.
3804 guarantee( _cm->out_of_regions() &&
3805 _task_queue->size() == 0, "only way to reach here" );
3807 if (_cm->verbose_low())
3808 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3810 while (!has_aborted()) {
3811 oop obj;
3812 statsOnly( ++_steal_attempts );
3814 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3815 if (_cm->verbose_medium())
3816 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3817 _task_id, (void*) obj);
3819 statsOnly( ++_steals );
3821 tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
3822 "any stolen object should be marked" );
3823 scan_object(obj);
3825 // And since we're towards the end, let's totally drain the
3826 // local queue and global stack.
3827 drain_local_queue(false);
3828 drain_global_stack(false);
3829 } else {
3830 break;
3831 }
3832 }
3833 }
3835 // If we still haven't aborted, let's try to get into the
3836 // termination protocol.
3837 if (!has_aborted()) {
3838 // We cannot check whether the global stack is empty, since other
3839 // tasks might be concurrently pushing objects on it. We also cannot
3840 // check if the region stack is empty because if a thread is aborting
3841 // it can push a partially done region back.
3842 guarantee( _cm->out_of_regions() &&
3843 _task_queue->size() == 0, "only way to reach here" );
3845 if (_cm->verbose_low())
3846 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3848 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3849 // The CMTask class also extends the TerminatorTerminator class,
3850 // hence its should_exit_termination() method will also decide
3851 // whether to exit the termination protocol or not.
3852 bool finished = _cm->terminator()->offer_termination(this);
3853 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3854 _termination_time_ms +=
3855 termination_end_time_ms - _termination_start_time_ms;
3857 if (finished) {
3858 // We're all done.
3860 if (_task_id == 0) {
3861 // let's allow task 0 to do this
3862 if (concurrent()) {
3863 guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
3864 // we need to set this to false before the next
3865 // safepoint. This way we ensure that the marking phase
3866 // doesn't observe any more heap expansions.
3867 _cm->clear_concurrent_marking_in_progress();
3868 }
3869 }
3871 // We can now guarantee that the global stack is empty, since
3872 // all other tasks have finished.
3873 guarantee( _cm->out_of_regions() &&
3874 _cm->region_stack_empty() &&
3875 _cm->mark_stack_empty() &&
3876 _task_queue->size() == 0 &&
3877 !_cm->has_overflown() &&
3878 !_cm->mark_stack_overflow() &&
3879 !_cm->region_stack_overflow(),
3880 "only way to reach here" );
3882 if (_cm->verbose_low())
3883 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3884 } else {
3885 // Apparently there's more work to do. Let's abort this task. Its
3886 // caller will restart it and we can hopefully find more things to do.
3888 if (_cm->verbose_low())
3889 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3891 set_has_aborted();
3892 statsOnly( ++_aborted_termination );
3893 }
3894 }
3896 // Mainly for debugging purposes to make sure that a pointer to the
3897 // closure which was allocated in this frame doesn't
3898 // escape it by accident.
3899 set_oop_closure(NULL);
3900 double end_time_ms = os::elapsedVTime() * 1000.0;
3901 double elapsed_time_ms = end_time_ms - _start_time_ms;
3902 // Update the step history.
3903 _step_times_ms.add(elapsed_time_ms);
3905 if (has_aborted()) {
3906 // The task was aborted for some reason.
3908 statsOnly( ++_aborted );
3910 if (_has_aborted_timed_out) {
3911 double diff_ms = elapsed_time_ms - _time_target_ms;
3912 // Keep statistics of how well we did with respect to hitting
3913 // our target only if we actually timed out (if we aborted for
3914 // other reasons, then the results might get skewed).
3915 _marking_step_diffs_ms.add(diff_ms);
3916 }
3918 if (_cm->has_overflown()) {
3919 // This is the interesting one. We aborted because a global
3920 // overflow was raised. This means we have to restart the
3921 // marking phase and start iterating over regions. However, in
3922 // order to do this we have to make sure that all tasks stop
3923 // what they are doing and re-initialise in a safe manner. We
3924 // will achieve this with the use of two barrier sync points.
3926 if (_cm->verbose_low())
3927 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
3929 _cm->enter_first_sync_barrier(_task_id);
3930 // When we exit this sync barrier we know that all tasks have
3931 // stopped doing marking work. So, it's now safe to
3932 // re-initialise our data structures. At the end of this method,
3933 // task 0 will clear the global data structures.
3935 statsOnly( ++_aborted_overflow );
3937 // We clear the local state of this task...
3938 clear_region_fields();
3940 // ...and enter the second barrier.
3941 _cm->enter_second_sync_barrier(_task_id);
3942 // At this point everything has been re-initialised and we're
3943 // ready to restart.
3944 }
3946 if (_cm->verbose_low()) {
3947 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3948 "elapsed = %1.2lfms <<<<<<<<<<",
3949 _task_id, _time_target_ms, elapsed_time_ms);
3950 if (_cm->has_aborted())
3951 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
3952 _task_id);
3953 }
3954 } else {
3955 if (_cm->verbose_low())
3956 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
3957 "elapsed = %1.2lfms <<<<<<<<<<",
3958 _task_id, _time_target_ms, elapsed_time_ms);
3959 }
3961 _claimed = false;
3962 }
3964 CMTask::CMTask(int task_id,
3965 ConcurrentMark* cm,
3966 CMTaskQueue* task_queue,
3967 CMTaskQueueSet* task_queues)
3968 : _g1h(G1CollectedHeap::heap()),
3969 _co_tracker(G1CMGroup),
3970 _task_id(task_id), _cm(cm),
3971 _claimed(false),
3972 _nextMarkBitMap(NULL), _hash_seed(17),
3973 _task_queue(task_queue),
3974 _task_queues(task_queues),
3975 _oop_closure(NULL) {
3976 guarantee( task_queue != NULL, "invariant" );
3977 guarantee( task_queues != NULL, "invariant" );
3979 statsOnly( _clock_due_to_scanning = 0;
3980 _clock_due_to_marking = 0 );
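// Seed the step-time diff series so that the very first
// get_new_prediction() call in do_marking_step() has a sample to
// work with; 0.5ms is presumably just a conservative initial guess.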
3982 _marking_step_diffs_ms.add(0.5);
3983 }