Wed, 19 Jan 2011 19:30:42 -0500
6977804: G1: remove the zero-filling thread
Summary: This changeset removes the zero-filling thread from G1 and collapses the two free region lists we had before (the "free" and "unclean" lists) into one. The new free list uses the new heap region set / list abstractions that we will ultimately use to keep track of all regions in the heap. A heap region set was also introduced for the humongous regions. Finally, this change increases the concurrency between the thread that completes freeing regions (after a cleanup pause) and the rest of the system (previously, we had to wait for said thread to complete before allocating a new region). The changeset also includes a lot of refactoring and code simplification.
Reviewed-by: jcoomes, johnc
1 /*
2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
25 #include "precompiled.hpp"
26 #include "classfile/symbolTable.hpp"
27 #include "gc_implementation/g1/concurrentMark.hpp"
28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
31 #include "gc_implementation/g1/g1RemSet.hpp"
32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
34 #include "gc_implementation/shared/vmGCOperations.hpp"
35 #include "memory/genOopClosures.inline.hpp"
36 #include "memory/referencePolicy.hpp"
37 #include "memory/resourceArea.hpp"
38 #include "oops/oop.inline.hpp"
39 #include "runtime/handles.inline.hpp"
40 #include "runtime/java.hpp"
42 //
43 // CMS Bit Map Wrapper
45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
46 _bm((uintptr_t*)NULL,0),
47 _shifter(shifter) {
48 _bmStartWord = (HeapWord*)(rs.base());
49 _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes
50 ReservedSpace brs(ReservedSpace::allocation_align_size_up(
51 (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
53 guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
54 // For now we'll just commit all of the bit map up front.
55 // Later on we'll try to be more parsimonious with swap.
56 guarantee(_virtual_space.initialize(brs, brs.size()),
57 "couldn't reseve backing store for CMS bit map");
58 assert(_virtual_space.committed_size() == brs.size(),
59 "didn't reserve backing store for all of CMS bit map?");
60 _bm.set_map((uintptr_t*)_virtual_space.low());
61 assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
62 _bmWordSize, "inconsistency in bit map sizing");
63 _bm.set_size(_bmWordSize >> _shifter);
64 }
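// Sizing intuition (illustrative note, not part of the original source):
// with _shifter == 0 the map keeps one bit per heap word, so the backing
// store needs _bmWordSize >> LogBitsPerByte bytes. For a 1 GB heap on a
// 64-bit VM (128M heap words) that is 16 MB, i.e. 1/64th of the heap for
// each of the two marking bitmaps.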
66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
67 HeapWord* limit) const {
68 // First we must round addr *up* to a possible object boundary.
69 addr = (HeapWord*)align_size_up((intptr_t)addr,
70 HeapWordSize << _shifter);
71 size_t addrOffset = heapWordToOffset(addr);
72 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
73 size_t limitOffset = heapWordToOffset(limit);
74 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
75 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
76 assert(nextAddr >= addr, "get_next_one postcondition");
77 assert(nextAddr == limit || isMarked(nextAddr),
78 "get_next_one postcondition");
79 return nextAddr;
80 }
82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
83 HeapWord* limit) const {
84 size_t addrOffset = heapWordToOffset(addr);
85 if (limit == NULL) limit = _bmStartWord + _bmWordSize;
86 size_t limitOffset = heapWordToOffset(limit);
87 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
88 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
89 assert(nextAddr >= addr, "get_next_one postcondition");
90 assert(nextAddr == limit || !isMarked(nextAddr),
91 "get_next_one postcondition");
92 return nextAddr;
93 }
95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
96 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
97 return (int) (diff >> _shifter);
98 }
100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
101 HeapWord* left = MAX2(_bmStartWord, mr.start());
102 HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
103 if (right > left) {
104 // Right-open interval [leftOffset, rightOffset).
105 return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
106 } else {
107 return true;
108 }
109 }
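// Example (illustrative note): if mr begins below _bmStartWord, 'left'
// clamps the walk to the covered range; when clamping leaves an empty
// interval we return true, meaning "nothing to visit" -- a false return
// is reserved for the closure aborting the iteration.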
111 void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap,
112 size_t from_start_index,
113 HeapWord* to_start_word,
114 size_t word_num) {
115 _bm.mostly_disjoint_range_union(from_bitmap,
116 from_start_index,
117 heapWordToOffset(to_start_word),
118 word_num);
119 }
121 #ifndef PRODUCT
122 bool CMBitMapRO::covers(ReservedSpace rs) const {
123 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
124 assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
125 "size inconsistency");
126 return _bmStartWord == (HeapWord*)(rs.base()) &&
127 _bmWordSize == rs.size()>>LogHeapWordSize;
128 }
129 #endif
131 void CMBitMap::clearAll() {
132 _bm.clear();
133 return;
134 }
136 void CMBitMap::markRange(MemRegion mr) {
137 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
138 assert(!mr.is_empty(), "unexpected empty region");
139 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
140 ((HeapWord *) mr.end())),
141 "markRange memory region end is not card aligned");
142 // convert address range into offset range
143 _bm.at_put_range(heapWordToOffset(mr.start()),
144 heapWordToOffset(mr.end()), true);
145 }
147 void CMBitMap::clearRange(MemRegion mr) {
148 mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
149 assert(!mr.is_empty(), "unexpected empty region");
150 // convert address range into offset range
151 _bm.at_put_range(heapWordToOffset(mr.start()),
152 heapWordToOffset(mr.end()), false);
153 }
155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
156 HeapWord* end_addr) {
157 HeapWord* start = getNextMarkedWordAddress(addr);
158 start = MIN2(start, end_addr);
159 HeapWord* end = getNextUnmarkedWordAddress(start);
160 end = MIN2(end, end_addr);
161 assert(start <= end, "Consistency check");
162 MemRegion mr(start, end);
163 if (!mr.is_empty()) {
164 clearRange(mr);
165 }
166 return mr;
167 }
169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
170 _base(NULL), _cm(cm)
171 #ifdef ASSERT
172 , _drain_in_progress(false)
173 , _drain_in_progress_yields(false)
174 #endif
175 {}
177 void CMMarkStack::allocate(size_t size) {
178 _base = NEW_C_HEAP_ARRAY(oop, size);
179 if (_base == NULL)
180 vm_exit_during_initialization("Failed to allocate "
181 "CM region mark stack");
182 _index = 0;
183 // QQQQ cast ...
184 _capacity = (jint) size;
185 _oops_do_bound = -1;
186 NOT_PRODUCT(_max_depth = 0);
187 }
189 CMMarkStack::~CMMarkStack() {
190 if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
191 }
193 void CMMarkStack::par_push(oop ptr) {
194 while (true) {
195 if (isFull()) {
196 _overflow = true;
197 return;
198 }
199 // Otherwise...
200 jint index = _index;
201 jint next_index = index+1;
202 jint res = Atomic::cmpxchg(next_index, &_index, index);
203 if (res == index) {
204 _base[index] = ptr;
205 // Note that we don't maintain this atomically. We could, but it
206 // doesn't seem necessary.
207 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
208 return;
209 }
210 // Otherwise, we need to try again.
211 }
212 }
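// CAS semantics (illustrative note): Atomic::cmpxchg(next_index, &_index,
// index) installs next_index only if _index still equals index, and it
// returns the value it actually saw. If two threads race with index == 5,
// exactly one sees 5 returned and owns slot 5; the other sees 6 and
// retries the loop with the updated index.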
214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
215 while (true) {
216 if (isFull()) {
217 _overflow = true;
218 return;
219 }
220 // Otherwise...
221 jint index = _index;
222 jint next_index = index + n;
223 if (next_index > _capacity) {
224 _overflow = true;
225 return;
226 }
227 jint res = Atomic::cmpxchg(next_index, &_index, index);
228 if (res == index) {
229 for (int i = 0; i < n; i++) {
230 int ind = index + i;
231 assert(ind < _capacity, "By overflow test above.");
232 _base[ind] = ptr_arr[i];
233 }
234 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
235 return;
236 }
237 // Otherwise, we need to try again.
238 }
239 }
242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
243 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
244 jint start = _index;
245 jint next_index = start + n;
246 if (next_index > _capacity) {
247 _overflow = true;
248 return;
249 }
250 // Otherwise.
251 _index = next_index;
252 for (int i = 0; i < n; i++) {
253 int ind = start + i;
254 assert(ind < _capacity, "By overflow test above.");
255 _base[ind] = ptr_arr[i];
256 }
257 }
260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
261 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
262 jint index = _index;
263 if (index == 0) {
264 *n = 0;
265 return false;
266 } else {
267 int k = MIN2(max, index);
268 jint new_ind = index - k;
269 for (int j = 0; j < k; j++) {
270 ptr_arr[j] = _base[new_ind + j];
271 }
272 _index = new_ind;
273 *n = k;
274 return true;
275 }
276 }
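// A minimal consumer sketch (illustrative, not part of this changeset;
// 'stack' and 'process_oop' are hypothetical):
#if 0
  oop buffer[64];
  int n;
  while (stack->par_pop_arr(buffer, 64, &n)) {
    for (int i = 0; i < n; i++) {
      process_oop(buffer[i]); // hypothetical per-oop work
    }
  }
#endif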
279 CMRegionStack::CMRegionStack() : _base(NULL) {}
281 void CMRegionStack::allocate(size_t size) {
282 _base = NEW_C_HEAP_ARRAY(MemRegion, size);
283 if (_base == NULL)
284 vm_exit_during_initialization("Failed to allocate "
285 "CM region mark stack");
286 _index = 0;
287 // QQQQ cast ...
288 _capacity = (jint) size;
289 }
291 CMRegionStack::~CMRegionStack() {
292 if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
293 }
295 void CMRegionStack::push_lock_free(MemRegion mr) {
296 assert(mr.word_size() > 0, "Precondition");
297 while (true) {
298 jint index = _index;
300 if (index >= _capacity) {
301 _overflow = true;
302 return;
303 }
304 // Otherwise...
305 jint next_index = index+1;
306 jint res = Atomic::cmpxchg(next_index, &_index, index);
307 if (res == index) {
308 _base[index] = mr;
309 return;
310 }
311 // Otherwise, we need to try again.
312 }
313 }
315 // Lock-free pop of the region stack. Called during the concurrent
316 // marking / remark phases. Should only be called in tandem with
317 // other lock-free pops.
318 MemRegion CMRegionStack::pop_lock_free() {
319 while (true) {
320 jint index = _index;
322 if (index == 0) {
323 return MemRegion();
324 }
325 // Otherwise...
326 jint next_index = index-1;
327 jint res = Atomic::cmpxchg(next_index, &_index, index);
328 if (res == index) {
329 MemRegion mr = _base[next_index];
330 if (mr.start() != NULL) {
331 assert(mr.end() != NULL, "invariant");
332 assert(mr.word_size() > 0, "invariant");
333 return mr;
334 } else {
335 // that entry was invalidated... let's skip it
336 assert(mr.end() == NULL, "invariant");
337 }
338 }
339 // Otherwise, we need to try again.
340 }
341 }
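// Note (illustrative): the "invalidated" entries skipped above are created
// by invalidate_entries_into_cset() below, which overwrites stale entries
// with a default-constructed MemRegion() whose start() and end() are NULL.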
343 #if 0
344 // The routines that manipulate the region stack with a lock are
345 // not currently used. They should be retained, however, as a
346 // diagnostic aid.
348 void CMRegionStack::push_with_lock(MemRegion mr) {
349 assert(mr.word_size() > 0, "Precondition");
350 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
352 if (isFull()) {
353 _overflow = true;
354 return;
355 }
357 _base[_index] = mr;
358 _index += 1;
359 }
361 MemRegion CMRegionStack::pop_with_lock() {
362 MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
364 while (true) {
365 if (_index == 0) {
366 return MemRegion();
367 }
368 _index -= 1;
370 MemRegion mr = _base[_index];
371 if (mr.start() != NULL) {
372 assert(mr.end() != NULL, "invariant");
373 assert(mr.word_size() > 0, "invariant");
374 return mr;
375 } else {
376 // that entry was invalidated... let's skip it
377 assert(mr.end() == NULL, "invariant");
378 }
379 }
380 }
381 #endif
383 bool CMRegionStack::invalidate_entries_into_cset() {
384 bool result = false;
385 G1CollectedHeap* g1h = G1CollectedHeap::heap();
386 for (int i = 0; i < _oops_do_bound; ++i) {
387 MemRegion mr = _base[i];
388 if (mr.start() != NULL) {
389 assert(mr.end() != NULL, "invariant");
390 assert(mr.word_size() > 0, "invariant");
391 HeapRegion* hr = g1h->heap_region_containing(mr.start());
392 assert(hr != NULL, "invariant");
393 if (hr->in_collection_set()) {
394 // The region points into the collection set
395 _base[i] = MemRegion();
396 result = true;
397 }
398 } else {
399 // that entry was invalidated... let's skip it
400 assert(mr.end() == NULL, "invariant");
401 }
402 }
403 return result;
404 }
406 template<class OopClosureClass>
407 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
408 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
409 || SafepointSynchronize::is_at_safepoint(),
410 "Drain recursion must be yield-safe.");
411 bool res = true;
412 debug_only(_drain_in_progress = true);
413 debug_only(_drain_in_progress_yields = yield_after);
414 while (!isEmpty()) {
415 oop newOop = pop();
416 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
417 assert(newOop->is_oop(), "Expected an oop");
418 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
419 "only grey objects on this stack");
420 // iterate over the oops in this oop, marking and pushing
421 // the ones in CMS generation.
422 newOop->oop_iterate(cl);
423 if (yield_after && _cm->do_yield_check()) {
424 res = false; break;
425 }
426 }
427 debug_only(_drain_in_progress = false);
428 return res;
429 }
431 void CMMarkStack::oops_do(OopClosure* f) {
432 if (_index == 0) return;
433 assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
434 "Bound must be set.");
435 for (int i = 0; i < _oops_do_bound; i++) {
436 f->do_oop(&_base[i]);
437 }
438 _oops_do_bound = -1;
439 }
441 bool ConcurrentMark::not_yet_marked(oop obj) const {
442 return (_g1h->is_obj_ill(obj)
443 || (_g1h->is_in_permanent(obj)
444 && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
445 }
447 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
448 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
449 #endif // _MSC_VER
451 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
452 int max_regions) :
453 _markBitMap1(rs, MinObjAlignment - 1),
454 _markBitMap2(rs, MinObjAlignment - 1),
456 _parallel_marking_threads(0),
457 _sleep_factor(0.0),
458 _marking_task_overhead(1.0),
459 _cleanup_sleep_factor(0.0),
460 _cleanup_task_overhead(1.0),
461 _cleanup_list("Cleanup List"),
462 _region_bm(max_regions, false /* in_resource_area*/),
463 _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
464 CardTableModRefBS::card_shift,
465 false /* in_resource_area*/),
466 _prevMarkBitMap(&_markBitMap1),
467 _nextMarkBitMap(&_markBitMap2),
468 _at_least_one_mark_complete(false),
470 _markStack(this),
471 _regionStack(),
472 // _finger set in set_non_marking_state
474 _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
475 // _active_tasks set in set_non_marking_state
476 // _tasks set inside the constructor
477 _task_queues(new CMTaskQueueSet((int) _max_task_num)),
478 _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
480 _has_overflown(false),
481 _concurrent(false),
482 _has_aborted(false),
483 _restart_for_overflow(false),
484 _concurrent_marking_in_progress(false),
485 _should_gray_objects(false),
487 // _verbose_level set below
489 _init_times(),
490 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
491 _cleanup_times(),
492 _total_counting_time(0.0),
493 _total_rs_scrub_time(0.0),
495 _parallel_workers(NULL)
496 {
497 CMVerboseLevel verbose_level =
498 (CMVerboseLevel) G1MarkingVerboseLevel;
499 if (verbose_level < no_verbose)
500 verbose_level = no_verbose;
501 if (verbose_level > high_verbose)
502 verbose_level = high_verbose;
503 _verbose_level = verbose_level;
505 if (verbose_low())
506 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
507 "heap end = "PTR_FORMAT, _heap_start, _heap_end);
509 _markStack.allocate(MarkStackSize);
510 _regionStack.allocate(G1MarkRegionStackSize);
512 // Create & start a ConcurrentMark thread.
513 _cmThread = new ConcurrentMarkThread(this);
514 assert(cmThread() != NULL, "CM Thread should have been created");
515 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
517 _g1h = G1CollectedHeap::heap();
518 assert(CGC_lock != NULL, "Where's the CGC_lock?");
519 assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
520 assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
522 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
523 satb_qs.set_buffer_size(G1SATBBufferSize);
525 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
526 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
528 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
529 _active_tasks = _max_task_num;
530 for (int i = 0; i < (int) _max_task_num; ++i) {
531 CMTaskQueue* task_queue = new CMTaskQueue();
532 task_queue->initialize();
533 _task_queues->register_queue(i, task_queue);
535 _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
536 _accum_task_vtime[i] = 0.0;
537 }
539 if (ConcGCThreads > ParallelGCThreads) {
540 vm_exit_during_initialization("Can't have more ConcGCThreads "
541 "than ParallelGCThreads.");
542 }
543 if (ParallelGCThreads == 0) {
544 // if we are not running with any parallel GC threads we will not
545 // spawn any marking threads either
546 _parallel_marking_threads = 0;
547 _sleep_factor = 0.0;
548 _marking_task_overhead = 1.0;
549 } else {
550 if (ConcGCThreads > 0) {
551 // notice that ConcGCThreads overrides G1MarkingOverheadPercent
552 // if both are set
554 _parallel_marking_threads = ConcGCThreads;
555 _sleep_factor = 0.0;
556 _marking_task_overhead = 1.0;
557 } else if (G1MarkingOverheadPercent > 0) {
558 // we will calculate the number of parallel marking threads
559 // based on a target overhead with respect to the soft real-time
560 // goal
562 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
563 double overall_cm_overhead =
564 (double) MaxGCPauseMillis * marking_overhead /
565 (double) GCPauseIntervalMillis;
566 double cpu_ratio = 1.0 / (double) os::processor_count();
567 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
568 double marking_task_overhead =
569 overall_cm_overhead / marking_thread_num *
570 (double) os::processor_count();
571 double sleep_factor =
572 (1.0 - marking_task_overhead) / marking_task_overhead;
574 _parallel_marking_threads = (size_t) marking_thread_num;
575 _sleep_factor = sleep_factor;
576 _marking_task_overhead = marking_task_overhead;
577 } else {
578 _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
579 _sleep_factor = 0.0;
580 _marking_task_overhead = 1.0;
581 }
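    // Worked example (illustrative, not part of the original source): with
    // G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
    // GCPauseIntervalMillis = 1000 and an 8-processor machine:
    //   marking_overhead      = 10 / 100.0         = 0.10
    //   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
    //   cpu_ratio             = 1.0 / 8            = 0.125
    //   marking_thread_num    = ceil(0.02 / 0.125) = 1
    //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
    //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
    // i.e. one marking thread sleeping about 5.25x as long as it runs.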
583 if (parallel_marking_threads() > 1)
584 _cleanup_task_overhead = 1.0;
585 else
586 _cleanup_task_overhead = marking_task_overhead();
587 _cleanup_sleep_factor =
588 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
590 #if 0
591 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
592 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
593 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
594 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
595 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
596 #endif
598 guarantee(parallel_marking_threads() > 0, "peace of mind");
599 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
600 (int) _parallel_marking_threads, false, true);
601 if (_parallel_workers == NULL) {
602 vm_exit_during_initialization("Failed necessary allocation.");
603 } else {
604 _parallel_workers->initialize_workers();
605 }
606 }
608 // so that the call below can read a sensible value
609 _heap_start = (HeapWord*) rs.base();
610 set_non_marking_state();
611 }
613 void ConcurrentMark::update_g1_committed(bool force) {
614 // If concurrent marking is not in progress, then we do not need to
615 // update _heap_end. This has a subtle and important
616 // side-effect. Imagine that two evacuation pauses happen between
617 // marking completion and remark. The first one can grow the
618 // heap (hence now the finger is below the heap end). Then, the
619 // second one could unnecessarily push regions on the region
620 // stack. This causes the invariant that the region stack is empty
621 // at the beginning of remark to be false. By ensuring that we do
623 not observe heap expansions after marking is complete, we avoid
624 this problem.
624 if (!concurrent_marking_in_progress() && !force)
625 return;
627 MemRegion committed = _g1h->g1_committed();
628 assert(committed.start() == _heap_start, "start shouldn't change");
629 HeapWord* new_end = committed.end();
630 if (new_end > _heap_end) {
631 // The heap has been expanded.
633 _heap_end = new_end;
634 }
635 // Notice that the heap can also shrink. However, this only happens
636 // during a Full GC (at least currently) and the entire marking
637 // phase will bail out and the task will not be restarted. So, let's
638 // do nothing.
639 }
641 void ConcurrentMark::reset() {
642 // Starting values for these two. This should be called in a STW
643 // phase. CM will be notified of any future g1_committed expansions
644 // at the end of evacuation pauses, when tasks are
645 // inactive.
646 MemRegion committed = _g1h->g1_committed();
647 _heap_start = committed.start();
648 _heap_end = committed.end();
650 // Separated the asserts so that we know which one fires.
651 assert(_heap_start != NULL, "heap bounds should look ok");
652 assert(_heap_end != NULL, "heap bounds should look ok");
653 assert(_heap_start < _heap_end, "heap bounds should look ok");
655 // reset all the marking data structures and any necessary flags
656 clear_marking_state();
658 if (verbose_low())
659 gclog_or_tty->print_cr("[global] resetting");
661 // We do reset all of them, since different phases will use
662 // a different number of active threads. So, it's easiest to have all
663 // of them ready.
664 for (int i = 0; i < (int) _max_task_num; ++i) {
665 _tasks[i]->reset(_nextMarkBitMap);
666 }
668 // we need this to make sure that the flag is on during the evac
669 // pause with initial mark piggy-backed
670 set_concurrent_marking_in_progress();
671 }
673 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
674 assert(active_tasks <= _max_task_num, "we should not have more");
676 _active_tasks = active_tasks;
677 // Need to update the three data structures below according to the
678 // number of active threads for this phase.
679 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
680 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
681 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
683 _concurrent = concurrent;
684 // We propagate this to all tasks, not just the active ones.
685 for (int i = 0; i < (int) _max_task_num; ++i)
686 _tasks[i]->set_concurrent(concurrent);
688 if (concurrent) {
689 set_concurrent_marking_in_progress();
690 } else {
691 // We currently assume that the concurrent flag has been set to
692 // false before we start remark. At this point we should also be
693 // in a STW phase.
694 assert(!concurrent_marking_in_progress(), "invariant");
695 assert(_finger == _heap_end, "only way to get here");
696 update_g1_committed(true);
697 }
698 }
700 void ConcurrentMark::set_non_marking_state() {
701 // We set the global marking state to some default values when we're
702 // not doing marking.
703 clear_marking_state();
704 _active_tasks = 0;
705 clear_concurrent_marking_in_progress();
706 }
708 ConcurrentMark::~ConcurrentMark() {
709 for (int i = 0; i < (int) _max_task_num; ++i) {
710 delete _task_queues->queue(i);
711 delete _tasks[i];
712 }
713 delete _task_queues;
714 FREE_C_HEAP_ARRAY(CMTask*, _tasks);
715 }
717 // This closure is used to mark refs into the g1 generation
718 // from external roots in the CMS bit map.
719 // Called at the first checkpoint.
720 //
722 void ConcurrentMark::clearNextBitmap() {
723 G1CollectedHeap* g1h = G1CollectedHeap::heap();
724 G1CollectorPolicy* g1p = g1h->g1_policy();
726 // Make sure that the concurrent mark thread still appears to be in
727 // the current cycle.
728 guarantee(cmThread()->during_cycle(), "invariant");
730 // We are finishing up the current cycle by clearing the next
731 // marking bitmap and getting it ready for the next cycle. During
732 // this time no other cycle can start. So, let's make sure that this
733 // is the case.
734 guarantee(!g1h->mark_in_progress(), "invariant");
736 // clear the mark bitmap (no grey objects to start with).
737 // We need to do this in chunks and offer to yield in between
738 // each chunk.
739 HeapWord* start = _nextMarkBitMap->startWord();
740 HeapWord* end = _nextMarkBitMap->endWord();
741 HeapWord* cur = start;
742 size_t chunkSize = M;
743 while (cur < end) {
744 HeapWord* next = cur + chunkSize;
745 if (next > end)
746 next = end;
747 MemRegion mr(cur,next);
748 _nextMarkBitMap->clearRange(mr);
749 cur = next;
750 do_yield_check();
752 // Repeat the asserts from above. We'll do them as asserts here to
753 // minimize their overhead on the product. However, we'll have
754 // them as guarantees at the beginning / end of the bitmap
755 // clearing to get some checking in the product.
756 assert(cmThread()->during_cycle(), "invariant");
757 assert(!g1h->mark_in_progress(), "invariant");
758 }
760 // Repeat the asserts from above.
761 guarantee(cmThread()->during_cycle(), "invariant");
762 guarantee(!g1h->mark_in_progress(), "invariant");
763 }
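// Chunking intuition (illustrative note): chunkSize == M is 1M heap words
// (8 MB on a 64-bit VM), and at one mark bit per heap word each
// clearRange() call touches at most 128 KB of bitmap, keeping the interval
// between do_yield_check() calls short.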
765 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
766 public:
767 bool doHeapRegion(HeapRegion* r) {
768 if (!r->continuesHumongous()) {
769 r->note_start_of_marking(true);
770 }
771 return false;
772 }
773 };
775 void ConcurrentMark::checkpointRootsInitialPre() {
776 G1CollectedHeap* g1h = G1CollectedHeap::heap();
777 G1CollectorPolicy* g1p = g1h->g1_policy();
779 _has_aborted = false;
781 #ifndef PRODUCT
782 if (G1PrintReachableAtInitialMark) {
783 print_reachable("at-cycle-start",
784 true /* use_prev_marking */, true /* all */);
785 }
786 #endif
788 // Initialise marking structures. This has to be done in a STW phase.
789 reset();
790 }
792 class CMMarkRootsClosure: public OopsInGenClosure {
793 private:
794 ConcurrentMark* _cm;
795 G1CollectedHeap* _g1h;
796 bool _do_barrier;
798 public:
799 CMMarkRootsClosure(ConcurrentMark* cm,
800 G1CollectedHeap* g1h,
801 bool do_barrier) : _cm(cm), _g1h(g1h),
802 _do_barrier(do_barrier) { }
804 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
805 virtual void do_oop( oop* p) { do_oop_work(p); }
807 template <class T> void do_oop_work(T* p) {
808 T heap_oop = oopDesc::load_heap_oop(p);
809 if (!oopDesc::is_null(heap_oop)) {
810 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
811 assert(obj->is_oop() || obj->mark() == NULL,
812 "expected an oop, possibly with mark word displaced");
813 HeapWord* addr = (HeapWord*)obj;
814 if (_g1h->is_in_g1_reserved(addr)) {
815 _cm->grayRoot(obj);
816 }
817 }
818 if (_do_barrier) {
819 assert(!_g1h->is_in_g1_reserved(p),
820 "Should be called on external roots");
821 do_barrier(p);
822 }
823 }
824 };
826 void ConcurrentMark::checkpointRootsInitialPost() {
827 G1CollectedHeap* g1h = G1CollectedHeap::heap();
829 // For each region note start of marking.
830 NoteStartOfMarkHRClosure startcl;
831 g1h->heap_region_iterate(&startcl);
833 // Start weak-reference discovery.
834 ReferenceProcessor* rp = g1h->ref_processor();
835 rp->verify_no_references_recorded();
836 rp->enable_discovery(); // enable ("weak") refs discovery
837 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
839 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
840 // This is the start of the marking cycle; we expect all
841 // threads to have SATB queues with active set to false.
842 satb_mq_set.set_active_all_threads(true, /* new active value */
843 false /* expected_active */);
845 // update_g1_committed() will be called at the end of an evac pause
846 // when marking is on. So, it's also called at the end of the
847 // initial-mark pause to update the heap end, if the heap expands
848 // during it. No need to call it here.
849 }
851 // Checkpoint the roots into this generation from outside
852 // this generation. [Note this initial checkpoint need only
853 // be approximate -- we'll do a catch up phase subsequently.]
854 void ConcurrentMark::checkpointRootsInitial() {
855 assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
856 G1CollectedHeap* g1h = G1CollectedHeap::heap();
858 double start = os::elapsedTime();
860 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
861 g1p->record_concurrent_mark_init_start();
862 checkpointRootsInitialPre();
864 // YSR: when concurrent precleaning is in place, we'll
865 // need to clear the cached card table here
867 ResourceMark rm;
868 HandleMark hm;
870 g1h->ensure_parsability(false);
871 g1h->perm_gen()->save_marks();
873 CMMarkRootsClosure notOlder(this, g1h, false);
874 CMMarkRootsClosure older(this, g1h, true);
876 g1h->set_marking_started();
877 g1h->rem_set()->prepare_for_younger_refs_iterate(false);
879 g1h->process_strong_roots(true, // activate StrongRootsScope
880 false, // fake perm gen collection
881 SharedHeap::SO_AllClasses,
882 &notOlder, // Regular roots
883 NULL, // do not visit active blobs
884 &older // Perm Gen Roots
885 );
886 checkpointRootsInitialPost();
888 // Statistics.
889 double end = os::elapsedTime();
890 _init_times.add((end - start) * 1000.0);
892 g1p->record_concurrent_mark_init_end();
893 }
895 /*
896 Notice that in the next two methods, we actually leave the STS
897 during the barrier sync and join it immediately afterwards. If we
898 do not do this, then the following deadlock can occur: one
899 thread could be in the barrier sync code, waiting for the other
900 thread to also sync up, whereas another one could be trying to
901 yield, while also waiting for the other threads to sync up too.
903 Because the thread that does the sync barrier has left the STS, it
904 is possible for it to be suspended while a Full GC or an evacuation
905 pause occurs. This is actually safe, since entering the sync
906 barrier is one of the last things do_marking_step() does, and it
907 doesn't manipulate any data structures afterwards.
908 */
910 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
911 if (verbose_low())
912 gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
914 ConcurrentGCThread::stsLeave();
915 _first_overflow_barrier_sync.enter();
916 ConcurrentGCThread::stsJoin();
917 // at this point everyone should have synced up and not be doing any
918 // more work
920 if (verbose_low())
921 gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
923 // let task 0 do this
924 if (task_num == 0) {
925 // task 0 is responsible for clearing the global data structures
926 clear_marking_state();
928 if (PrintGC) {
929 gclog_or_tty->date_stamp(PrintGCDateStamps);
930 gclog_or_tty->stamp(PrintGCTimeStamps);
931 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
932 }
933 }
935 // after this, each task should reset its own data structures and
936 // then go into the second barrier
937 }
939 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
940 if (verbose_low())
941 gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
943 ConcurrentGCThread::stsLeave();
944 _second_overflow_barrier_sync.enter();
945 ConcurrentGCThread::stsJoin();
946 // at this point everything should be re-initialised and ready to go
948 if (verbose_low())
949 gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
950 }
952 void ConcurrentMark::grayRoot(oop p) {
953 HeapWord* addr = (HeapWord*) p;
954 // We can't really check against _heap_start and _heap_end, since it
955 // is possible during an evacuation pause with piggy-backed
956 // initial-mark that the committed space is expanded during the
957 pause without CM observing this change. So the assertion below
958 is a bit conservative, but better than nothing.
959 assert(_g1h->g1_committed().contains(addr),
960 "address should be within the heap bounds");
962 if (!_nextMarkBitMap->isMarked(addr))
963 _nextMarkBitMap->parMark(addr);
964 }
966 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
967 // The objects on the region have already been marked "in bulk" by
968 // the caller. We only need to decide whether to push the region on
969 // the region stack or not.
971 if (!concurrent_marking_in_progress() || !_should_gray_objects)
972 // We're done with marking and waiting for remark. We do not need to
973 // push anything else on the region stack.
974 return;
976 HeapWord* finger = _finger;
978 if (verbose_low())
979 gclog_or_tty->print_cr("[global] attempting to push "
980 "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
981 PTR_FORMAT, mr.start(), mr.end(), finger);
983 if (mr.start() < finger) {
984 // The finger is always heap region aligned and it is not possible
985 // for mr to span heap regions.
986 assert(mr.end() <= finger, "invariant");
988 // Separated the asserts so that we know which one fires.
989 assert(mr.start() <= mr.end(),
990 "region boundaries should fall within the committed space");
991 assert(_heap_start <= mr.start(),
992 "region boundaries should fall within the committed space");
993 assert(mr.end() <= _heap_end,
994 "region boundaries should fall within the committed space");
995 if (verbose_low())
996 gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
997 "below the finger, pushing it",
998 mr.start(), mr.end());
1000 if (!region_stack_push_lock_free(mr)) {
1001 if (verbose_low())
1002 gclog_or_tty->print_cr("[global] region stack has overflown.");
1003 }
1004 }
1005 }
1007 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1008 // The object is not marked by the caller. We need to at least mark
1009 // it and maybe push it on the stack.
1011 HeapWord* addr = (HeapWord*)p;
1012 if (!_nextMarkBitMap->isMarked(addr)) {
1013 // We definitely need to mark it, irrespective of whether we bail out
1014 // because we're done with marking.
1015 if (_nextMarkBitMap->parMark(addr)) {
1016 if (!concurrent_marking_in_progress() || !_should_gray_objects)
1017 // If we're done with concurrent marking and we're waiting for
1018 // remark, then we're not pushing anything on the stack.
1019 return;
1021 // No OrderAccess::store_load() is needed. It is implicit in the
1022 // CAS done in parMark(addr) above
1023 HeapWord* finger = _finger;
1025 if (addr < finger) {
1026 if (!mark_stack_push(oop(addr))) {
1027 if (verbose_low())
1028 gclog_or_tty->print_cr("[global] global stack overflow "
1029 "during parMark");
1030 }
1031 }
1032 }
1033 }
1034 }
1036 class CMConcurrentMarkingTask: public AbstractGangTask {
1037 private:
1038 ConcurrentMark* _cm;
1039 ConcurrentMarkThread* _cmt;
1041 public:
1042 void work(int worker_i) {
1043 assert(Thread::current()->is_ConcurrentGC_thread(),
1044 "this should only be done by a conc GC thread");
1045 ResourceMark rm;
1047 double start_vtime = os::elapsedVTime();
1049 ConcurrentGCThread::stsJoin();
1051 assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1052 CMTask* the_task = _cm->task(worker_i);
1053 the_task->record_start_time();
1054 if (!_cm->has_aborted()) {
1055 do {
1056 double start_vtime_sec = os::elapsedVTime();
1057 double start_time_sec = os::elapsedTime();
1058 the_task->do_marking_step(10.0);
1059 double end_time_sec = os::elapsedTime();
1060 double end_vtime_sec = os::elapsedVTime();
1061 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1062 double elapsed_time_sec = end_time_sec - start_time_sec;
1063 _cm->clear_has_overflown();
1065 bool ret = _cm->do_yield_check(worker_i);
1067 jlong sleep_time_ms;
1068 if (!_cm->has_aborted() && the_task->has_aborted()) {
1069 sleep_time_ms =
1070 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1071 ConcurrentGCThread::stsLeave();
1072 os::sleep(Thread::current(), sleep_time_ms, false);
1073 ConcurrentGCThread::stsJoin();
1074 }
1075 double end_time2_sec = os::elapsedTime();
1076 double elapsed_time2_sec = end_time2_sec - start_time_sec;
1078 #if 0
1079 gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1080 "overhead %1.4lf",
1081 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1082 the_task->conc_overhead(os::elapsedTime()) * 8.0);
1083 gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1084 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1085 #endif
1086 } while (!_cm->has_aborted() && the_task->has_aborted());
1087 }
1088 the_task->record_end_time();
1089 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1091 ConcurrentGCThread::stsLeave();
1093 double end_vtime = os::elapsedVTime();
1094 _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1095 }
1097 CMConcurrentMarkingTask(ConcurrentMark* cm,
1098 ConcurrentMarkThread* cmt) :
1099 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1101 ~CMConcurrentMarkingTask() { }
1102 };
1104 void ConcurrentMark::markFromRoots() {
1105 // we might be tempted to assert that:
1106 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1107 // "inconsistent argument?");
1108 // However that wouldn't be right, because it's possible that
1109 // a safepoint is indeed in progress as a younger generation
1110 // stop-the-world GC happens even as we mark in this generation.
1112 _restart_for_overflow = false;
1114 set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1116 CMConcurrentMarkingTask markingTask(this, cmThread());
1117 if (parallel_marking_threads() > 0)
1118 _parallel_workers->run_task(&markingTask);
1119 else
1120 markingTask.work(0);
1121 print_stats();
1122 }
1124 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1125 // world is stopped at this checkpoint
1126 assert(SafepointSynchronize::is_at_safepoint(),
1127 "world should be stopped");
1128 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1130 // If a full collection has happened, we shouldn't do this.
1131 if (has_aborted()) {
1132 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1133 return;
1134 }
1136 SvcGCMarker sgcm(SvcGCMarker::OTHER);
1138 if (VerifyDuringGC) {
1139 HandleMark hm; // handle scope
1140 gclog_or_tty->print(" VerifyDuringGC:(before)");
1141 Universe::heap()->prepare_for_verify();
1142 Universe::verify(true, false, true);
1143 }
1145 G1CollectorPolicy* g1p = g1h->g1_policy();
1146 g1p->record_concurrent_mark_remark_start();
1148 double start = os::elapsedTime();
1150 checkpointRootsFinalWork();
1152 double mark_work_end = os::elapsedTime();
1154 weakRefsWork(clear_all_soft_refs);
1156 if (has_overflown()) {
1157 // Oops. We overflowed. Restart concurrent marking.
1158 _restart_for_overflow = true;
1159 // Clear the flag. We do not need it any more.
1160 clear_has_overflown();
1161 if (G1TraceMarkStackOverflow)
1162 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1163 } else {
1164 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1165 // We're done with marking.
1166 // This is the end of the marking cycle; we expect all
1167 // threads to have SATB queues with active set to true.
1168 satb_mq_set.set_active_all_threads(false, /* new active value */
1169 true /* expected_active */);
1171 if (VerifyDuringGC) {
1172 HandleMark hm; // handle scope
1173 gclog_or_tty->print(" VerifyDuringGC:(after)");
1174 Universe::heap()->prepare_for_verify();
1175 Universe::heap()->verify(/* allow_dirty */ true,
1176 /* silent */ false,
1177 /* use_prev_marking */ false);
1178 }
1179 }
1181 #if VERIFY_OBJS_PROCESSED
1182 _scan_obj_cl.objs_processed = 0;
1183 ThreadLocalObjQueue::objs_enqueued = 0;
1184 #endif
1186 // Statistics
1187 double now = os::elapsedTime();
1188 _remark_mark_times.add((mark_work_end - start) * 1000.0);
1189 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1190 _remark_times.add((now - start) * 1000.0);
1192 g1p->record_concurrent_mark_remark_end();
1193 }
1196 #define CARD_BM_TEST_MODE 0
1198 class CalcLiveObjectsClosure: public HeapRegionClosure {
1200 CMBitMapRO* _bm;
1201 ConcurrentMark* _cm;
1202 bool _changed;
1203 bool _yield;
1204 size_t _words_done;
1205 size_t _tot_live;
1206 size_t _tot_used;
1207 size_t _regions_done;
1208 double _start_vtime_sec;
1210 BitMap* _region_bm;
1211 BitMap* _card_bm;
1212 intptr_t _bottom_card_num;
1213 bool _final;
1215 void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1216 for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1217 #if CARD_BM_TEST_MODE
1218 guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1219 #else
1220 _card_bm->par_at_put(i - _bottom_card_num, 1);
1221 #endif
1222 }
1223 }
1225 public:
1226 CalcLiveObjectsClosure(bool final,
1227 CMBitMapRO *bm, ConcurrentMark *cm,
1228 BitMap* region_bm, BitMap* card_bm) :
1229 _bm(bm), _cm(cm), _changed(false), _yield(true),
1230 _words_done(0), _tot_live(0), _tot_used(0),
1231 _region_bm(region_bm), _card_bm(card_bm),_final(final),
1232 _regions_done(0), _start_vtime_sec(0.0)
1233 {
1234 _bottom_card_num =
1235 intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1236 CardTableModRefBS::card_shift);
1237 }
1239 // It takes a region that's not empty (i.e., it has at least one
1240 // live object in it) and sets its corresponding bit on the region
1241 // bitmap to 1. If the region is "starts humongous" it will also set
1242 // to 1 the bits on the region bitmap that correspond to its
1243 // associated "continues humongous" regions.
1244 void set_bit_for_region(HeapRegion* hr) {
1245 assert(!hr->continuesHumongous(), "should have filtered those out");
1247 size_t index = hr->hrs_index();
1248 if (!hr->startsHumongous()) {
1249 // Normal (non-humongous) case: just set the bit.
1250 _region_bm->par_at_put((BitMap::idx_t) index, true);
1251 } else {
1252 // Starts humongous case: calculate how many regions are part of
1253 // this humongous region and then set the bit range. It might
1254 // have been a bit more efficient to look at the object that
1255 // spans these humongous regions to calculate their number from
1256 // the object's size. However, it's a good idea to calculate
1257 // this based on the metadata itself, and not the region
1258 // contents, so that this code is not aware of what goes into
1259 // the humongous regions (in case this changes in the future).
1260 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1261 size_t end_index = index + 1;
1262 while (end_index < g1h->n_regions()) {
1263 HeapRegion* chr = g1h->region_at(end_index);
1264 if (!chr->continuesHumongous()) {
1265 break;
1266 }
1267 end_index += 1;
1268 }
1269 _region_bm->par_at_put_range((BitMap::idx_t) index,
1270 (BitMap::idx_t) end_index, true);
1271 }
1272 }
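  // Example (illustrative): for a humongous object spanning regions
  // 10..12, index == 10 and the loop advances end_index to 13, so a
  // single par_at_put_range() call sets bits [10, 13).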
1274 bool doHeapRegion(HeapRegion* hr) {
1275 if (!_final && _regions_done == 0)
1276 _start_vtime_sec = os::elapsedVTime();
1278 if (hr->continuesHumongous()) {
1279 // We will ignore these here and process them when their
1280 // associated "starts humongous" region is processed (see
1281 // set_bit_for_region()). Note that we cannot rely on their
1282 // associated "starts humongous" region to have their bit set to
1283 // 1 since, due to the region chunking in the parallel region
1284 // iteration, a "continues humongous" region might be visited
1285 // before its associated "starts humongous".
1286 return false;
1287 }
1289 HeapWord* nextTop = hr->next_top_at_mark_start();
1290 HeapWord* start = hr->top_at_conc_mark_count();
1291 assert(hr->bottom() <= start && start <= hr->end() &&
1292 hr->bottom() <= nextTop && nextTop <= hr->end() &&
1293 start <= nextTop,
1294 "Preconditions.");
1295 // Otherwise, record the number of words we'll examine.
1296 size_t words_done = (nextTop - start);
1297 // Find the first marked object at or after "start".
1298 start = _bm->getNextMarkedWordAddress(start, nextTop);
1299 size_t marked_bytes = 0;
1301 // Below, the term "card num" means the result of shifting an address
1302 // by the card shift -- address 0 corresponds to card number 0. One
1303 // must subtract the card num of the bottom of the heap to obtain a
1304 // card table index.
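    // Worked example (illustrative, not part of the original source):
    // with 512-byte cards CardTableModRefBS::card_shift == 9, so an
    // object at address 0x1200 has card num 0x1200 >> 9 == 9; if the
    // bottom of the heap has card num _bottom_card_num == 4, its card
    // table index is 9 - 4 == 5.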
1305 // The first card num of the sequence of live cards currently being
1306 // constructed. -1 ==> no sequence.
1307 intptr_t start_card_num = -1;
1308 // The last card num of the sequence of live cards currently being
1309 // constructed. -1 ==> no sequence.
1310 intptr_t last_card_num = -1;
1312 while (start < nextTop) {
1313 if (_yield && _cm->do_yield_check()) {
1314 // We yielded. It might be for a full collection, in which case
1315 // all bets are off; terminate the traversal.
1316 if (_cm->has_aborted()) {
1317 _changed = false;
1318 return true;
1319 } else {
1320 // Otherwise, it might be a collection pause, and the region
1321 // we're looking at might be in the collection set. We'll
1322 // abandon this region.
1323 return false;
1324 }
1325 }
1326 oop obj = oop(start);
1327 int obj_sz = obj->size();
1328 // The card num of the start of the current object.
1329 intptr_t obj_card_num =
1330 intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1332 HeapWord* obj_last = start + obj_sz - 1;
1333 intptr_t obj_last_card_num =
1334 intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1336 if (obj_card_num != last_card_num) {
1337 if (start_card_num == -1) {
1338 assert(last_card_num == -1, "Both or neither.");
1339 start_card_num = obj_card_num;
1340 } else {
1341 assert(last_card_num != -1, "Both or neither.");
1342 assert(obj_card_num >= last_card_num, "Inv");
1343 if ((obj_card_num - last_card_num) > 1) {
1344 // Mark the last run, and start a new one.
1345 mark_card_num_range(start_card_num, last_card_num);
1346 start_card_num = obj_card_num;
1347 }
1348 }
1349 #if CARD_BM_TEST_MODE
1350 /*
1351 gclog_or_tty->print_cr("Setting bits from %d/%d.",
1352 obj_card_num - _bottom_card_num,
1353 obj_last_card_num - _bottom_card_num);
1354 */
1355 for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1356 _card_bm->par_at_put(j - _bottom_card_num, 1);
1357 }
1358 #endif
1359 }
1360 // In any case, we set the last card num.
1361 last_card_num = obj_last_card_num;
1363 marked_bytes += (size_t)obj_sz * HeapWordSize;
1364 // Find the next marked object after this one.
1365 start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1366 _changed = true;
1367 }
1368 // Handle the last range, if any.
1369 if (start_card_num != -1)
1370 mark_card_num_range(start_card_num, last_card_num);
1371 if (_final) {
1372 // Mark the allocated-since-marking portion...
1373 HeapWord* tp = hr->top();
1374 if (nextTop < tp) {
1375 start_card_num =
1376 intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1377 last_card_num =
1378 intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1379 mark_card_num_range(start_card_num, last_card_num);
1380 // This definitely means the region has live objects.
1381 set_bit_for_region(hr);
1382 }
1383 }
1385 hr->add_to_marked_bytes(marked_bytes);
1386 // Update the live region bitmap.
1387 if (marked_bytes > 0) {
1388 set_bit_for_region(hr);
1389 }
1390 hr->set_top_at_conc_mark_count(nextTop);
1391 _tot_live += hr->next_live_bytes();
1392 _tot_used += hr->used();
1393 _words_done = words_done;
1395 if (!_final) {
1396 ++_regions_done;
1397 if (_regions_done % 10 == 0) {
1398 double end_vtime_sec = os::elapsedVTime();
1399 double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1400 if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1401 jlong sleep_time_ms =
1402 (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1403 os::sleep(Thread::current(), sleep_time_ms, false);
1404 _start_vtime_sec = end_vtime_sec;
1405 }
1406 }
1407 }
1409 return false;
1410 }
1412 bool changed() { return _changed; }
1413 void reset() { _changed = false; _words_done = 0; }
1414 void no_yield() { _yield = false; }
1415 size_t words_done() { return _words_done; }
1416 size_t tot_live() { return _tot_live; }
1417 size_t tot_used() { return _tot_used; }
1418 };
1421 void ConcurrentMark::calcDesiredRegions() {
1422 _region_bm.clear();
1423 _card_bm.clear();
1424 CalcLiveObjectsClosure calccl(false /*final*/,
1425 nextMarkBitMap(), this,
1426 &_region_bm, &_card_bm);
1427 G1CollectedHeap *g1h = G1CollectedHeap::heap();
1428 g1h->heap_region_iterate(&calccl);
1430 do {
1431 calccl.reset();
1432 g1h->heap_region_iterate(&calccl);
1433 } while (calccl.changed());
1434 }
1436 class G1ParFinalCountTask: public AbstractGangTask {
1437 protected:
1438 G1CollectedHeap* _g1h;
1439 CMBitMap* _bm;
1440 size_t _n_workers;
1441 size_t *_live_bytes;
1442 size_t *_used_bytes;
1443 BitMap* _region_bm;
1444 BitMap* _card_bm;
1445 public:
1446 G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1447 BitMap* region_bm, BitMap* card_bm) :
1448 AbstractGangTask("G1 final counting"), _g1h(g1h),
1449 _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1450 {
1451 if (ParallelGCThreads > 0)
1452 _n_workers = _g1h->workers()->total_workers();
1453 else
1454 _n_workers = 1;
1455 _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1456 _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1457 }
1459 ~G1ParFinalCountTask() {
1460 FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1461 FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1462 }
1464 void work(int i) {
1465 CalcLiveObjectsClosure calccl(true /*final*/,
1466 _bm, _g1h->concurrent_mark(),
1467 _region_bm, _card_bm);
1468 calccl.no_yield();
1469 if (G1CollectedHeap::use_parallel_gc_threads()) {
1470 _g1h->heap_region_par_iterate_chunked(&calccl, i,
1471 HeapRegion::FinalCountClaimValue);
1472 } else {
1473 _g1h->heap_region_iterate(&calccl);
1474 }
1475 assert(calccl.complete(), "Shouldn't have yielded!");
1477 assert((size_t) i < _n_workers, "invariant");
1478 _live_bytes[i] = calccl.tot_live();
1479 _used_bytes[i] = calccl.tot_used();
1480 }
1481 size_t live_bytes() {
1482 size_t live_bytes = 0;
1483 for (size_t i = 0; i < _n_workers; ++i)
1484 live_bytes += _live_bytes[i];
1485 return live_bytes;
1486 }
1487 size_t used_bytes() {
1488 size_t used_bytes = 0;
1489 for (size_t i = 0; i < _n_workers; ++i)
1490 used_bytes += _used_bytes[i];
1491 return used_bytes;
1492 }
1493 };
1495 class G1ParNoteEndTask;
1497 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1498 G1CollectedHeap* _g1;
1499 int _worker_num;
1500 size_t _max_live_bytes;
1501 size_t _regions_claimed;
1502 size_t _freed_bytes;
1503 FreeRegionList _local_cleanup_list;
1504 HumongousRegionSet _humongous_proxy_set;
1505 double _claimed_region_time;
1506 double _max_region_time;
1508 public:
1509 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1510 int worker_num);
1511 size_t freed_bytes() { return _freed_bytes; }
1512 FreeRegionList* local_cleanup_list() {
1513 return &_local_cleanup_list;
1514 }
1515 HumongousRegionSet* humongous_proxy_set() {
1516 return &_humongous_proxy_set;
1517 }
1519 bool doHeapRegion(HeapRegion *r);
1521 size_t max_live_bytes() { return _max_live_bytes; }
1522 size_t regions_claimed() { return _regions_claimed; }
1523 double claimed_region_time_sec() { return _claimed_region_time; }
1524 double max_region_time_sec() { return _max_region_time; }
1525 };
1527 class G1ParNoteEndTask: public AbstractGangTask {
1528 friend class G1NoteEndOfConcMarkClosure;
1530 protected:
1531 G1CollectedHeap* _g1h;
1532 size_t _max_live_bytes;
1533 size_t _freed_bytes;
1534 FreeRegionList* _cleanup_list;
1536 public:
1537 G1ParNoteEndTask(G1CollectedHeap* g1h,
1538 FreeRegionList* cleanup_list) :
1539 AbstractGangTask("G1 note end"), _g1h(g1h),
1540 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1542 void work(int i) {
1543 double start = os::elapsedTime();
1544 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i);
1545 if (G1CollectedHeap::use_parallel_gc_threads()) {
1546 _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1547 HeapRegion::NoteEndClaimValue);
1548 } else {
1549 _g1h->heap_region_iterate(&g1_note_end);
1550 }
1551 assert(g1_note_end.complete(), "Shouldn't have yielded!");
1553 // Now update the lists
1554 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1555 NULL /* free_list */,
1556 g1_note_end.humongous_proxy_set(),
1557 true /* par */);
1558 {
1559 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1560 _max_live_bytes += g1_note_end.max_live_bytes();
1561 _freed_bytes += g1_note_end.freed_bytes();
1563 _cleanup_list->add_as_tail(g1_note_end.local_cleanup_list());
1564 assert(g1_note_end.local_cleanup_list()->is_empty(), "post-condition");
1565 }
1566 double end = os::elapsedTime();
1567 if (G1PrintParCleanupStats) {
1568 gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1569 "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1570 i, start, end, (end-start)*1000.0,
1571 g1_note_end.regions_claimed(),
1572 g1_note_end.claimed_region_time_sec()*1000.0,
1573 g1_note_end.max_region_time_sec()*1000.0);
1574 }
1575 }
1576 size_t max_live_bytes() { return _max_live_bytes; }
1577 size_t freed_bytes() { return _freed_bytes; }
1578 };
1580 class G1ParScrubRemSetTask: public AbstractGangTask {
1581 protected:
1582 G1RemSet* _g1rs;
1583 BitMap* _region_bm;
1584 BitMap* _card_bm;
1585 public:
1586 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1587 BitMap* region_bm, BitMap* card_bm) :
1588 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1589 _region_bm(region_bm), _card_bm(card_bm)
1590 {}
1592 void work(int i) {
1593 if (G1CollectedHeap::use_parallel_gc_threads()) {
1594 _g1rs->scrub_par(_region_bm, _card_bm, i,
1595 HeapRegion::ScrubRemSetClaimValue);
1596 } else {
1597 _g1rs->scrub(_region_bm, _card_bm);
1598 }
1599 }
1601 };
1603 G1NoteEndOfConcMarkClosure::
1604 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1605 int worker_num)
1606 : _g1(g1), _worker_num(worker_num),
1607 _max_live_bytes(0), _regions_claimed(0),
1608 _freed_bytes(0),
1609 _claimed_region_time(0.0), _max_region_time(0.0),
1610 _local_cleanup_list("Local Cleanup List"),
1611 _humongous_proxy_set("Local Cleanup Humongous Proxy Set") { }
1613 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1614 // We use a claim value of zero here because all regions
1615 // were claimed with value 1 in the FinalCount task.
1616 hr->reset_gc_time_stamp();
1617 if (!hr->continuesHumongous()) {
1618 double start = os::elapsedTime();
1619 _regions_claimed++;
1620 hr->note_end_of_marking();
1621 _max_live_bytes += hr->max_live_bytes();
1622 _g1->free_region_if_totally_empty(hr,
1623 &_freed_bytes,
1624 &_local_cleanup_list,
1625 &_humongous_proxy_set,
1626 true /* par */);
1627 double region_time = (os::elapsedTime() - start);
1628 _claimed_region_time += region_time;
1629 if (region_time > _max_region_time) _max_region_time = region_time;
1630 }
1631 return false;
1632 }
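// Flow note (illustrative): regions freed here land on the worker-local
// _local_cleanup_list; G1ParNoteEndTask::work() above splices those lists
// onto the global _cleanup_list, which is processed concurrently after the
// pause (see set_free_regions_coming() in ConcurrentMark::cleanup()) --
// the extra concurrency this changeset introduces.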
1634 void ConcurrentMark::cleanup() {
1635 // world is stopped at this checkpoint
1636 assert(SafepointSynchronize::is_at_safepoint(),
1637 "world should be stopped");
1638 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1640 // If a full collection has happened, we shouldn't do this.
1641 if (has_aborted()) {
1642 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1643 return;
1644 }
1646 g1h->verify_region_sets_optional();
1648 if (VerifyDuringGC) {
1649 HandleMark hm; // handle scope
1650 gclog_or_tty->print(" VerifyDuringGC:(before)");
1651 Universe::heap()->prepare_for_verify();
1652 Universe::verify(/* allow dirty */ true,
1653 /* silent */ false,
1654 /* prev marking */ true);
1655 }
1657 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1658 g1p->record_concurrent_mark_cleanup_start();
1660 double start = os::elapsedTime();
1662 // Do counting once more with the world stopped for good measure.
1663 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1664 &_region_bm, &_card_bm);
1665 if (G1CollectedHeap::use_parallel_gc_threads()) {
1666 assert(g1h->check_heap_region_claim_values(
1667 HeapRegion::InitialClaimValue),
1668 "sanity check");
1670 int n_workers = g1h->workers()->total_workers();
1671 g1h->set_par_threads(n_workers);
1672 g1h->workers()->run_task(&g1_par_count_task);
1673 g1h->set_par_threads(0);
1675 assert(g1h->check_heap_region_claim_values(
1676 HeapRegion::FinalCountClaimValue),
1677 "sanity check");
1678 } else {
1679 g1_par_count_task.work(0);
1680 }
1682 size_t known_garbage_bytes =
1683 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1684 #if 0
1685 gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1686 (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1687 (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1688 (double) known_garbage_bytes / (double) (1024 * 1024));
1689 #endif // 0
1690 g1p->set_known_garbage_bytes(known_garbage_bytes);
1692 size_t start_used_bytes = g1h->used();
1693 _at_least_one_mark_complete = true;
1694 g1h->set_marking_complete();
1696 double count_end = os::elapsedTime();
1697 double this_final_counting_time = (count_end - start);
1698 if (G1PrintParCleanupStats) {
1699 gclog_or_tty->print_cr("Cleanup:");
1700 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1701 this_final_counting_time*1000.0);
1702 }
1703 _total_counting_time += this_final_counting_time;
1705 // Install the newly created mark bitmap as the "prev" bitmap.
1706 swapMarkBitMaps();
1708 g1h->reset_gc_time_stamp();
1710 // Note end of marking in all heap regions.
1711 double note_end_start = os::elapsedTime();
1712 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1713 if (G1CollectedHeap::use_parallel_gc_threads()) {
1714 int n_workers = g1h->workers()->total_workers();
1715 g1h->set_par_threads(n_workers);
1716 g1h->workers()->run_task(&g1_par_note_end_task);
1717 g1h->set_par_threads(0);
1719 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1720 "sanity check");
1721 } else {
1722 g1_par_note_end_task.work(0);
1723 }
1725 if (!cleanup_list_is_empty()) {
1726 // The cleanup list is not empty, so we'll have to process it
1727 // concurrently. Notify anyone else that might be wanting free
1728 // regions that there will be more free regions coming soon.
1729 g1h->set_free_regions_coming();
1730 }
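// Between this point and the notify_all() calls in completeCleanup()
// below, threads that need a free region can wait on
// SecondaryFreeList_lock for the next batch of regions instead of
// waiting for the entire cleanup to finish.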
1731 double note_end_end = os::elapsedTime();
1732 if (G1PrintParCleanupStats) {
1733 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1734 (note_end_end - note_end_start)*1000.0);
1735 }
1737 // The remembered set scrubbing has to be done before the sorting
1738 // call below, since it affects the metric by which we sort the heap
1739 // regions.
1740 if (G1ScrubRemSets) {
1741 double rs_scrub_start = os::elapsedTime();
1742 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1743 if (G1CollectedHeap::use_parallel_gc_threads()) {
1744 int n_workers = g1h->workers()->total_workers();
1745 g1h->set_par_threads(n_workers);
1746 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1747 g1h->set_par_threads(0);
1749 assert(g1h->check_heap_region_claim_values(
1750 HeapRegion::ScrubRemSetClaimValue),
1751 "sanity check");
1752 } else {
1753 g1_par_scrub_rs_task.work(0);
1754 }
1756 double rs_scrub_end = os::elapsedTime();
1757 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1758 _total_rs_scrub_time += this_rs_scrub_time;
1759 }
1761 // this will also free any regions totally full of garbage objects,
1762 // and sort the regions.
1763 g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1764 g1_par_note_end_task.freed_bytes(),
1765 g1_par_note_end_task.max_live_bytes());
1767 // Statistics.
1768 double end = os::elapsedTime();
1769 _cleanup_times.add((end - start) * 1000.0);
1771 // G1CollectedHeap::heap()->print();
1772 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1773 // G1CollectedHeap::heap()->get_gc_time_stamp());
1775 if (PrintGC || PrintGCDetails) {
1776 g1h->print_size_transition(gclog_or_tty,
1777 start_used_bytes,
1778 g1h->used(),
1779 g1h->capacity());
1780 }
1782 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1783 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1785 // We need to make this be a "collection" so any collection pause that
1786 // races with it goes around and waits for completeCleanup to finish.
1787 g1h->increment_total_collections();
1789 if (VerifyDuringGC) {
1790 HandleMark hm; // handle scope
1791 gclog_or_tty->print(" VerifyDuringGC:(after)");
1792 Universe::heap()->prepare_for_verify();
1793 Universe::verify(/* allow dirty */ true,
1794 /* silent */ false,
1795 /* prev marking */ true);
1796 }
1798 g1h->verify_region_sets_optional();
1799 }
1801 void ConcurrentMark::completeCleanup() {
1802 if (has_aborted()) return;
1804 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1806 _cleanup_list.verify_optional();
1807 FreeRegionList local_free_list("Local Cleanup List");
1809 if (G1ConcRegionFreeingVerbose) {
1810 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1811 "cleanup list has "SIZE_FORMAT" entries",
1812 _cleanup_list.length());
1813 }
1815 // No one else should be accessing the _cleanup_list at this point,
1816 // so it's not necessary to take any locks.
1817 while (!_cleanup_list.is_empty()) {
1818 HeapRegion* hr = _cleanup_list.remove_head();
1819 assert(hr != NULL, "the list was not empty");
1820 hr->rem_set()->clear();
1821 local_free_list.add_as_tail(hr);
1823 // Instead of adding one region at a time to the secondary_free_list,
1824 // we accumulate them in the local list and move them a few at a
1825 // time. This also cuts down on the number of notify_all() calls
1826 // we do during this process. We'll also append the local list when
1827 // _cleanup_list is empty (which means we just removed the last
1828 // region from the _cleanup_list).
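// For example, if G1SecondaryFreeListAppendLength were 5, the local
// list would be appended to the secondary_free_list after the 5th,
// 10th, 15th, ... region removed from the _cleanup_list, plus once
// more for whatever remains when the _cleanup_list becomes empty.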
1829 if ((local_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1830 _cleanup_list.is_empty()) {
1831 if (G1ConcRegionFreeingVerbose) {
1832 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1833 "appending "SIZE_FORMAT" entries to the "
1834 "secondary_free_list, clean list still has "
1835 SIZE_FORMAT" entries",
1836 local_free_list.length(),
1837 _cleanup_list.length());
1838 }
1840 {
1841 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1842 g1h->secondary_free_list_add_as_tail(&local_free_list);
1843 SecondaryFreeList_lock->notify_all();
1844 }
1846 if (G1StressConcRegionFreeing) {
1847 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1848 os::sleep(Thread::current(), (jlong) 1, false);
1849 }
1850 }
1851 }
1852 }
1853 assert(local_free_list.is_empty(), "post-condition");
1854 }
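// Liveness predicate used during reference processing: a NULL
// reference is never reported as live; any other object is live unless
// it lies within the G1 reserved heap and is "ill", i.e. considered
// dead by the marking information.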
1856 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1857 HeapWord* addr = (HeapWord*)obj;
1858 return addr != NULL &&
1859 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1860 }
1862 class G1CMKeepAliveClosure: public OopClosure {
1863 G1CollectedHeap* _g1;
1864 ConcurrentMark* _cm;
1865 CMBitMap* _bitMap;
1866 public:
1867 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1868 CMBitMap* bitMap) :
1869 _g1(g1), _cm(cm),
1870 _bitMap(bitMap) {}
1872 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1873 virtual void do_oop( oop* p) { do_oop_work(p); }
1875 template <class T> void do_oop_work(T* p) {
1876 oop thisOop = oopDesc::load_decode_heap_oop(p);
1877 HeapWord* addr = (HeapWord*)thisOop;
1878 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1879 _bitMap->mark(addr);
1880 _cm->mark_stack_push(thisOop);
1881 }
1882 }
1883 };
1885 class G1CMDrainMarkingStackClosure: public VoidClosure {
1886 CMMarkStack* _markStack;
1887 CMBitMap* _bitMap;
1888 G1CMKeepAliveClosure* _oopClosure;
1889 public:
1890 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1891 G1CMKeepAliveClosure* oopClosure) :
1892 _bitMap(bitMap),
1893 _markStack(markStack),
1894 _oopClosure(oopClosure)
1895 {}
1897 void do_void() {
1898 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1899 }
1900 };
1902 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1903 ResourceMark rm;
1904 HandleMark hm;
1905 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1906 ReferenceProcessor* rp = g1h->ref_processor();
1908 // See the comment in G1CollectedHeap::ref_processing_init()
1909 // about how reference processing currently works in G1.
1911 // Process weak references.
1912 rp->setup_policy(clear_all_soft_refs);
1913 assert(_markStack.isEmpty(), "mark stack should be empty");
1915 G1CMIsAliveClosure g1_is_alive(g1h);
1916 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
1917 G1CMDrainMarkingStackClosure
1918 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
1920 // XXXYYY Also: copy the parallel ref processing code from CMS.
1921 rp->process_discovered_references(&g1_is_alive,
1922 &g1_keep_alive,
1923 &g1_drain_mark_stack,
1924 NULL);
1925 assert(_markStack.overflow() || _markStack.isEmpty(),
1926 "mark stack should be empty (unless it overflowed)");
1927 if (_markStack.overflow()) {
1928 set_has_overflown();
1929 }
1931 rp->enqueue_discovered_references();
1932 rp->verify_no_references_recorded();
1933 assert(!rp->discovery_enabled(), "should have been disabled");
1935 // Now clean up stale oops in SymbolTable and StringTable
1936 SymbolTable::unlink(&g1_is_alive);
1937 StringTable::unlink(&g1_is_alive);
1938 }
1940 void ConcurrentMark::swapMarkBitMaps() {
1941 CMBitMapRO* temp = _prevMarkBitMap;
1942 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
1943 _nextMarkBitMap = (CMBitMap*) temp;
1944 }
1946 class CMRemarkTask: public AbstractGangTask {
1947 private:
1948 ConcurrentMark *_cm;
1950 public:
1951 void work(int worker_i) {
1952 // Since all available tasks are actually started, we should
1953 // only proceed if we're supposed to be active.
1954 if ((size_t)worker_i < _cm->active_tasks()) {
1955 CMTask* task = _cm->task(worker_i);
1956 task->record_start_time();
1957 do {
1958 task->do_marking_step(1000000000.0 /* something very large */);
1959 } while (task->has_aborted() && !_cm->has_overflown());
1960 // If we overflow, then we do not want to restart. We instead
1961 // want to abort remark and do concurrent marking again.
1962 task->record_end_time();
1963 }
1964 }
1966 CMRemarkTask(ConcurrentMark* cm) :
1967 AbstractGangTask("Par Remark"), _cm(cm) { }
1968 };
1970 void ConcurrentMark::checkpointRootsFinalWork() {
1971 ResourceMark rm;
1972 HandleMark hm;
1973 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1975 g1h->ensure_parsability(false);
1977 if (G1CollectedHeap::use_parallel_gc_threads()) {
1978 G1CollectedHeap::StrongRootsScope srs(g1h);
1979 // this is remark, so we'll use up all available threads
1980 int active_workers = ParallelGCThreads;
1981 set_phase(active_workers, false);
1983 CMRemarkTask remarkTask(this);
1984 // We will start all available threads, even if we decide that the
1985 // active_workers will be fewer. The extra ones will just bail out
1986 // immediately.
1987 int n_workers = g1h->workers()->total_workers();
1988 g1h->set_par_threads(n_workers);
1989 g1h->workers()->run_task(&remarkTask);
1990 g1h->set_par_threads(0);
1991 } else {
1992 G1CollectedHeap::StrongRootsScope srs(g1h);
1993 // this is remark, so we'll use up all available threads
1994 int active_workers = 1;
1995 set_phase(active_workers, false);
1997 CMRemarkTask remarkTask(this);
1998 // In the serial case we just run the remark task directly on the
1999 // calling thread, with a single active worker.
2001 remarkTask.work(0);
2002 }
2003 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2004 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2006 print_stats();
2008 if (!restart_for_overflow())
2009 set_non_marking_state();
2011 #if VERIFY_OBJS_PROCESSED
2012 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2013 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2014 _scan_obj_cl.objs_processed,
2015 ThreadLocalObjQueue::objs_enqueued);
2016 guarantee(_scan_obj_cl.objs_processed ==
2017 ThreadLocalObjQueue::objs_enqueued,
2018 "Different number of objs processed and enqueued.");
2019 }
2020 #endif
2021 }
2023 #ifndef PRODUCT
2025 class PrintReachableOopClosure: public OopClosure {
2026 private:
2027 G1CollectedHeap* _g1h;
2028 CMBitMapRO* _bitmap;
2029 outputStream* _out;
2030 bool _use_prev_marking;
2031 bool _all;
2033 public:
2034 PrintReachableOopClosure(CMBitMapRO* bitmap,
2035 outputStream* out,
2036 bool use_prev_marking,
2037 bool all) :
2038 _g1h(G1CollectedHeap::heap()),
2039 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2041 void do_oop(narrowOop* p) { do_oop_work(p); }
2042 void do_oop( oop* p) { do_oop_work(p); }
2044 template <class T> void do_oop_work(T* p) {
2045 oop obj = oopDesc::load_decode_heap_oop(p);
2046 const char* str = NULL;
2047 const char* str2 = "";
2049 if (obj == NULL) {
2050 str = "";
2051 } else if (!_g1h->is_in_g1_reserved(obj)) {
2052 str = " O";
2053 } else {
2054 HeapRegion* hr = _g1h->heap_region_containing(obj);
2055 guarantee(hr != NULL, "invariant");
2056 bool over_tams = false;
2057 if (_use_prev_marking) {
2058 over_tams = hr->obj_allocated_since_prev_marking(obj);
2059 } else {
2060 over_tams = hr->obj_allocated_since_next_marking(obj);
2061 }
2062 bool marked = _bitmap->isMarked((HeapWord*) obj);
2064 if (over_tams) {
2065 str = " >";
2066 if (marked) {
2067 str2 = " AND MARKED";
2068 }
2069 } else if (marked) {
2070 str = " M";
2071 } else {
2072 str = " NOT";
2073 }
2074 }
2076 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2077 p, (void*) obj, str, str2);
2078 }
2079 };
2081 class PrintReachableObjectClosure : public ObjectClosure {
2082 private:
2083 CMBitMapRO* _bitmap;
2084 outputStream* _out;
2085 bool _use_prev_marking;
2086 bool _all;
2087 HeapRegion* _hr;
2089 public:
2090 PrintReachableObjectClosure(CMBitMapRO* bitmap,
2091 outputStream* out,
2092 bool use_prev_marking,
2093 bool all,
2094 HeapRegion* hr) :
2095 _bitmap(bitmap), _out(out),
2096 _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2098 void do_object(oop o) {
2099 bool over_tams;
2100 if (_use_prev_marking) {
2101 over_tams = _hr->obj_allocated_since_prev_marking(o);
2102 } else {
2103 over_tams = _hr->obj_allocated_since_next_marking(o);
2104 }
2105 bool marked = _bitmap->isMarked((HeapWord*) o);
2106 bool print_it = _all || over_tams || marked;
2108 if (print_it) {
2109 _out->print_cr(" "PTR_FORMAT"%s",
2110 o, (over_tams) ? " >" : (marked) ? " M" : "");
2111 PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2112 o->oop_iterate(&oopCl);
2113 }
2114 }
2115 };
2117 class PrintReachableRegionClosure : public HeapRegionClosure {
2118 private:
2119 CMBitMapRO* _bitmap;
2120 outputStream* _out;
2121 bool _use_prev_marking;
2122 bool _all;
2124 public:
2125 bool doHeapRegion(HeapRegion* hr) {
2126 HeapWord* b = hr->bottom();
2127 HeapWord* e = hr->end();
2128 HeapWord* t = hr->top();
2129 HeapWord* p = NULL;
2130 if (_use_prev_marking) {
2131 p = hr->prev_top_at_mark_start();
2132 } else {
2133 p = hr->next_top_at_mark_start();
2134 }
2135 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2136 "TAMS: "PTR_FORMAT, b, e, t, p);
2137 _out->cr();
2139 HeapWord* from = b;
2140 HeapWord* to = t;
2142 if (to > from) {
2143 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2144 _out->cr();
2145 PrintReachableObjectClosure ocl(_bitmap, _out,
2146 _use_prev_marking, _all, hr);
2147 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2148 _out->cr();
2149 }
2151 return false;
2152 }
2154 PrintReachableRegionClosure(CMBitMapRO* bitmap,
2155 outputStream* out,
2156 bool use_prev_marking,
2157 bool all) :
2158 _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2159 };
2161 void ConcurrentMark::print_reachable(const char* str,
2162 bool use_prev_marking,
2163 bool all) {
2164 gclog_or_tty->cr();
2165 gclog_or_tty->print_cr("== Doing heap dump... ");
2167 if (G1PrintReachableBaseFile == NULL) {
2168 gclog_or_tty->print_cr(" #### error: no base file defined");
2169 return;
2170 }
2172 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2173 (JVM_MAXPATHLEN - 1)) {
2174 gclog_or_tty->print_cr(" #### error: file name too long");
2175 return;
2176 }
2178 char file_name[JVM_MAXPATHLEN];
2179 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2180 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2182 fileStream fout(file_name);
2183 if (!fout.is_open()) {
2184 gclog_or_tty->print_cr(" #### error: could not open file");
2185 return;
2186 }
2188 outputStream* out = &fout;
2190 CMBitMapRO* bitmap = NULL;
2191 if (use_prev_marking) {
2192 bitmap = _prevMarkBitMap;
2193 } else {
2194 bitmap = _nextMarkBitMap;
2195 }
2197 out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2198 out->cr();
2200 out->print_cr("--- ITERATING OVER REGIONS");
2201 out->cr();
2202 PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2203 _g1h->heap_region_iterate(&rcl);
2204 out->cr();
2206 gclog_or_tty->print_cr(" done");
2207 gclog_or_tty->flush();
2208 }
2210 #endif // PRODUCT
2212 // This note is for drainAllSATBBuffers and the code in between.
2213 // In the future we could reuse a task to do this work during an
2214 // evacuation pause (since now tasks are not active and can be claimed
2215 // during an evacuation pause). This was a late change to the code and
2216 // is currently not being taken advantage of.
2218 class CMGlobalObjectClosure : public ObjectClosure {
2219 private:
2220 ConcurrentMark* _cm;
2222 public:
2223 void do_object(oop obj) {
2224 _cm->deal_with_reference(obj);
2225 }
2227 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2228 };
2230 void ConcurrentMark::deal_with_reference(oop obj) {
2231 if (verbose_high())
2232 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2233 (void*) obj);
2236 HeapWord* objAddr = (HeapWord*) obj;
2237 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2238 if (_g1h->is_in_g1_reserved(objAddr)) {
2239 assert(obj != NULL, "is_in_g1_reserved should ensure this");
2240 HeapRegion* hr = _g1h->heap_region_containing(obj);
2241 if (_g1h->is_obj_ill(obj, hr)) {
2242 if (verbose_high())
2243 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2244 "marked", (void*) obj);
2246 // we need to mark it first
2247 if (_nextMarkBitMap->parMark(objAddr)) {
2248 // No OrderAccess::store_load() is needed. It is implicit in the
2249 // CAS done in parMark(objAddr) above
2250 HeapWord* finger = _finger;
2251 if (objAddr < finger) {
2252 if (verbose_high())
2253 gclog_or_tty->print_cr("[global] below the global finger "
2254 "("PTR_FORMAT"), pushing it", finger);
2255 if (!mark_stack_push(obj)) {
2256 if (verbose_low())
2257 gclog_or_tty->print_cr("[global] global stack overflow during "
2258 "deal_with_reference");
2259 }
2260 }
2261 }
2262 }
2263 }
2264 }
2266 void ConcurrentMark::drainAllSATBBuffers() {
2267 CMGlobalObjectClosure oc(this);
2268 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2269 satb_mq_set.set_closure(&oc);
2271 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2272 if (verbose_medium())
2273 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2274 }
2276 // no need to check whether we should do this, as this is only
2277 // called during an evacuation pause
2278 satb_mq_set.iterate_closure_all_threads();
2280 satb_mq_set.set_closure(NULL);
2281 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2282 }
2284 void ConcurrentMark::markPrev(oop p) {
2285 // Note we are overriding the read-only view of the prev map here, via
2286 // the cast.
2287 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2288 }
2290 void ConcurrentMark::clear(oop p) {
2291 assert(p != NULL && p->is_oop(), "expected an oop");
2292 HeapWord* addr = (HeapWord*)p;
2293 assert(addr >= _nextMarkBitMap->startWord() &&
2294 addr < _nextMarkBitMap->endWord(), "in a region");
2296 _nextMarkBitMap->clear(addr);
2297 }
2299 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2300 // Note we are overriding the read-only view of the prev map here, via
2301 // the cast.
2302 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2303 _nextMarkBitMap->clearRange(mr);
2304 }
2306 HeapRegion*
2307 ConcurrentMark::claim_region(int task_num) {
2308 // "checkpoint" the finger
2309 HeapWord* finger = _finger;
2311 // _heap_end will not change underneath our feet; it only changes at
2312 // yield points.
2313 while (finger < _heap_end) {
2314 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2316 // is the gap between reading the finger and doing the CAS too long?
2318 HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2319 HeapWord* bottom = curr_region->bottom();
2320 HeapWord* end = curr_region->end();
2321 HeapWord* limit = curr_region->next_top_at_mark_start();
2323 if (verbose_low())
2324 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2325 "["PTR_FORMAT", "PTR_FORMAT"), "
2326 "limit = "PTR_FORMAT,
2327 task_num, curr_region, bottom, end, limit);
2329 HeapWord* res =
2330 (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2331 if (res == finger) {
2332 // we succeeded
2334 // notice that _finger == end cannot be guaranteed here, since
2335 // someone else might have moved the finger even further.
2336 assert(_finger >= end, "the finger should have moved forward");
2338 if (verbose_low())
2339 gclog_or_tty->print_cr("[%d] we were successful with region = "
2340 PTR_FORMAT, task_num, curr_region);
2342 if (limit > bottom) {
2343 if (verbose_low())
2344 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2345 "returning it ", task_num, curr_region);
2346 return curr_region;
2347 } else {
2348 assert(limit == bottom,
2349 "the region limit should be at bottom");
2350 if (verbose_low())
2351 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2352 "returning NULL", task_num, curr_region);
2353 // we return NULL and the caller should try calling
2354 // claim_region() again.
2355 return NULL;
2356 }
2357 } else {
2358 assert(_finger > finger, "the finger should have moved forward");
2359 if (verbose_low())
2360 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2361 "global finger = "PTR_FORMAT", "
2362 "our finger = "PTR_FORMAT,
2363 task_num, _finger, finger);
2365 // read it again
2366 finger = _finger;
2367 }
2368 }
2370 return NULL;
2371 }
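// A sketch of the claiming protocol above, with two hypothetical
// competing tasks: both read finger == F and locate the region [F, E).
// Task 1's cmpxchg_ptr(E, &_finger, F) succeeds, so it owns the region
// and returns it (or NULL, if the region turned out to be empty). Task
// 2's CAS fails, so it re-reads the finger, which by now has been
// advanced to at least E, and retries from there.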
2373 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2374 bool result = false;
2375 for (int i = 0; i < (int)_max_task_num; ++i) {
2376 CMTask* the_task = _tasks[i];
2377 MemRegion mr = the_task->aborted_region();
2378 if (mr.start() != NULL) {
2379 assert(mr.end() != NULL, "invariant");
2380 assert(mr.word_size() > 0, "invariant");
2381 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2382 assert(hr != NULL, "invariant");
2383 if (hr->in_collection_set()) {
2384 // The region points into the collection set
2385 the_task->set_aborted_region(MemRegion());
2386 result = true;
2387 }
2388 }
2389 }
2390 return result;
2391 }
2393 bool ConcurrentMark::has_aborted_regions() {
2394 for (int i = 0; i < (int)_max_task_num; ++i) {
2395 CMTask* the_task = _tasks[i];
2396 MemRegion mr = the_task->aborted_region();
2397 if (mr.start() != NULL) {
2398 assert(mr.end() != NULL, "invariant");
2399 assert(mr.word_size() > 0, "invariant");
2400 return true;
2401 }
2402 }
2403 return false;
2404 }
2406 void ConcurrentMark::oops_do(OopClosure* cl) {
2407 if (_markStack.size() > 0 && verbose_low())
2408 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2409 "size = %d", _markStack.size());
2410 // we first iterate over the contents of the mark stack...
2411 _markStack.oops_do(cl);
2413 for (int i = 0; i < (int)_max_task_num; ++i) {
2414 OopTaskQueue* queue = _task_queues->queue((int)i);
2416 if (queue->size() > 0 && verbose_low())
2417 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2418 "size = %d", i, queue->size());
2420 // ...then over the contents of the all the task queues.
2421 queue->oops_do(cl);
2422 }
2424 // Invalidate any entries in the region stack that point into
2425 // the collection set.
2426 if (_regionStack.invalidate_entries_into_cset()) {
2427 // otherwise, any gray objects copied during the evacuation pause
2428 // might not be visited.
2429 assert(_should_gray_objects, "invariant");
2430 }
2432 // Invalidate any aborted regions, recorded in the individual CM
2433 // tasks, that point into the collection set.
2434 if (invalidate_aborted_regions_in_cset()) {
2435 // otherwise, any gray objects copied during the evacuation pause
2436 // might not be visited.
2437 assert(_should_gray_objects, "invariant");
2438 }
2440 }
2442 void ConcurrentMark::clear_marking_state() {
2443 _markStack.setEmpty();
2444 _markStack.clear_overflow();
2445 _regionStack.setEmpty();
2446 _regionStack.clear_overflow();
2447 clear_has_overflown();
2448 _finger = _heap_start;
2450 for (int i = 0; i < (int)_max_task_num; ++i) {
2451 OopTaskQueue* queue = _task_queues->queue(i);
2452 queue->set_empty();
2453 // Clear any partial regions from the CMTasks
2454 _tasks[i]->clear_aborted_region();
2455 }
2456 }
2458 void ConcurrentMark::print_stats() {
2459 if (verbose_stats()) {
2460 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2461 for (size_t i = 0; i < _active_tasks; ++i) {
2462 _tasks[i]->print_stats();
2463 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2464 }
2465 }
2466 }
2468 class CSMarkOopClosure: public OopClosure {
2469 friend class CSMarkBitMapClosure;
2471 G1CollectedHeap* _g1h;
2472 CMBitMap* _bm;
2473 ConcurrentMark* _cm;
2474 oop* _ms;
2475 jint* _array_ind_stack;
2476 int _ms_size;
2477 int _ms_ind;
2478 int _array_increment;
2480 bool push(oop obj, int arr_ind = 0) {
2481 if (_ms_ind == _ms_size) {
2482 gclog_or_tty->print_cr("Mark stack is full.");
2483 return false;
2484 }
2485 _ms[_ms_ind] = obj;
2486 if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2487 _ms_ind++;
2488 return true;
2489 }
2491 oop pop() {
2492 if (_ms_ind == 0) return NULL;
2493 else {
2494 _ms_ind--;
2495 return _ms[_ms_ind];
2496 }
2497 }
2499 template <class T> bool drain() {
2500 while (_ms_ind > 0) {
2501 oop obj = pop();
2502 assert(obj != NULL, "Since index was non-zero.");
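// Object arrays are scanned in chunks of _array_increment elements:
// the array is re-pushed with the index of the next chunk before the
// current chunk is scanned, so a very large array cannot blow up the
// local mark stack in a single drain step.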
2503 if (obj->is_objArray()) {
2504 jint arr_ind = _array_ind_stack[_ms_ind];
2505 objArrayOop aobj = objArrayOop(obj);
2506 jint len = aobj->length();
2507 jint next_arr_ind = arr_ind + _array_increment;
2508 if (next_arr_ind < len) {
2509 push(obj, next_arr_ind);
2510 }
2511 // Now process this portion of this one.
2512 int lim = MIN2(next_arr_ind, len);
2513 for (int j = arr_ind; j < lim; j++) {
2514 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2515 }
2517 } else {
2518 obj->oop_iterate(this);
2519 }
2520 if (abort()) return false;
2521 }
2522 return true;
2523 }
2525 public:
2526 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2527 _g1h(G1CollectedHeap::heap()),
2528 _cm(cm),
2529 _bm(cm->nextMarkBitMap()),
2530 _ms_size(ms_size), _ms_ind(0),
2531 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2532 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2533 _array_increment(MAX2(ms_size/8, 16))
2534 {}
2536 ~CSMarkOopClosure() {
2537 FREE_C_HEAP_ARRAY(oop, _ms);
2538 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2539 }
2541 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2542 virtual void do_oop( oop* p) { do_oop_work(p); }
2544 template <class T> void do_oop_work(T* p) {
2545 T heap_oop = oopDesc::load_heap_oop(p);
2546 if (oopDesc::is_null(heap_oop)) return;
2547 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2548 if (obj->is_forwarded()) {
2549 // If the object has already been forwarded, we have to make sure
2550 // that it's marked. So follow the forwarding pointer. Note that
2551 // this does the right thing for self-forwarding pointers in the
2552 // evacuation failure case.
2553 obj = obj->forwardee();
2554 }
2555 HeapRegion* hr = _g1h->heap_region_containing(obj);
2556 if (hr != NULL) {
2557 if (hr->in_collection_set()) {
2558 if (_g1h->is_obj_ill(obj)) {
2559 _bm->mark((HeapWord*)obj);
2560 if (!push(obj)) {
2561 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2562 set_abort();
2563 }
2564 }
2565 } else {
2566 // Outside the collection set; we need to gray it
2567 _cm->deal_with_reference(obj);
2568 }
2569 }
2570 }
2571 };
2573 class CSMarkBitMapClosure: public BitMapClosure {
2574 G1CollectedHeap* _g1h;
2575 CMBitMap* _bitMap;
2576 ConcurrentMark* _cm;
2577 CSMarkOopClosure _oop_cl;
2578 public:
2579 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2580 _g1h(G1CollectedHeap::heap()),
2581 _bitMap(cm->nextMarkBitMap()),
2582 _oop_cl(cm, ms_size)
2583 {}
2585 ~CSMarkBitMapClosure() {}
2587 bool do_bit(size_t offset) {
2588 // convert offset into a HeapWord*
2589 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2590 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2591 "address out of range");
2592 assert(_bitMap->isMarked(addr), "tautology");
2593 oop obj = oop(addr);
2594 if (!obj->is_forwarded()) {
2595 if (!_oop_cl.push(obj)) return false;
2596 if (UseCompressedOops) {
2597 if (!_oop_cl.drain<narrowOop>()) return false;
2598 } else {
2599 if (!_oop_cl.drain<oop>()) return false;
2600 }
2601 }
2602 // Otherwise...
2603 return true;
2604 }
2605 };
2608 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2609 CMBitMap* _bm;
2610 CSMarkBitMapClosure _bit_cl;
2611 enum SomePrivateConstants {
2612 MSSize = 1000
2613 };
2614 bool _completed;
2615 public:
2616 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2617 _bm(cm->nextMarkBitMap()),
2618 _bit_cl(cm, MSSize),
2619 _completed(true)
2620 {}
2622 ~CompleteMarkingInCSHRClosure() {}
2624 bool doHeapRegion(HeapRegion* r) {
2625 if (!r->evacuation_failed()) {
2626 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2627 if (!mr.is_empty()) {
2628 if (!_bm->iterate(&_bit_cl, mr)) {
2629 _completed = false;
2630 return true;
2631 }
2632 }
2633 }
2634 return false;
2635 }
2637 bool completed() { return _completed; }
2638 };
2640 class ClearMarksInHRClosure: public HeapRegionClosure {
2641 CMBitMap* _bm;
2642 public:
2643 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2645 bool doHeapRegion(HeapRegion* r) {
2646 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2647 MemRegion usedMR = r->used_region();
2648 _bm->clearRange(usedMR);
2649 }
2650 return false;
2651 }
2652 };
2654 void ConcurrentMark::complete_marking_in_collection_set() {
2655 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2657 if (!g1h->mark_in_progress()) {
2658 g1h->g1_policy()->record_mark_closure_time(0.0);
2659 return;
2660 }
2662 int i = 1;
2663 double start = os::elapsedTime();
2664 while (true) {
2665 i++;
2666 CompleteMarkingInCSHRClosure cmplt(this);
2667 g1h->collection_set_iterate(&cmplt);
2668 if (cmplt.completed()) break;
2669 }
2670 double end_time = os::elapsedTime();
2671 double elapsed_time_ms = (end_time - start) * 1000.0;
2672 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2674 ClearMarksInHRClosure clr(nextMarkBitMap());
2675 g1h->collection_set_iterate(&clr);
2676 }
2678 // The next two methods deal with the following optimisation. Some
2679 // objects are gray by being marked and located above the finger. If
2680 // they are copied below the finger during an evacuation pause, then
2681 // they need to be pushed on the stack. The observation is that, if
2682 // there are no regions in the collection set located above the
2683 // finger, then the above cannot happen, hence we do not need to
2684 // explicitly gray any objects when copying them to below the
2685 // finger. The global stack will be scanned to ensure that, if it
2686 // points to objects being copied, it will update their
2687 // location. There is a tricky situation with the gray objects in
2688 // the region stack that are being copied, however. See the comment
2689 // in newCSet().
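//
// An illustration with hypothetical addresses: suppose the global
// finger is at 0x8000 and an object at 0x9000 has been marked; it is
// gray purely by being above the finger. If an evacuation pause copies
// it to 0x4000, the copy is below the finger and would never be
// rescanned unless it is explicitly pushed on a stack. If no collection
// set region lies above 0x8000, no marked object can be copied from
// above the finger, so no explicit pushing is needed.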
2691 void ConcurrentMark::newCSet() {
2692 if (!concurrent_marking_in_progress())
2693 // nothing to do if marking is not in progress
2694 return;
2696 // find what the lowest finger is among the global and local fingers
2697 _min_finger = _finger;
2698 for (int i = 0; i < (int)_max_task_num; ++i) {
2699 CMTask* task = _tasks[i];
2700 HeapWord* task_finger = task->finger();
2701 if (task_finger != NULL && task_finger < _min_finger)
2702 _min_finger = task_finger;
2703 }
2705 _should_gray_objects = false;
2707 // This fixes a very subtle and frustrating bug. It might be the case
2708 // that, during an evacuation pause, heap regions that contain
2709 // objects that are gray (by being in regions contained in the
2710 // region stack) are included in the collection set. Since such gray
2711 // objects will be moved, and because it's not easy to redirect
2712 // region stack entries to point to a new location (because objects
2713 // in one region might be scattered to multiple regions after they
2714 // are copied), one option is to ensure that all marked objects
2715 // copied during a pause are pushed on the stack. Notice, however,
2716 // that this problem can only happen when the region stack is not
2717 // empty during an evacuation pause. So, we make the fix a bit less
2718 // conservative and ensure that regions are pushed on the stack,
2719 // irrespective of whether all collection set regions are below the
2720 // finger, if the region stack is not empty. This is expected to be
2721 // a rare case, so I don't think it's necessary to be smarter about it.
2722 if (!region_stack_empty() || has_aborted_regions())
2723 _should_gray_objects = true;
2724 }
2726 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2727 if (!concurrent_marking_in_progress())
2728 return;
2730 HeapWord* region_end = hr->end();
2731 if (region_end > _min_finger)
2732 _should_gray_objects = true;
2733 }
2735 // abandon current marking iteration due to a Full GC
2736 void ConcurrentMark::abort() {
2737 // Clear all marks to force marking thread to do nothing
2738 _nextMarkBitMap->clearAll();
2739 // Empty mark stack
2740 clear_marking_state();
2741 for (int i = 0; i < (int)_max_task_num; ++i) {
2742 _tasks[i]->clear_region_fields();
2743 }
2744 _has_aborted = true;
2746 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2747 satb_mq_set.abandon_partial_marking();
2748 // This can be called either during or outside marking, we'll read
2749 // the expected_active value from the SATB queue set.
2750 satb_mq_set.set_active_all_threads(
2751 false, /* new active value */
2752 satb_mq_set.is_active() /* expected_active */);
2753 }
2755 static void print_ms_time_info(const char* prefix, const char* name,
2756 NumberSeq& ns) {
2757 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2758 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2759 if (ns.num() > 0) {
2760 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2761 prefix, ns.sd(), ns.maximum());
2762 }
2763 }
2765 void ConcurrentMark::print_summary_info() {
2766 gclog_or_tty->print_cr(" Concurrent marking:");
2767 print_ms_time_info(" ", "init marks", _init_times);
2768 print_ms_time_info(" ", "remarks", _remark_times);
2769 {
2770 print_ms_time_info(" ", "final marks", _remark_mark_times);
2771 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2773 }
2774 print_ms_time_info(" ", "cleanups", _cleanup_times);
2775 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
2776 _total_counting_time,
2777 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2778 (double)_cleanup_times.num()
2779 : 0.0));
2780 if (G1ScrubRemSets) {
2781 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
2782 _total_rs_scrub_time,
2783 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2784 (double)_cleanup_times.num()
2785 : 0.0));
2786 }
2787 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
2788 (_init_times.sum() + _remark_times.sum() +
2789 _cleanup_times.sum())/1000.0);
2790 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
2791 "(%8.2f s marking, %8.2f s counting).",
2792 cmThread()->vtime_accum(),
2793 cmThread()->vtime_mark_accum(),
2794 cmThread()->vtime_count_accum());
2795 }
2797 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2798 _parallel_workers->print_worker_threads_on(st);
2799 }
2801 // Closures
2802 // XXX: there seems to be a lot of code duplication here;
2803 // should refactor and consolidate the shared code.
2808 // We take a break if someone is trying to stop the world.
2809 bool ConcurrentMark::do_yield_check(int worker_i) {
2810 if (should_yield()) {
2811 if (worker_i == 0)
2812 _g1h->g1_policy()->record_concurrent_pause();
2813 cmThread()->yield();
2814 if (worker_i == 0)
2815 _g1h->g1_policy()->record_concurrent_pause_end();
2816 return true;
2817 } else {
2818 return false;
2819 }
2820 }
2822 bool ConcurrentMark::should_yield() {
2823 return cmThread()->should_yield();
2824 }
2826 bool ConcurrentMark::containing_card_is_marked(void* p) {
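// pointer_delta(p, start, 1) is the byte offset of p from the bottom
// of the reserved heap; shifting it right by the card shift turns that
// offset into the index of p's card in the _card_bm bit map.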
2827 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2828 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2829 }
2831 bool ConcurrentMark::containing_cards_are_marked(void* start,
2832 void* last) {
2833 return
2834 containing_card_is_marked(start) &&
2835 containing_card_is_marked(last);
2836 }
2838 #ifndef PRODUCT
2839 // for debugging purposes
2840 void ConcurrentMark::print_finger() {
2841 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2842 _heap_start, _heap_end, _finger);
2843 for (int i = 0; i < (int) _max_task_num; ++i) {
2844 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
2845 }
2846 gclog_or_tty->print_cr("");
2847 }
2848 #endif
2850 // Closure for iteration over bitmaps
2851 class CMBitMapClosure : public BitMapClosure {
2852 private:
2853 // the bitmap that is being iterated over
2854 CMBitMap* _nextMarkBitMap;
2855 ConcurrentMark* _cm;
2856 CMTask* _task;
2857 // true if we're scanning a heap region claimed by the task (so that
2858 // we move its finger along), false if we're scanning a heap region
2859 // popped from the region stack (so that we do not move the task
2860 // finger along; it'd be a mistake if we did so).
2861 bool _scanning_heap_region;
2863 public:
2864 CMBitMapClosure(CMTask *task,
2865 ConcurrentMark* cm,
2866 CMBitMap* nextMarkBitMap)
2867 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2869 void set_scanning_heap_region(bool scanning_heap_region) {
2870 _scanning_heap_region = scanning_heap_region;
2871 }
2873 bool do_bit(size_t offset) {
2874 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2875 assert(_nextMarkBitMap->isMarked(addr), "invariant");
2876 assert( addr < _cm->finger(), "invariant");
2878 if (_scanning_heap_region) {
2879 statsOnly( _task->increase_objs_found_on_bitmap() );
2880 assert(addr >= _task->finger(), "invariant");
2881 // We move that task's local finger along.
2882 _task->move_finger_to(addr);
2883 } else {
2884 // We move the task's region finger along.
2885 _task->move_region_finger_to(addr);
2886 }
2888 _task->scan_object(oop(addr));
2889 // we only partially drain the local queue and global stack
2890 _task->drain_local_queue(true);
2891 _task->drain_global_stack(true);
2893 // if the has_aborted flag has been raised, we need to bail out of
2894 // the iteration
2895 return !_task->has_aborted();
2896 }
2897 };
2899 // Closure for iterating over objects, currently only used for
2900 // processing SATB buffers.
2901 class CMObjectClosure : public ObjectClosure {
2902 private:
2903 CMTask* _task;
2905 public:
2906 void do_object(oop obj) {
2907 _task->deal_with_reference(obj);
2908 }
2910 CMObjectClosure(CMTask* task) : _task(task) { }
2911 };
2913 // Closure for iterating over object fields
2914 class CMOopClosure : public OopClosure {
2915 private:
2916 G1CollectedHeap* _g1h;
2917 ConcurrentMark* _cm;
2918 CMTask* _task;
2920 public:
2921 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2922 virtual void do_oop( oop* p) { do_oop_work(p); }
2924 template <class T> void do_oop_work(T* p) {
2925 assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2926 assert(!_g1h->is_on_free_list(
2927 _g1h->heap_region_containing((HeapWord*) p)), "invariant");
2929 oop obj = oopDesc::load_decode_heap_oop(p);
2930 if (_cm->verbose_high())
2931 gclog_or_tty->print_cr("[%d] we're looking at location "
2932 "*"PTR_FORMAT" = "PTR_FORMAT,
2933 _task->task_id(), p, (void*) obj);
2934 _task->deal_with_reference(obj);
2935 }
2937 CMOopClosure(G1CollectedHeap* g1h,
2938 ConcurrentMark* cm,
2939 CMTask* task)
2940 : _g1h(g1h), _cm(cm), _task(task)
2941 {
2942 _ref_processor = g1h->ref_processor();
2943 assert(_ref_processor != NULL, "should not be NULL");
2944 }
2945 };
2947 void CMTask::setup_for_region(HeapRegion* hr) {
2948 // Separated the asserts so that we know which one fires.
2949 assert(hr != NULL,
2950 "claim_region() should have filtered out continues humongous regions");
2951 assert(!hr->continuesHumongous(),
2952 "claim_region() should have filtered out continues humongous regions");
2954 if (_cm->verbose_low())
2955 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2956 _task_id, hr);
2958 _curr_region = hr;
2959 _finger = hr->bottom();
2960 update_region_limit();
2961 }
2963 void CMTask::update_region_limit() {
2964 HeapRegion* hr = _curr_region;
2965 HeapWord* bottom = hr->bottom();
2966 HeapWord* limit = hr->next_top_at_mark_start();
2968 if (limit == bottom) {
2969 if (_cm->verbose_low())
2970 gclog_or_tty->print_cr("[%d] found an empty region "
2971 "["PTR_FORMAT", "PTR_FORMAT")",
2972 _task_id, bottom, limit);
2973 // The region was collected underneath our feet.
2974 // We set the finger to bottom to ensure that the bitmap
2975 // iteration that will follow this will not do anything.
2976 // (this is not a condition that holds when we set the region up,
2977 // as the region is not supposed to be empty in the first place)
2978 _finger = bottom;
2979 } else if (limit >= _region_limit) {
2980 assert(limit >= _finger, "peace of mind");
2981 } else {
2982 assert(limit < _region_limit, "only way to get here");
2983 // This can happen under some pretty unusual circumstances. An
2984 // evacuation pause empties the region underneath our feet (NTAMS
2985 // at bottom). We then do some allocation in the region (NTAMS
2986 // stays at bottom), followed by the region being used as a GC
2987 // alloc region (NTAMS will move to top() and the objects
2988 // originally below it will be grayed). All objects now marked in
2989 // the region are explicitly grayed, if below the global finger,
2990 // and in fact we do not need to scan anything else. So, we simply
2991 // set _finger to be limit to ensure that the bitmap iteration
2992 // doesn't do anything.
2993 _finger = limit;
2994 }
2996 _region_limit = limit;
2997 }
2999 void CMTask::giveup_current_region() {
3000 assert(_curr_region != NULL, "invariant");
3001 if (_cm->verbose_low())
3002 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3003 _task_id, _curr_region);
3004 clear_region_fields();
3005 }
3007 void CMTask::clear_region_fields() {
3008 // Values for these three fields that indicate that we're not
3009 // holding on to a region.
3010 _curr_region = NULL;
3011 _finger = NULL;
3012 _region_limit = NULL;
3014 _region_finger = NULL;
3015 }
3017 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3018 guarantee(nextMarkBitMap != NULL, "invariant");
3020 if (_cm->verbose_low())
3021 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3023 _nextMarkBitMap = nextMarkBitMap;
3024 clear_region_fields();
3025 assert(_aborted_region.is_empty(), "should have been cleared");
3027 _calls = 0;
3028 _elapsed_time_ms = 0.0;
3029 _termination_time_ms = 0.0;
3030 _termination_start_time_ms = 0.0;
3032 #if _MARKING_STATS_
3033 _local_pushes = 0;
3034 _local_pops = 0;
3035 _local_max_size = 0;
3036 _objs_scanned = 0;
3037 _global_pushes = 0;
3038 _global_pops = 0;
3039 _global_max_size = 0;
3040 _global_transfers_to = 0;
3041 _global_transfers_from = 0;
3042 _region_stack_pops = 0;
3043 _regions_claimed = 0;
3044 _objs_found_on_bitmap = 0;
3045 _satb_buffers_processed = 0;
3046 _steal_attempts = 0;
3047 _steals = 0;
3048 _aborted = 0;
3049 _aborted_overflow = 0;
3050 _aborted_cm_aborted = 0;
3051 _aborted_yield = 0;
3052 _aborted_timed_out = 0;
3053 _aborted_satb = 0;
3054 _aborted_termination = 0;
3055 #endif // _MARKING_STATS_
3056 }
3058 bool CMTask::should_exit_termination() {
3059 regular_clock_call();
3060 // This is called when we are in the termination protocol. We should
3061 // quit if, for some reason, this task wants to abort or the global
3062 // stack is not empty (this means that we can get work from it).
3063 return !_cm->mark_stack_empty() || has_aborted();
3064 }
3066 // This determines whether the method below will check both the local
3067 // and global fingers when determining whether to push on the stack a
3068 // gray object (value 1) or whether it will only check the global one
3069 // (value 0). The tradeoffs are that the former will be a bit more
3070 // accurate and possibly push less on the stack, but it might also be
3071 // a little bit slower.
3073 #define _CHECK_BOTH_FINGERS_ 1
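// With _CHECK_BOTH_FINGERS_ == 1 the checks below happen in order: push
// the object if it is below the task's local finger; otherwise do
// nothing if it falls within the task's current region; otherwise push
// it if it is below the global finger. With the value 0, only the
// global finger is consulted.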
3075 void CMTask::deal_with_reference(oop obj) {
3076 if (_cm->verbose_high())
3077 gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3078 _task_id, (void*) obj);
3080 ++_refs_reached;
3082 HeapWord* objAddr = (HeapWord*) obj;
3083 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3084 if (_g1h->is_in_g1_reserved(objAddr)) {
3085 assert(obj != NULL, "is_in_g1_reserved should ensure this");
3086 HeapRegion* hr = _g1h->heap_region_containing(obj);
3087 if (_g1h->is_obj_ill(obj, hr)) {
3088 if (_cm->verbose_high())
3089 gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3090 _task_id, (void*) obj);
3092 // we need to mark it first
3093 if (_nextMarkBitMap->parMark(objAddr)) {
3094 // No OrderAccess::store_load() is needed. It is implicit in the
3095 // CAS done in parMark(objAddr) above
3096 HeapWord* global_finger = _cm->finger();
3098 #if _CHECK_BOTH_FINGERS_
3099 // we will check both the local and global fingers
3101 if (_finger != NULL && objAddr < _finger) {
3102 if (_cm->verbose_high())
3103 gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3104 "pushing it", _task_id, _finger);
3105 push(obj);
3106 } else if (_curr_region != NULL && objAddr < _region_limit) {
3107 // do nothing
3108 } else if (objAddr < global_finger) {
3109 // Notice that the global finger might be moving forward
3110 // concurrently. This is not a problem. In the worst case, we
3111 // mark the object while it is above the global finger and, by
3112 // the time we read the global finger, it has moved forward
3113 // past this object. In this case, the object will probably
3114 // be visited when a task is scanning the region and will also
3115 // be pushed on the stack. So, some duplicate work, but no
3116 // correctness problems.
3118 if (_cm->verbose_high())
3119 gclog_or_tty->print_cr("[%d] below the global finger "
3120 "("PTR_FORMAT"), pushing it",
3121 _task_id, global_finger);
3122 push(obj);
3123 } else {
3124 // do nothing
3125 }
3126 #else // _CHECK_BOTH_FINGERS_
3127 // we will only check the global finger
3129 if (objAddr < global_finger) {
3130 // see long comment above
3132 if (_cm->verbose_high())
3133 gclog_or_tty->print_cr("[%d] below the global finger "
3134 "("PTR_FORMAT"), pushing it",
3135 _task_id, global_finger);
3136 push(obj);
3137 }
3138 #endif // _CHECK_BOTH_FINGERS_
3139 }
3140 }
3141 }
3142 }
3144 void CMTask::push(oop obj) {
3145 HeapWord* objAddr = (HeapWord*) obj;
3146 assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3147 assert(!_g1h->is_on_free_list(
3148 _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
3149 assert(!_g1h->is_obj_ill(obj), "invariant");
3150 assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3152 if (_cm->verbose_high())
3153 gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3155 if (!_task_queue->push(obj)) {
3156 // The local task queue looks full. We need to push some entries
3157 // to the global stack.
3159 if (_cm->verbose_medium())
3160 gclog_or_tty->print_cr("[%d] task queue overflow, "
3161 "moving entries to the global stack",
3162 _task_id);
3163 move_entries_to_global_stack();
3165 // this should succeed since, even if we overflow the global
3166 // stack, we should have definitely removed some entries from the
3167 // local queue. So, there must be space on it.
3168 bool success = _task_queue->push(obj);
3169 assert(success, "invariant");
3170 }
3172 statsOnly( int tmp_size = _task_queue->size();
3173 if (tmp_size > _local_max_size)
3174 _local_max_size = tmp_size;
3175 ++_local_pushes );
3176 }
3178 void CMTask::reached_limit() {
3179 assert(_words_scanned >= _words_scanned_limit ||
3180 _refs_reached >= _refs_reached_limit ,
3181 "shouldn't have been called otherwise");
3182 regular_clock_call();
3183 }
3185 void CMTask::regular_clock_call() {
3186 if (has_aborted())
3187 return;
3189 // First, we need to recalculate the words scanned and refs reached
3190 // limits for the next clock call.
3191 recalculate_limits();
3193 // During the regular clock call we do the following
3195 // (1) If an overflow has been flagged, then we abort.
3196 if (_cm->has_overflown()) {
3197 set_has_aborted();
3198 return;
3199 }
3201 // If we are not concurrent (i.e. we're doing remark) we don't need
3202 // to check anything else. The other steps are only needed during
3203 // the concurrent marking phase.
3204 if (!concurrent())
3205 return;
3207 // (2) If marking has been aborted for Full GC, then we also abort.
3208 if (_cm->has_aborted()) {
3209 set_has_aborted();
3210 statsOnly( ++_aborted_cm_aborted );
3211 return;
3212 }
3214 double curr_time_ms = os::elapsedVTime() * 1000.0;
3216 // (3) If marking stats are enabled, then we update the step history.
3217 #if _MARKING_STATS_
3218 if (_words_scanned >= _words_scanned_limit)
3219 ++_clock_due_to_scanning;
3220 if (_refs_reached >= _refs_reached_limit)
3221 ++_clock_due_to_marking;
3223 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3224 _interval_start_time_ms = curr_time_ms;
3225 _all_clock_intervals_ms.add(last_interval_ms);
3227 if (_cm->verbose_medium()) {
3228 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3229 "scanned = %d%s, refs reached = %d%s",
3230 _task_id, last_interval_ms,
3231 _words_scanned,
3232 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3233 _refs_reached,
3234 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3235 }
3236 #endif // _MARKING_STATS_
3238 // (4) We check whether we should yield. If we have to, then we abort.
3239 if (_cm->should_yield()) {
3240 // We should yield. To do this we abort the task. The caller is
3241 // responsible for yielding.
3242 set_has_aborted();
3243 statsOnly( ++_aborted_yield );
3244 return;
3245 }
3247 // (5) We check whether we've reached our time quota. If we have,
3248 // then we abort.
3249 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3250 if (elapsed_time_ms > _time_target_ms) {
3251 set_has_aborted();
3252 _has_aborted_timed_out = true;
3253 statsOnly( ++_aborted_timed_out );
3254 return;
3255 }
3257 // (6) Finally, we check whether there are enough completed SATB
3258 // buffers available for processing. If there are, we abort.
3259 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3260 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3261 if (_cm->verbose_low())
3262 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3263 _task_id);
3264 // we do need to process SATB buffers, we'll abort and restart
3265 // the marking task to do so
3266 set_has_aborted();
3267 statsOnly( ++_aborted_satb );
3268 return;
3269 }
3270 }
3272 void CMTask::recalculate_limits() {
3273 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3274 _words_scanned_limit = _real_words_scanned_limit;
3276 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3277 _refs_reached_limit = _real_refs_reached_limit;
3278 }
3280 void CMTask::decrease_limits() {
3281 // This is called when we believe that we're going to do an infrequent
3282 // operation which will increase the per byte scanned cost (i.e. move
3283 // entries to/from the global stack). It basically tries to decrease the
3284 // scanning limit so that the clock is called earlier.
3286 if (_cm->verbose_medium())
3287 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3289 _words_scanned_limit = _real_words_scanned_limit -
3290 3 * words_scanned_period / 4;
3291 _refs_reached_limit = _real_refs_reached_limit -
3292 3 * refs_reached_period / 4;
3293 }
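// For a period P (words_scanned_period or refs_reached_period), the new
// limit above is real_limit - 3P/4. Since real_limit was set to
// scanned + P at the last recalculate_limits() call, the next clock
// call now arrives after roughly P/4 more words/refs instead of a full
// period.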
3295 void CMTask::move_entries_to_global_stack() {
3296 // local array where we'll store the entries that will be popped
3297 // from the local queue
3298 oop buffer[global_stack_transfer_size];
3300 int n = 0;
3301 oop obj;
3302 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3303 buffer[n] = obj;
3304 ++n;
3305 }
3307 if (n > 0) {
3308 // we popped at least one entry from the local queue
3310 statsOnly( ++_global_transfers_to; _local_pops += n );
3312 if (!_cm->mark_stack_push(buffer, n)) {
3313 if (_cm->verbose_low())
3314 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3315 set_has_aborted();
3316 } else {
3317 // the transfer was successful
3319 if (_cm->verbose_medium())
3320 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3321 _task_id, n);
3322 statsOnly( int tmp_size = _cm->mark_stack_size();
3323 if (tmp_size > _global_max_size)
3324 _global_max_size = tmp_size;
3325 _global_pushes += n );
3326 }
3327 }
3329 // this operation was quite expensive, so decrease the limits
3330 decrease_limits();
3331 }
3333 void CMTask::get_entries_from_global_stack() {
3334 // local array where we'll store the entries that will be popped
3335 // from the global stack.
3336 oop buffer[global_stack_transfer_size];
3337 int n;
3338 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3339 assert(n <= global_stack_transfer_size,
3340 "we should not pop more than the given limit");
3341 if (n > 0) {
3342 // yes, we did actually pop at least one entry
3344 statsOnly( ++_global_transfers_from; _global_pops += n );
3345 if (_cm->verbose_medium())
3346 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3347 _task_id, n);
3348 for (int i = 0; i < n; ++i) {
3349 bool success = _task_queue->push(buffer[i]);
3350 // We only call this when the local queue is empty or under a
3351 // given target limit. So, we do not expect this push to fail.
3352 assert(success, "invariant");
3353 }
3355 statsOnly( int tmp_size = _task_queue->size();
3356 if (tmp_size > _local_max_size)
3357 _local_max_size = tmp_size;
3358 _local_pushes += n );
3359 }
3361 // this operation was quite expensive, so decrease the limits
3362 decrease_limits();
3363 }
3365 void CMTask::drain_local_queue(bool partially) {
3366 if (has_aborted())
3367 return;
3369 // Decide what the target size is, depending on whether we're going to
3370 // drain it partially (so that other tasks can steal if they run out
3371 // of things to do) or totally (at the very end).
3372 size_t target_size;
3373 if (partially)
3374 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3375 else
3376 target_size = 0;
3378 if (_task_queue->size() > target_size) {
3379 if (_cm->verbose_high())
3380 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3381 _task_id, target_size);
3383 oop obj;
3384 bool ret = _task_queue->pop_local(obj);
3385 while (ret) {
3386 statsOnly( ++_local_pops );
3388 if (_cm->verbose_high())
3389 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3390 (void*) obj);
3392 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3393 assert(!_g1h->is_on_free_list(
3394 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3396 scan_object(obj);
3398 if (_task_queue->size() <= target_size || has_aborted())
3399 ret = false;
3400 else
3401 ret = _task_queue->pop_local(obj);
3402 }
3404 if (_cm->verbose_high())
3405 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3406 _task_id, _task_queue->size());
3407 }
3408 }
3410 void CMTask::drain_global_stack(bool partially) {
3411 if (has_aborted())
3412 return;
3414 // We have a policy to drain the local queue before we attempt to
3415 // drain the global stack.
3416 assert(partially || _task_queue->size() == 0, "invariant");
3418 // Decide what the target size is, depending on whether we're going
3419 // to drain it partially (so that other tasks can steal if they run
3420 // out of things to do) or totally (at the very end). Notice that,
3421 // because we move entries from the global stack in chunks, or
3422 // because another task might be doing the same, we might in fact
3423 // drop below the target. But this is not a problem.
3424 size_t target_size;
3425 if (partially)
3426 target_size = _cm->partial_mark_stack_size_target();
3427 else
3428 target_size = 0;
3430 if (_cm->mark_stack_size() > target_size) {
3431 if (_cm->verbose_low())
3432 gclog_or_tty->print_cr("[%d] draining global stack, target size = " SIZE_FORMAT,
3433 _task_id, target_size);
3435 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3436 get_entries_from_global_stack();
3437 drain_local_queue(partially);
3438 }
3440 if (_cm->verbose_low())
3441 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3442 _task_id, _cm->mark_stack_size());
3443 }
3444 }
3446 // The SATB queue set makes several assumptions about whether the par
3447 // or non-par versions of its methods should be called; this is why
3448 // some of the code is replicated. We should really get rid of the
3449 // single-threaded version of the code to simplify things.
3450 void CMTask::drain_satb_buffers() {
3451 if (has_aborted())
3452 return;
3454 // We set this so that the regular clock knows that we're in the
3455 // middle of draining buffers and doesn't set the abort flag when it
3456 // notices that SATB buffers are available for draining. It'd be
3457 // very counterproductive if it did that. :-)
3458 _draining_satb_buffers = true;
3460 CMObjectClosure oc(this);
3461 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3462 if (G1CollectedHeap::use_parallel_gc_threads())
3463 satb_mq_set.set_par_closure(_task_id, &oc);
3464 else
3465 satb_mq_set.set_closure(&oc);
3467 // This keeps claiming and applying the closure to completed buffers
3468 // until we run out of buffers or we need to abort.
3469 if (G1CollectedHeap::use_parallel_gc_threads()) {
3470 while (!has_aborted() &&
3471 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3472 if (_cm->verbose_medium())
3473 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3474 statsOnly( ++_satb_buffers_processed );
3475 regular_clock_call();
3476 }
3477 } else {
3478 while (!has_aborted() &&
3479 satb_mq_set.apply_closure_to_completed_buffer()) {
3480 if (_cm->verbose_medium())
3481 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3482 statsOnly( ++_satb_buffers_processed );
3483 regular_clock_call();
3484 }
3485 }
3487 if (!concurrent() && !has_aborted()) {
3488 // We should only do this during remark.
3489 if (G1CollectedHeap::use_parallel_gc_threads())
3490 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3491 else
3492 satb_mq_set.iterate_closure_all_threads();
3493 }
3495 _draining_satb_buffers = false;
3497 assert(has_aborted() ||
3498 concurrent() ||
3499 satb_mq_set.completed_buffers_num() == 0, "invariant");
3501 if (G1CollectedHeap::use_parallel_gc_threads())
3502 satb_mq_set.set_par_closure(_task_id, NULL);
3503 else
3504 satb_mq_set.set_closure(NULL);
3506 // again, this was a potentially expensive operation, so decrease the
3507 // limits to get the regular clock call earlier
3508 decrease_limits();
3509 }
3511 void CMTask::drain_region_stack(BitMapClosure* bc) {
3512 if (has_aborted())
3513 return;
3515 assert(_region_finger == NULL,
3516 "it should be NULL when we're not scanning a region");
3518 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3519 if (_cm->verbose_low())
3520 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3521 _task_id, _cm->region_stack_size());
3523 MemRegion mr;
3525 if (!_aborted_region.is_empty()) {
3526 mr = _aborted_region;
3527 _aborted_region = MemRegion();
3529 if (_cm->verbose_low())
3530 gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
3531 _task_id, mr.start(), mr.end());
3532 } else {
3533 mr = _cm->region_stack_pop_lock_free();
3534 // it returns MemRegion() if the pop fails
3535 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3536 }
3538 while (mr.start() != NULL) {
3539 if (_cm->verbose_medium())
3540 gclog_or_tty->print_cr("[%d] we are scanning region "
3541 "["PTR_FORMAT", "PTR_FORMAT")",
3542 _task_id, mr.start(), mr.end());
3544 assert(mr.end() <= _cm->finger(),
3545 "otherwise the region shouldn't be on the stack");
3546 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3547 if (_nextMarkBitMap->iterate(bc, mr)) {
3548 assert(!has_aborted(),
3549 "cannot abort the task without aborting the bitmap iteration");
3551 // We finished iterating over the region without aborting.
3552 regular_clock_call();
3553 if (has_aborted())
3554 mr = MemRegion();
3555 else {
3556 mr = _cm->region_stack_pop_lock_free();
3557 // it returns MemRegion() if the pop fails
3558 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3559 }
3560 } else {
3561 assert(has_aborted(), "currently the only way to do so");
3563 // The only way to abort the bitmap iteration is to return
3564 // false from the do_bit() method. However, inside the
3565 // do_bit() method we move the _region_finger to point to the
3566 // object currently being looked at. So, if we bail out, we
3567 // have definitely set _region_finger to something non-null.
3568 assert(_region_finger != NULL, "invariant");
3570 // Make sure that any previously aborted region has been
3571 // cleared.
3572 assert(_aborted_region.is_empty(), "aborted region not cleared");
3574 // The iteration was actually aborted. So now _region_finger
3575 // points to the address of the object we last scanned. If we
3576 // leave it there, when we restart this task, we will rescan
3577 // the object. It is easy to avoid this. We move the finger by
3578 // enough to point to the next possible object header (the
3579 // bitmap knows by how much we need to move it as it knows its
3580 // granularity).
3581 MemRegion newRegion =
3582 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3584 if (!newRegion.is_empty()) {
3585 if (_cm->verbose_low()) {
3586 gclog_or_tty->print_cr("[%d] recording unscanned region"
3587 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3588 _task_id,
3589 newRegion.start(), newRegion.end());
3590 }
3591 // Now record the part of the region we didn't scan to
3592 // make sure this task scans it later.
3593 _aborted_region = newRegion;
3594 }
3595 // break from while
3596 mr = MemRegion();
3597 }
3598 _region_finger = NULL;
3599 }
3601 if (_cm->verbose_low())
3602 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3603 _task_id, _cm->region_stack_size());
3604 }
3605 }
3607 void CMTask::print_stats() {
3608 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3609 _task_id, _calls);
3610 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3611 _elapsed_time_ms, _termination_time_ms);
3612 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3613 _step_times_ms.num(), _step_times_ms.avg(),
3614 _step_times_ms.sd());
3615 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3616 _step_times_ms.maximum(), _step_times_ms.sum());
3618 #if _MARKING_STATS_
3619 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3620 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3621 _all_clock_intervals_ms.sd());
3622 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3623 _all_clock_intervals_ms.maximum(),
3624 _all_clock_intervals_ms.sum());
3625 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3626 _clock_due_to_scanning, _clock_due_to_marking);
3627 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3628 _objs_scanned, _objs_found_on_bitmap);
3629 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3630 _local_pushes, _local_pops, _local_max_size);
3631 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3632 _global_pushes, _global_pops, _global_max_size);
3633 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3634 _global_transfers_to, _global_transfers_from);
3635 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3636 _regions_claimed, _region_stack_pops);
3637 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3638 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3639 _steal_attempts, _steals);
3640 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3641 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3642 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3643 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3644 _aborted_timed_out, _aborted_satb, _aborted_termination);
3645 #endif // _MARKING_STATS_
3646 }
3648 /*****************************************************************************
3650 The do_marking_step(time_target_ms) method is the building block
3651 of the parallel marking framework. It can be called in parallel
3652 with other invocations of do_marking_step() on different tasks
3653 (but only one per task, obviously) and concurrently with the
3654 mutator threads, or during remark, hence eliminating the need
3655 for two versions of the code. When called during remark, it will
3656 pick up from where the task left off during the concurrent marking
3657 phase. Interestingly, tasks are also claimable during evacuation
3658 pauses, since do_marking_step() ensures that it aborts before
3659 it needs to yield.
3661 The data structures that it uses to do marking work are the
3662 following:
3664 (1) Marking Bitmap. If there are gray objects that appear only
3665 on the bitmap (this happens either when dealing with an overflow
3666 or when the initial marking phase has simply marked the roots
3667 and didn't push them on the stack), then tasks claim heap
3668 regions whose bitmap they then scan to find gray objects. A
3669 global finger indicates where the end of the last claimed region
3670 is. A local finger indicates how far into the region a task has
3671 scanned. The two fingers are used to determine how to gray an
3672 object (i.e. whether simply marking it is OK, as it will be
3673 visited by a task in the future, or whether it also needs to be
3674 pushed on a stack).
3676 (2) Local Queue. The task's local queue, which the task itself
3677 can access reasonably efficiently. Other tasks can steal from
3678 it when they run out of work. Throughout the marking phase, a
3679 task attempts to keep its local queue short but not totally
3680 empty, so that entries are available for stealing by other
3681 tasks. Only when there is no more work will a task totally
3682 drain its local queue.
3684 (3) Global Mark Stack. This handles local queue overflow. During
3685 marking, entries are only moved between it and the local queues in
3686 chunks, as access to it requires a mutex and finer-grain interaction
3687 with it might cause contention (see the sketch after this list). If it
3688 overflows, then the marking phase should restart and iterate
3689 over the bitmap to identify gray objects. Throughout the marking
3690 phase, tasks attempt to keep the global mark stack short but not
3691 totally empty, so that entries are available for popping by
3692 other tasks. Only when there is no more work will tasks totally
3693 drain the global mark stack.
3695 (4) Global Region Stack. Entries on it correspond to areas of
3696 the bitmap that need to be scanned since they contain gray
3697 objects. Pushes on the region stack only happen during
3698 evacuation pauses and typically correspond to areas covered by
3699 GC LABs. If it overflows, then the marking phase should restart
3700 and iterate over the bitmap to identify gray objects. Tasks will
3701 try to totally drain the region stack as soon as possible.
3703 (5) SATB Buffer Queue. This is where completed SATB buffers are
3704 made available. Buffers are regularly removed from this queue
3705 and scanned for roots, so that the queue doesn't get too
3706 long. During remark, all completed buffers are processed, as
3707 well as the filled-in parts of any uncompleted buffers.
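
   As a rough sketch of how (2) and (3) interact (illustrative code
   only; the real logic lives in the push/pop paths that call
   move_entries_to_global_stack() and get_entries_from_global_stack()
   above):

     oop obj = ...;                      // a newly-grayed object
     if (!_task_queue->push(obj)) {
       // local queue is full: spill a chunk of entries upwards
       move_entries_to_global_stack();
     }
     ...
     oop ref;
     if (!_task_queue->pop_local(ref)) {
       // local queue is empty: refill a chunk of entries downwards
       get_entries_from_global_stack();
     }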
3709 The do_marking_step() method tries to abort when the time target
3710 has been reached. There are a few other cases when the
3711 do_marking_step() method also aborts:
3713 (1) When the marking phase has been aborted (after a Full GC).
3715 (2) When a global overflow (either on the global stack or the
3716 region stack) has been triggered. Before the task aborts, it
3717 will actually sync up with the other tasks to ensure that all
3718 the marking data structures (local queues, stacks, fingers etc.)
3719 are re-initialised so that when do_marking_step() completes,
3720 the marking phase can immediately restart.
3722 (3) When enough completed SATB buffers are available. The
3723 do_marking_step() method only tries to drain SATB buffers right
3724 at the beginning. So, if enough buffers are available, the
3725 marking step aborts and the SATB buffers are processed at
3726 the beginning of the next invocation.
3728 (4) To yield. When we have to yield, we abort and do the yield
3729 right at the end of do_marking_step(). This saves us from a lot
3730 of hassle as, by yielding, we might allow a Full GC. If this
3731 happens, then objects will be compacted underneath our feet, the
3732 heap might shrink, etc. We avoid having to check for this by
3733 just aborting and doing the yield right at the end.
3735 From the above it follows that the do_marking_step() method should
3736 be called in a loop (or, otherwise, regularly) until it completes.
3738 If a marking step completes without its has_aborted() flag being
3739 true, it means it has completed the current marking phase (and
3740 also all other marking tasks have done so and have all synced up).
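
   A minimal sketch of such a driver loop (illustrative only; the
   names task, cm and mark_step_duration_ms stand in for whatever
   the actual caller uses):

     do {
       task->do_marking_step(mark_step_duration_ms);
       // if the step aborted, yield here so that a safepoint
       // (and possibly a Full GC) can proceed before retrying
     } while (task->has_aborted() && !cm->has_aborted());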
3742 A method called regular_clock_call() is invoked "regularly" (in
3743 sub-ms intervals) throughout marking. It is this clock method that
3744 checks all the abort conditions which were mentioned above and
3745 decides when the task should abort. A work-based scheme is used to
3746 trigger this clock method: when the number of object words the
3747 marking phase has scanned or the number of references the marking
3748 phase has visited reaches a given limit. Additional invocations of
3749 the clock method have been planted in a few other strategic places
3750 too. The initial reason for the clock method was to avoid calling
3751 vtime too regularly, as it is quite expensive. So, once it was in
3752 place, it was natural to piggy-back all the other conditions on it
3753 too and not constantly check them throughout the code.
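
   The work-based trigger amounts to something like the following
   (a sketch; the actual checks are inlined where objects are
   scanned and references are visited):

     _words_scanned += obj_words;  // obj_words: illustrative name for
                                   // the size of the scanned object
     if (_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit) {
       regular_clock_call();
     }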
3755 *****************************************************************************/
3757 void CMTask::do_marking_step(double time_target_ms) {
3758 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3759 assert(concurrent() == _cm->concurrent(), "they should be the same");
3761 assert(concurrent() || _cm->region_stack_empty(),
3762 "the region stack should have been cleared before remark");
3763 assert(concurrent() || !_cm->has_aborted_regions(),
3764 "aborted regions should have been cleared before remark");
3765 assert(_region_finger == NULL,
3766 "this should be non-null only when a region is being scanned");
3768 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3769 assert(_task_queues != NULL, "invariant");
3770 assert(_task_queue != NULL, "invariant");
3771 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3773 assert(!_claimed,
3774 "only one thread should claim this task at any one time");
3776 // OK, this doesn't safeguard against all possible scenarios, as it is
3777 // possible for two threads to set the _claimed flag at the same
3778 // time. But it is only for debugging purposes anyway and it will
3779 // catch most problems.
3780 _claimed = true;
3782 _start_time_ms = os::elapsedVTime() * 1000.0;
3783 statsOnly( _interval_start_time_ms = _start_time_ms );
3785 double diff_prediction_ms =
3786 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3787 _time_target_ms = time_target_ms - diff_prediction_ms;
3789 // set up the variables that are used in the work-based scheme to
3790 // call the regular clock method
3791 _words_scanned = 0;
3792 _refs_reached = 0;
3793 recalculate_limits();
3795 // clear all flags
3796 clear_has_aborted();
3797 _has_aborted_timed_out = false;
3798 _draining_satb_buffers = false;
3800 ++_calls;
3802 if (_cm->verbose_low())
3803 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3804 "target = %1.2lfms >>>>>>>>>>",
3805 _task_id, _calls, _time_target_ms);
3807 // Set up the bitmap and oop closures. Anything that uses them is
3808 // eventually called from this method, so it is OK to allocate these
3809 // statically.
3810 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3811 CMOopClosure oop_closure(_g1h, _cm, this);
3812 set_oop_closure(&oop_closure);
3814 if (_cm->has_overflown()) {
3815 // This can happen if the region stack or the mark stack overflows
3816 // during a GC pause and this task, after a yield point,
3817 // restarts. We have to abort as we need to get into the overflow
3818 // protocol which happens right at the end of this task.
3819 set_has_aborted();
3820 }
3822 // First drain any available SATB buffers. After this, we will not
3823 // look at SATB buffers before the next invocation of this method.
3824 // If enough completed SATB buffers are queued up, the regular clock
3825 // will abort this task so that it restarts.
3826 drain_satb_buffers();
3827 // ...then partially drain the local queue and the global stack
3828 drain_local_queue(true);
3829 drain_global_stack(true);
3831 // Then totally drain the region stack. We will not look at
3832 // it again before the next invocation of this method. Entries on
3833 // the region stack are only added during evacuation pauses, for
3834 // which we have to yield. When we do, we abort the task anyway so
3835 // it will look at the region stack again when it restarts.
3836 bitmap_closure.set_scanning_heap_region(false);
3837 drain_region_stack(&bitmap_closure);
3838 // ...then partially drain the local queue and the global stack
3839 drain_local_queue(true);
3840 drain_global_stack(true);
3842 do {
3843 if (!has_aborted() && _curr_region != NULL) {
3844 // This means that we're already holding on to a region.
3845 assert(_finger != NULL, "if region is not NULL, then the finger "
3846 "should not be NULL either");
3848 // We might have restarted this task after an evacuation pause
3849 // which might have evacuated the region we're holding on to
3850 // underneath our feet. Let's read its limit again to make sure
3851 // that we do not iterate over a region of the heap that
3852 // contains garbage (update_region_limit() will also move
3853 // _finger to the start of the region if it is found empty).
3854 update_region_limit();
3855 // We will start from _finger not from the start of the region,
3856 // as we might be restarting this task after aborting half-way
3857 // through scanning this region. In this case, _finger points to
3858 // the address where we last found a marked object. If this is a
3859 // fresh region, _finger points to start().
3860 MemRegion mr = MemRegion(_finger, _region_limit);
3862 if (_cm->verbose_low())
3863 gclog_or_tty->print_cr("[%d] we're scanning part "
3864 "["PTR_FORMAT", "PTR_FORMAT") "
3865 "of region "PTR_FORMAT,
3866 _task_id, _finger, _region_limit, _curr_region);
3868 // Let's iterate over the bitmap of the part of the
3869 // region that is left.
3870 bitmap_closure.set_scanning_heap_region(true);
3871 if (mr.is_empty() ||
3872 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3873 // We successfully completed iterating over the region. Now,
3874 // let's give up the region.
3875 giveup_current_region();
3876 regular_clock_call();
3877 } else {
3878 assert(has_aborted(), "currently the only way to do so");
3879 // The only way to abort the bitmap iteration is to return
3880 // false from the do_bit() method. However, inside the
3881 // do_bit() method we move the _finger to point to the
3882 // object currently being looked at. So, if we bail out, we
3883 // have definitely set _finger to something non-null.
3884 assert(_finger != NULL, "invariant");
3886 // Region iteration was actually aborted. So now _finger
3887 // points to the address of the object we last scanned. If we
3888 // leave it there, when we restart this task, we will rescan
3889 // the object. It is easy to avoid this. We move the finger by
3890 // enough to point to the next possible object header (the
3891 // bitmap knows by how much we need to move it as it knows its
3892 // granularity).
3893 assert(_finger < _region_limit, "invariant");
3894 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3895 // Check if bitmap iteration was aborted while scanning the last object
3896 if (new_finger >= _region_limit) {
3897 giveup_current_region();
3898 } else {
3899 move_finger_to(new_finger);
3900 }
3901 }
3902 }
3903 // At this point we have either completed iterating over the
3904 // region we were holding on to, or we have aborted.
3906 // We then partially drain the local queue and the global stack.
3907 // (Do we really need this?)
3908 drain_local_queue(true);
3909 drain_global_stack(true);
3911 // Read the note on the claim_region() method on why it might
3912 // return NULL with potentially more regions available for
3913 // claiming and why we have to check out_of_regions() to determine
3914 // whether we're done or not.
3915 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3916 // We are going to try to claim a new region. We should have
3917 // given up on the previous one.
3918 // Separated the asserts so that we know which one fires.
3919 assert(_curr_region == NULL, "invariant");
3920 assert(_finger == NULL, "invariant");
3921 assert(_region_limit == NULL, "invariant");
3922 if (_cm->verbose_low())
3923 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3924 HeapRegion* claimed_region = _cm->claim_region(_task_id);
3925 if (claimed_region != NULL) {
3926 // Yes, we managed to claim one
3927 statsOnly( ++_regions_claimed );
3929 if (_cm->verbose_low())
3930 gclog_or_tty->print_cr("[%d] we successfully claimed "
3931 "region "PTR_FORMAT,
3932 _task_id, claimed_region);
3934 setup_for_region(claimed_region);
3935 assert(_curr_region == claimed_region, "invariant");
3936 }
3937 // It is important to call the regular clock here. It might take
3938 // a while to claim a region if, for example, we hit a large
3939 // block of empty regions. So we need to call the regular clock
3940 // method once round the loop to make sure it's called
3941 // frequently enough.
3942 regular_clock_call();
3943 }
3945 if (!has_aborted() && _curr_region == NULL) {
3946 assert(_cm->out_of_regions(),
3947 "at this point we should be out of regions");
3948 }
3949 } while (_curr_region != NULL && !has_aborted());
3951 if (!has_aborted()) {
3952 // We cannot check whether the global stack is empty, since other
3953 // tasks might be pushing objects to it concurrently. We also cannot
3954 // check if the region stack is empty because if a thread is aborting
3955 // it can push a partially done region back.
3956 assert(_cm->out_of_regions(),
3957 "at this point we should be out of regions");
3959 if (_cm->verbose_low())
3960 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3962 // Try to reduce the number of available SATB buffers so that
3963 // remark has less work to do.
3964 drain_satb_buffers();
3965 }
3967 // Since we've done everything else, we can now totally drain the
3968 // local queue and global stack.
3969 drain_local_queue(false);
3970 drain_global_stack(false);
3972 // Attempt at work stealing from other tasks' queues.
3973 if (!has_aborted()) {
3974 // We have not aborted. This means that we have finished all that
3975 // we could. Let's try to do some stealing...
3977 // We cannot check whether the global stack is empty, since other
3978 // tasks might be pushing objects to it concurrently. We also cannot
3979 // check if the region stack is empty because if a thread is aborting
3980 // it can push a partially done region back.
3981 assert(_cm->out_of_regions() && _task_queue->size() == 0,
3982 "only way to reach here");
3984 if (_cm->verbose_low())
3985 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3987 while (!has_aborted()) {
3988 oop obj;
3989 statsOnly( ++_steal_attempts );
3991 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3992 if (_cm->verbose_medium())
3993 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3994 _task_id, (void*) obj);
3996 statsOnly( ++_steals );
3998 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3999 "any stolen object should be marked");
4000 scan_object(obj);
4002 // And since we're towards the end, let's totally drain the
4003 // local queue and global stack.
4004 drain_local_queue(false);
4005 drain_global_stack(false);
4006 } else {
4007 break;
4008 }
4009 }
4010 }
4012 // We still haven't aborted. Now, let's try to get into the
4013 // termination protocol.
4014 if (!has_aborted()) {
4015 // We cannot check whether the global stack is empty, since other
4016 // tasks might be concurrently pushing objects on it. We also cannot
4017 // check if the region stack is empty because if a thread is aborting
4018 // it can push a partially done region back.
4019 // Separated the asserts so that we know which one fires.
4020 assert(_cm->out_of_regions(), "only way to reach here");
4021 assert(_task_queue->size() == 0, "only way to reach here");
4023 if (_cm->verbose_low())
4024 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4026 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4027 // The CMTask class also extends the TerminatorTerminator class,
4028 // hence its should_exit_termination() method will also decide
4029 // whether to exit the termination protocol or not.
4030 bool finished = _cm->terminator()->offer_termination(this);
4031 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4032 _termination_time_ms +=
4033 termination_end_time_ms - _termination_start_time_ms;
4035 if (finished) {
4036 // We're all done.
4038 if (_task_id == 0) {
4039 // let's allow task 0 to do this
4040 if (concurrent()) {
4041 assert(_cm->concurrent_marking_in_progress(), "invariant");
4042 // we need to set this to false before the next
4043 // safepoint. This way we ensure that the marking phase
4044 // doesn't observe any more heap expansions.
4045 _cm->clear_concurrent_marking_in_progress();
4046 }
4047 }
4049 // We can now guarantee that the global stack is empty, since
4050 // all other tasks have finished. We separated the guarantees so
4051 // that, if a condition is false, we can immediately find out
4052 // which one.
4053 guarantee(_cm->out_of_regions(), "only way to reach here");
4054 guarantee(_aborted_region.is_empty(), "only way to reach here");
4055 guarantee(_cm->region_stack_empty(), "only way to reach here");
4056 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4057 guarantee(_task_queue->size() == 0, "only way to reach here");
4058 guarantee(!_cm->has_overflown(), "only way to reach here");
4059 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4060 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4062 if (_cm->verbose_low())
4063 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4064 } else {
4065 // Apparently there's more work to do. Let's abort this task. Its
4066 // caller will restart it and we can hopefully find more things to do.
4068 if (_cm->verbose_low())
4069 gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
4071 set_has_aborted();
4072 statsOnly( ++_aborted_termination );
4073 }
4074 }
4076 // Mainly for debugging purposes to make sure that a pointer to the
4077 // closure which was statically allocated in this frame doesn't
4078 // escape it by accident.
4079 set_oop_closure(NULL);
4080 double end_time_ms = os::elapsedVTime() * 1000.0;
4081 double elapsed_time_ms = end_time_ms - _start_time_ms;
4082 // Update the step history.
4083 _step_times_ms.add(elapsed_time_ms);
4085 if (has_aborted()) {
4086 // The task was aborted for some reason.
4088 statsOnly( ++_aborted );
4090 if (_has_aborted_timed_out) {
4091 double diff_ms = elapsed_time_ms - _time_target_ms;
4092 // Keep statistics of how well we did with respect to hitting
4093 // our target only if we actually timed out (if we aborted for
4094 // other reasons, then the results might get skewed).
4095 _marking_step_diffs_ms.add(diff_ms);
4096 }
4098 if (_cm->has_overflown()) {
4099 // This is the interesting one. We aborted because a global
4100 // overflow was raised. This means we have to restart the
4101 // marking phase and start iterating over regions. However, in
4102 // order to do this we have to make sure that all tasks stop
4103 // what they are doing and re-initialise in a safe manner. We
4104 // will achieve this with the use of two barrier sync points.
4106 if (_cm->verbose_low())
4107 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4109 _cm->enter_first_sync_barrier(_task_id);
4110 // When we exit this sync barrier we know that all tasks have
4111 // stopped doing marking work. So, it's now safe to
4112 // re-initialise our data structures. At the end of this method,
4113 // task 0 will clear the global data structures.
4115 statsOnly( ++_aborted_overflow );
4117 // We clear the local state of this task...
4118 clear_region_fields();
4120 // ...and enter the second barrier.
4121 _cm->enter_second_sync_barrier(_task_id);
4122 // At this point everything has been re-initialised and we're
4123 // ready to restart.
4124 }
4126 if (_cm->verbose_low()) {
4127 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4128 "elapsed = %1.2lfms <<<<<<<<<<",
4129 _task_id, _time_target_ms, elapsed_time_ms);
4130 if (_cm->has_aborted())
4131 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4132 _task_id);
4133 }
4134 } else {
4135 if (_cm->verbose_low())
4136 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4137 "elapsed = %1.2lfms <<<<<<<<<<",
4138 _task_id, _time_target_ms, elapsed_time_ms);
4139 }
4141 _claimed = false;
4142 }
4144 CMTask::CMTask(int task_id,
4145 ConcurrentMark* cm,
4146 CMTaskQueue* task_queue,
4147 CMTaskQueueSet* task_queues)
4148 : _g1h(G1CollectedHeap::heap()),
4149 _task_id(task_id), _cm(cm),
4150 _claimed(false),
4151 _nextMarkBitMap(NULL), _hash_seed(17),
4152 _task_queue(task_queue),
4153 _task_queues(task_queues),
4154 _oop_closure(NULL),
4155 _aborted_region(MemRegion()) {
4156 guarantee(task_queue != NULL, "invariant");
4157 guarantee(task_queues != NULL, "invariant");
4159 statsOnly( _clock_due_to_scanning = 0;
4160 _clock_due_to_marking = 0 );
4162 _marking_step_diffs_ms.add(0.5);
4163 }