Thu, 20 Oct 2011 12:06:20 -0700
7099824: G1: we should take the pending list lock before doing the remark pause
Summary: Acquire the pending list lock in the prologue() method of G1's concurrent VM_Operation and release the lock in the epilogue() method. The locking/unlocking order of the pending list lock and the Heap_lock should match that in the prologue() and epilogue() methods of VM_GC_Operation.
Reviewed-by: tonyp, ysr
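
For reference, a minimal sketch of the locking protocol described in the
summary, written against G1's concurrent VM operation (VM_CGC_Operation);
the helper names mirror the acquire/release pair used by VM_GC_Operation,
and the sketch is illustrative rather than a verbatim copy of the patch:

    bool VM_CGC_Operation::doit_prologue() {
      // Same lock ordering as VM_GC_Operation::doit_prologue():
      // take the pending list lock first, then the Heap_lock.
      acquire_pending_list_lock();
      Heap_lock->lock();
      return true;
    }

    void VM_CGC_Operation::doit_epilogue() {
      // Release in the reverse order: Heap_lock first, then notify
      // the reference handler and drop the pending list lock.
      Heap_lock->unlock();
      release_and_notify_pending_list_lock();
    }
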
/*
 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"

//
// CMS Bit Map Wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL, 0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of CMS bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}
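
// Sizing note (worked example): in the common case MinObjAlignment is one
// word, so the shifter passed in by ConcurrentMark is 0 and the map keeps
// one bit per HeapWord. On a 64-bit VM a 1 GB reservation covers 2^27
// words, so the backing store computed above is
// (2^27 >> LogBitsPerByte) + 1 bytes, i.e. 16 MB plus one byte, rounded
// up to the allocation alignment.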

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  HeapWord* left  = MAX2(_bmStartWord, mr.start());
  HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  if (right > left) {
    // Right-open interval [leftOffset, rightOffset).
    return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  } else {
    return true;
  }
}

void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
                                             size_t    from_start_index,
                                             HeapWord* to_start_word,
                                             size_t    word_num) {
  _bm.mostly_disjoint_range_union(from_bitmap,
                                  from_start_index,
                                  heapWordToOffset(to_start_word),
                                  word_num);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate "
                                  "CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _oops_do_bound = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

CMRegionStack::CMRegionStack() : _base(NULL) {}

void CMRegionStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(MemRegion, size);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
}

CMRegionStack::~CMRegionStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
  }
}

void CMRegionStack::push_lock_free(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  while (true) {
    jint index = _index;

    if (index >= _capacity) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = mr;
      return;
    }
    // Otherwise, we need to try again.
  }
}

// Lock-free pop of the region stack. Called during the concurrent
// marking / remark phases. Should only be called in tandem with
// other lock-free pops.
MemRegion CMRegionStack::pop_lock_free() {
  while (true) {
    jint index = _index;

    if (index == 0) {
      return MemRegion();
    }
    // Otherwise...
    jint next_index = index-1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      MemRegion mr = _base[next_index];
      if (mr.start() != NULL) {
        assert(mr.end() != NULL, "invariant");
        assert(mr.word_size() > 0, "invariant");
        return mr;
      } else {
        // that entry was invalidated... let's skip it
        assert(mr.end() == NULL, "invariant");
      }
    }
    // Otherwise, we need to try again.
  }
}
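
// Note on the "in tandem" restriction above: the lock-free push / pop
// synchronize only through the CAS on _index, while the (currently
// disabled) locked variants below update _index with plain loads and
// stores under CMRegionStack_lock. Interleaving the two schemes would
// let a locked read-modify-write of _index race with a concurrent CAS
// and silently lose an update, which is why the lock-free operations
// must only be used in tandem with each other.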

#if 0
// The routines that manipulate the region stack with a lock are
// not currently used. They should be retained, however, as a
// diagnostic aid.

void CMRegionStack::push_with_lock(MemRegion mr) {
  assert(mr.word_size() > 0, "Precondition");
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  if (isFull()) {
    _overflow = true;
    return;
  }

  _base[_index] = mr;
  _index += 1;
}

MemRegion CMRegionStack::pop_with_lock() {
  MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);

  while (true) {
    if (_index == 0) {
      return MemRegion();
    }
    _index -= 1;

    MemRegion mr = _base[_index];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      return mr;
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
}
#endif

bool CMRegionStack::invalidate_entries_into_cset() {
  bool result = false;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  for (int i = 0; i < _oops_do_bound; ++i) {
    MemRegion mr = _base[i];
    if (mr.start() != NULL) {
      assert(mr.end() != NULL, "invariant");
      assert(mr.word_size() > 0, "invariant");
      HeapRegion* hr = g1h->heap_region_containing(mr.start());
      assert(hr != NULL, "invariant");
      if (hr->in_collection_set()) {
        // The region points into the collection set
        _base[i] = MemRegion();
        result = true;
      }
    } else {
      // that entry was invalidated... let's skip it
      assert(mr.end() == NULL, "invariant");
    }
  }
  return result;
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    // iterate over the oops in this oop, marking and pushing
    // the ones in CMS generation.
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::oops_do(OopClosure* f) {
  if (_index == 0) return;
  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
         "Bound must be set.");
  for (int i = 0; i < _oops_do_bound; i++) {
    f->do_oop(&_base[i]);
  }
  _oops_do_bound = -1;
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

ConcurrentMark::ConcurrentMark(ReservedSpace rs,
                               int max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm(max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),
  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),
  _at_least_one_mark_complete(false),

  _markStack(this),
  _regionStack(),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),
  _should_gray_objects(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);
  _regionStack.allocate(G1MarkRegionStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
    _accum_task_vtime[i] = 0.0;
  }

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads = 0;
    _sleep_factor             = 0.0;
    _marking_task_overhead    = 1.0;
  } else {
    if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
      // if both are set

      _parallel_marking_threads = ConcGCThreads;
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // we will calculate the number of parallel marking threads
      // based on a target overhead with respect to the soft real-time
      // goal

      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      _parallel_marking_threads = (size_t) marking_thread_num;
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }
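
    // Worked example of the calculation above (illustrative numbers, not
    // defaults taken from any particular platform): with 8 processors,
    // G1MarkingOverheadPercent=30, MaxGCPauseMillis=200 and
    // GCPauseIntervalMillis=1000, overall_cm_overhead is 200*0.3/1000 =
    // 0.06 of one CPU, marking_thread_num is ceil(0.06*8) = 1, the task
    // overhead is 0.06*8/1 = 0.48 and the sleep factor is (1-0.48)/0.48,
    // i.e. each marking thread sleeps slightly longer than it works.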

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         (int) _parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end. This has a subtle and important
  // side-effect. Imagine that two evacuation pauses happen between
  // marking completion and remark. The first one can grow the
  // heap (hence now the finger is below the heap end). Then, the
  // second one could unnecessarily push regions on the region
  // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // reset all the marking data structures and any necessary flags
  clear_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end, "only way to get here");
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  clear_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  for (int i = 0; i < (int) _max_task_num; ++i) {
    delete _task_queues->queue(i);
    delete _tasks[i];
  }
  delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
  FREE_C_HEAP_ARRAY(double, _accum_task_vtime);
}

// This closure is used to mark refs into the g1 generation
// from external roots in the CMS bit map.
// Called at the first checkpoint.
//

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking(true);
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // let task 0 do this
  if (task_num == 0) {
    // task 0 is responsible for clearing the global data structures
    // We should be here because of an overflow. During STW we should
    // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
    // marking.
    clear_marking_state(concurrent() /* clear_overflow */);
    force_overflow()->update();

    if (PrintGC) {
      gclog_or_tty->date_stamp(PrintGCDateStamps);
      gclog_or_tty->stamp(PrintGCTimeStamps);
      gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialised and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

void ConcurrentMark::grayRoot(oop p) {
  HeapWord* addr = (HeapWord*) p;
  // We can't really check against _heap_start and _heap_end, since it
  // is possible during an evacuation pause with piggy-backed
  // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
  assert(_g1h->g1_committed().contains(addr),
         "address should be within the heap bounds");

  if (!_nextMarkBitMap->isMarked(addr)) {
    _nextMarkBitMap->parMark(addr);
  }
}

void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
  // The objects on the region have already been marked "in bulk" by
  // the caller. We only need to decide whether to push the region on
  // the region stack or not.

  if (!concurrent_marking_in_progress() || !_should_gray_objects) {
    // We're done with marking and waiting for remark. We do not need to
    // push anything else on the region stack.
    return;
  }

  HeapWord* finger = _finger;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] attempting to push "
                           "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
                           PTR_FORMAT, mr.start(), mr.end(), finger);
  }

  if (mr.start() < finger) {
    // The finger is always heap region aligned and it is not possible
    // for mr to span heap regions.
    assert(mr.end() <= finger, "invariant");

    // Separated the asserts so that we know which one fires.
    assert(mr.start() <= mr.end(),
           "region boundaries should fall within the committed space");
    assert(_heap_start <= mr.start(),
           "region boundaries should fall within the committed space");
    assert(mr.end() <= _heap_end,
           "region boundaries should fall within the committed space");
    if (verbose_low()) {
      gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
                             "below the finger, pushing it",
                             mr.start(), mr.end());
    }

    if (!region_stack_push_lock_free(mr)) {
      if (verbose_low()) {
        gclog_or_tty->print_cr("[global] region stack has overflown.");
      }
    }
  }
}

void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
  // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.

  HeapWord* addr = (HeapWord*)p;
  if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
    if (_nextMarkBitMap->parMark(addr)) {
      if (!concurrent_marking_in_progress() || !_should_gray_objects) {
        // If we're done with concurrent marking and we're waiting for
        // remark, then we're not pushing anything on the stack.
        return;
      }

      // No OrderAccess::store_load() is needed. It is implicit in the
      // CAS done in parMark(addr) above
      HeapWord* finger = _finger;

      if (addr < finger) {
        if (!mark_stack_push(oop(addr))) {
          if (verbose_low()) {
            gclog_or_tty->print_cr("[global] global stack overflow "
                                   "during parMark");
          }
        }
      }
    }
  }
}

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(int worker_i) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert((size_t) worker_i < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_i);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true /* do_stealing    */,
                                  true /* do_termination */);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_i);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;

  size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
  force_overflow_conc()->init();
  set_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (parallel_marking_threads() > 0) {
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* allow dirty */ true,
                     /* silent      */ false,
                     /* option      */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops.  We overflowed.  Restart concurrent marking.
    _restart_for_overflow = true;
    // Clear the flag. We do not need it any more.
    clear_has_overflown();
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }
  } else {
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* allow dirty */ true,
                       /* silent      */ false,
                       /* option      */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
  }

  // Reset the marking state if marking completed
  if (!restart_for_overflow()) {
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();
}

#define CARD_BM_TEST_MODE 0

class CalcLiveObjectsClosure: public HeapRegionClosure {

  CMBitMapRO* _bm;
  ConcurrentMark* _cm;
  bool _changed;
  bool _yield;
  size_t _words_done;
  size_t _tot_live;
  size_t _tot_used;
  size_t _regions_done;
  double _start_vtime_sec;

  BitMap* _region_bm;
  BitMap* _card_bm;
  intptr_t _bottom_card_num;
  bool _final;

  void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
    for (intptr_t i = start_card_num; i <= last_card_num; i++) {
#if CARD_BM_TEST_MODE
      guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
#else
      _card_bm->par_at_put(i - _bottom_card_num, 1);
#endif
    }
  }

public:
  CalcLiveObjectsClosure(bool final,
                         CMBitMapRO *bm, ConcurrentMark *cm,
                         BitMap* region_bm, BitMap* card_bm) :
    _bm(bm), _cm(cm), _changed(false), _yield(true),
    _words_done(0), _tot_live(0), _tot_used(0),
    _region_bm(region_bm), _card_bm(card_bm), _final(final),
    _regions_done(0), _start_vtime_sec(0.0)
  {
    _bottom_card_num =
      intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
               CardTableModRefBS::card_shift);
  }

  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    size_t index = hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put((BitMap::idx_t) index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range. It might
      // have been a bit more efficient to look at the object that
      // spans these humongous regions to calculate their number from
      // the object's size. However, it's a good idea to calculate
      // this based on the metadata itself, and not the region
      // contents, so that this code is not aware of what goes into
      // the humongous regions (in case this changes in the future).
      G1CollectedHeap* g1h = G1CollectedHeap::heap();
      size_t end_index = index + 1;
      while (end_index < g1h->n_regions()) {
        HeapRegion* chr = g1h->region_at(end_index);
        if (!chr->continuesHumongous()) break;
        end_index += 1;
      }
      _region_bm->par_at_put_range((BitMap::idx_t) index,
                                   (BitMap::idx_t) end_index, true);
    }
  }

  bool doHeapRegion(HeapRegion* hr) {
    if (!_final && _regions_done == 0) {
      _start_vtime_sec = os::elapsedVTime();
    }

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* nextTop = hr->next_top_at_mark_start();
    HeapWord* start   = hr->top_at_conc_mark_count();
    assert(hr->bottom() <= start && start <= hr->end() &&
           hr->bottom() <= nextTop && nextTop <= hr->end() &&
           start <= nextTop,
           "Preconditions.");
    // Otherwise, record the number of words we'll examine.
    size_t words_done = (nextTop - start);
    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, nextTop);
    size_t marked_bytes = 0;

    // Below, the term "card num" means the result of shifting an address
    // by the card shift -- address 0 corresponds to card number 0.  One
    // must subtract the card num of the bottom of the heap to obtain a
    // card table index.
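    // For example, with the usual 512-byte cards (card_shift == 9) and a
    // heap based at 0x40000000, the address 0x40001200 has card num
    // (0x40001200 >> 9) and card table index
    // (0x40001200 >> 9) - (0x40000000 >> 9) = 0x1200 >> 9 = 9.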
    // The first card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t start_card_num = -1;
    // The last card num of the sequence of live cards currently being
    // constructed.  -1 ==> no sequence.
    intptr_t last_card_num = -1;

    while (start < nextTop) {
      if (_yield && _cm->do_yield_check()) {
        // We yielded.  It might be for a full collection, in which case
        // all bets are off; terminate the traversal.
        if (_cm->has_aborted()) {
          _changed = false;
          return true;
        } else {
          // Otherwise, it might be a collection pause, and the region
          // we're looking at might be in the collection set.  We'll
          // abandon this region.
          return false;
        }
      }
      oop obj = oop(start);
      int obj_sz = obj->size();
      // The card num of the start of the current object.
      intptr_t obj_card_num =
        intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);

      HeapWord* obj_last = start + obj_sz - 1;
      intptr_t obj_last_card_num =
        intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);

      if (obj_card_num != last_card_num) {
        if (start_card_num == -1) {
          assert(last_card_num == -1, "Both or neither.");
          start_card_num = obj_card_num;
        } else {
          assert(last_card_num != -1, "Both or neither.");
          assert(obj_card_num >= last_card_num, "Inv");
          if ((obj_card_num - last_card_num) > 1) {
            // Mark the last run, and start a new one.
            mark_card_num_range(start_card_num, last_card_num);
            start_card_num = obj_card_num;
          }
        }
#if CARD_BM_TEST_MODE
        /*
        gclog_or_tty->print_cr("Setting bits from %d/%d.",
                               obj_card_num - _bottom_card_num,
                               obj_last_card_num - _bottom_card_num);
        */
        for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
          _card_bm->par_at_put(j - _bottom_card_num, 1);
        }
#endif
      }
      // In any case, we set the last card num.
      last_card_num = obj_last_card_num;

      marked_bytes += (size_t)obj_sz * HeapWordSize;
      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
      _changed = true;
    }
    // Handle the last range, if any.
    if (start_card_num != -1) {
      mark_card_num_range(start_card_num, last_card_num);
    }
    if (_final) {
      // Mark the allocated-since-marking portion...
      HeapWord* tp = hr->top();
      if (nextTop < tp) {
        start_card_num =
          intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
        last_card_num =
          intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
        mark_card_num_range(start_card_num, last_card_num);
        // This definitely means the region has live objects.
        set_bit_for_region(hr);
      }
    }

    hr->add_to_marked_bytes(marked_bytes);
    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }
    hr->set_top_at_conc_mark_count(nextTop);
    _tot_live += hr->next_live_bytes();
    _tot_used += hr->used();
    _words_done = words_done;

    if (!_final) {
      ++_regions_done;
      if (_regions_done % 10 == 0) {
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
        if (elapsed_vtime_sec > (10.0 / 1000.0)) {
          jlong sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
          os::sleep(Thread::current(), sleep_time_ms, false);
          _start_vtime_sec = end_vtime_sec;
        }
      }
    }

    return false;
  }

  bool changed() { return _changed; }
  void reset()   { _changed = false; _words_done = 0; }
  void no_yield() { _yield = false; }
  size_t words_done() { return _words_done; }
  size_t tot_live() { return _tot_live; }
  size_t tot_used() { return _tot_used; }
};

void ConcurrentMark::calcDesiredRegions() {
  _region_bm.clear();
  _card_bm.clear();
  CalcLiveObjectsClosure calccl(false /*final*/,
                                nextMarkBitMap(), this,
                                &_region_bm, &_card_bm);
  G1CollectedHeap *g1h = G1CollectedHeap::heap();
  g1h->heap_region_iterate(&calccl);

  do {
    calccl.reset();
    g1h->heap_region_iterate(&calccl);
  } while (calccl.changed());
}

class G1ParFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  size_t _n_workers;
  size_t *_live_bytes;
  size_t *_used_bytes;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
                      BitMap* region_bm, BitMap* card_bm)
    : AbstractGangTask("G1 final counting"), _g1h(g1h),
      _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
    if (ParallelGCThreads > 0) {
      _n_workers = _g1h->workers()->total_workers();
    } else {
      _n_workers = 1;
    }
    _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
    _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
  }

  ~G1ParFinalCountTask() {
    FREE_C_HEAP_ARRAY(size_t, _live_bytes);
    FREE_C_HEAP_ARRAY(size_t, _used_bytes);
  }

  void work(int i) {
    CalcLiveObjectsClosure calccl(true /*final*/,
                                  _bm, _g1h->concurrent_mark(),
                                  _region_bm, _card_bm);
    calccl.no_yield();
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&calccl, i,
                                            HeapRegion::FinalCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&calccl);
    }
    assert(calccl.complete(), "Shouldn't have yielded!");

    assert((size_t) i < _n_workers, "invariant");
    _live_bytes[i] = calccl.tot_live();
    _used_bytes[i] = calccl.tot_used();
  }
  size_t live_bytes()  {
    size_t live_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      live_bytes += _live_bytes[i];
    return live_bytes;
  }
  size_t used_bytes()  {
    size_t used_bytes = 0;
    for (size_t i = 0; i < _n_workers; ++i)
      used_bytes += _used_bytes[i];
    return used_bytes;
  }
};

class G1ParNoteEndTask;

class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
  G1CollectedHeap* _g1;
  int _worker_num;
  size_t _max_live_bytes;
  size_t _regions_claimed;
  size_t _freed_bytes;
  FreeRegionList* _local_cleanup_list;
  HumongousRegionSet* _humongous_proxy_set;
  HRRSCleanupTask* _hrrs_cleanup_task;
  double _claimed_region_time;
  double _max_region_time;

public:
  G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                             int worker_num,
                             FreeRegionList* local_cleanup_list,
                             HumongousRegionSet* humongous_proxy_set,
                             HRRSCleanupTask* hrrs_cleanup_task);
  size_t freed_bytes() { return _freed_bytes; }

  bool doHeapRegion(HeapRegion *r);

  size_t max_live_bytes() { return _max_live_bytes; }
  size_t regions_claimed() { return _regions_claimed; }
  double claimed_region_time_sec() { return _claimed_region_time; }
  double max_region_time_sec() { return _max_region_time; }
};

class G1ParNoteEndTask: public AbstractGangTask {
  friend class G1NoteEndOfConcMarkClosure;

protected:
  G1CollectedHeap* _g1h;
  size_t _max_live_bytes;
  size_t _freed_bytes;
  FreeRegionList* _cleanup_list;

public:
  G1ParNoteEndTask(G1CollectedHeap* g1h,
                   FreeRegionList* cleanup_list) :
    AbstractGangTask("G1 note end"), _g1h(g1h),
    _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }

  void work(int i) {
    double start = os::elapsedTime();
    FreeRegionList local_cleanup_list("Local Cleanup List");
    HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
    HRRSCleanupTask hrrs_cleanup_task;
    G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
                                           &humongous_proxy_set,
                                           &hrrs_cleanup_task);
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
                                            HeapRegion::NoteEndClaimValue);
    } else {
      _g1h->heap_region_iterate(&g1_note_end);
    }
    assert(g1_note_end.complete(), "Shouldn't have yielded!");

    // Now update the lists
    _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
                                            NULL /* free_list */,
                                            &humongous_proxy_set,
                                            true /* par */);
    {
      MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
      _max_live_bytes += g1_note_end.max_live_bytes();
      _freed_bytes += g1_note_end.freed_bytes();

      // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we will not guarantee to only
      // generate output for the newly-reclaimed regions (the list
      // might not be empty at the beginning of cleanup; we might
      // still be working on its previous contents). So we do the
      // printing here, before we append the new regions to the global
      // cleanup list.

      G1HRPrinter* hr_printer = _g1h->hr_printer();
      if (hr_printer->is_active()) {
        HeapRegionLinkedListIterator iter(&local_cleanup_list);
        while (iter.more_available()) {
          HeapRegion* hr = iter.get_next();
          hr_printer->cleanup(hr);
        }
      }

      _cleanup_list->add_as_tail(&local_cleanup_list);
      assert(local_cleanup_list.is_empty(), "post-condition");

      HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
    }
    double end = os::elapsedTime();
    if (G1PrintParCleanupStats) {
      gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
                          "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
                          i, start, end, (end-start)*1000.0,
                          g1_note_end.regions_claimed(),
                          g1_note_end.claimed_region_time_sec()*1000.0,
                          g1_note_end.max_region_time_sec()*1000.0);
    }
  }
  size_t max_live_bytes() { return _max_live_bytes; }
  size_t freed_bytes() { return _freed_bytes; }
};

class G1ParScrubRemSetTask: public AbstractGangTask {
protected:
  G1RemSet* _g1rs;
  BitMap* _region_bm;
  BitMap* _card_bm;
public:
  G1ParScrubRemSetTask(G1CollectedHeap* g1h,
                       BitMap* region_bm, BitMap* card_bm) :
    AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
    _region_bm(region_bm), _card_bm(card_bm)
  {}

  void work(int i) {
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1rs->scrub_par(_region_bm, _card_bm, i,
                       HeapRegion::ScrubRemSetClaimValue);
    } else {
      _g1rs->scrub(_region_bm, _card_bm);
    }
  }

};

G1NoteEndOfConcMarkClosure::
G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
                           int worker_num,
                           FreeRegionList* local_cleanup_list,
                           HumongousRegionSet* humongous_proxy_set,
                           HRRSCleanupTask* hrrs_cleanup_task)
  : _g1(g1), _worker_num(worker_num),
    _max_live_bytes(0), _regions_claimed(0),
    _freed_bytes(0),
    _claimed_region_time(0.0), _max_region_time(0.0),
    _local_cleanup_list(local_cleanup_list),
    _humongous_proxy_set(humongous_proxy_set),
    _hrrs_cleanup_task(hrrs_cleanup_task) { }

bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
  // We use a claim value of zero here because all regions
  // were claimed with value 1 in the FinalCount task.
  hr->reset_gc_time_stamp();
  if (!hr->continuesHumongous()) {
    double start = os::elapsedTime();
    _regions_claimed++;
    hr->note_end_of_marking();
    _max_live_bytes += hr->max_live_bytes();
    _g1->free_region_if_empty(hr,
                              &_freed_bytes,
                              _local_cleanup_list,
                              _humongous_proxy_set,
                              _hrrs_cleanup_task,
                              true /* par */);
    double region_time = (os::elapsedTime() - start);
    _claimed_region_time += region_time;
    if (region_time > _max_region_time) {
      _max_region_time = region_time;
    }
  }
  return false;
}
1680 void ConcurrentMark::cleanup() {
1681 // world is stopped at this checkpoint
1682 assert(SafepointSynchronize::is_at_safepoint(),
1683 "world should be stopped");
1684 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1686 // If a full collection has happened, we shouldn't do this.
1687 if (has_aborted()) {
1688 g1h->set_marking_complete(); // So bitmap clearing isn't confused
1689 return;
1690 }
1692 g1h->verify_region_sets_optional();
1694 if (VerifyDuringGC) {
1695 HandleMark hm; // handle scope
1696 gclog_or_tty->print(" VerifyDuringGC:(before)");
1697 Universe::heap()->prepare_for_verify();
1698 Universe::verify(/* allow dirty */ true,
1699 /* silent */ false,
1700 /* option */ VerifyOption_G1UsePrevMarking);
1701 }
1703 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1704 g1p->record_concurrent_mark_cleanup_start();
1706 double start = os::elapsedTime();
1708 HeapRegionRemSet::reset_for_cleanup_tasks();
1710 // Do counting once more with the world stopped for good measure.
1711 G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1712 &_region_bm, &_card_bm);
1713 if (G1CollectedHeap::use_parallel_gc_threads()) {
1714 assert(g1h->check_heap_region_claim_values(
1715 HeapRegion::InitialClaimValue),
1716 "sanity check");
1718 int n_workers = g1h->workers()->total_workers();
1719 g1h->set_par_threads(n_workers);
1720 g1h->workers()->run_task(&g1_par_count_task);
1721 g1h->set_par_threads(0);
1723 assert(g1h->check_heap_region_claim_values(
1724 HeapRegion::FinalCountClaimValue),
1725 "sanity check");
1726 } else {
1727 g1_par_count_task.work(0);
1728 }
1730 size_t known_garbage_bytes =
1731 g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1732 g1p->set_known_garbage_bytes(known_garbage_bytes);
1734 size_t start_used_bytes = g1h->used();
1735 _at_least_one_mark_complete = true;
1736 g1h->set_marking_complete();
1738 ergo_verbose4(ErgoConcCycles,
1739 "finish cleanup",
1740 ergo_format_byte("occupancy")
1741 ergo_format_byte("capacity")
1742 ergo_format_byte_perc("known garbage"),
1743 start_used_bytes, g1h->capacity(),
1744 known_garbage_bytes,
1745 ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1747 double count_end = os::elapsedTime();
1748 double this_final_counting_time = (count_end - start);
1749 if (G1PrintParCleanupStats) {
1750 gclog_or_tty->print_cr("Cleanup:");
1751 gclog_or_tty->print_cr(" Finalize counting: %8.3f ms",
1752 this_final_counting_time*1000.0);
1753 }
1754 _total_counting_time += this_final_counting_time;
1756 if (G1PrintRegionLivenessInfo) {
1757 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1758 _g1h->heap_region_iterate(&cl);
1759 }
1761 // Install newly created mark bitMap as "prev".
1762 swapMarkBitMaps();
1764 g1h->reset_gc_time_stamp();
1766 // Note end of marking in all heap regions.
1767 double note_end_start = os::elapsedTime();
1768 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1769 if (G1CollectedHeap::use_parallel_gc_threads()) {
1770 int n_workers = g1h->workers()->total_workers();
1771 g1h->set_par_threads(n_workers);
1772 g1h->workers()->run_task(&g1_par_note_end_task);
1773 g1h->set_par_threads(0);
1775 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1776 "sanity check");
1777 } else {
1778 g1_par_note_end_task.work(0);
1779 }
1781 if (!cleanup_list_is_empty()) {
1782 // The cleanup list is not empty, so we'll have to process it
1783 // concurrently. Notify anyone else that might be wanting free
1784 // regions that there will be more free regions coming soon.
1785 g1h->set_free_regions_coming();
1786 }
1787 double note_end_end = os::elapsedTime();
1788 if (G1PrintParCleanupStats) {
1789 gclog_or_tty->print_cr(" note end of marking: %8.3f ms.",
1790 (note_end_end - note_end_start)*1000.0);
1791 }
1794 // The RSet scrubbing below must happen before the cleanup-end call
1795 // further down, as it affects the metric by which we sort the heap regions.
1796 if (G1ScrubRemSets) {
1797 double rs_scrub_start = os::elapsedTime();
1798 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1799 if (G1CollectedHeap::use_parallel_gc_threads()) {
1800 int n_workers = g1h->workers()->total_workers();
1801 g1h->set_par_threads(n_workers);
1802 g1h->workers()->run_task(&g1_par_scrub_rs_task);
1803 g1h->set_par_threads(0);
1805 assert(g1h->check_heap_region_claim_values(
1806 HeapRegion::ScrubRemSetClaimValue),
1807 "sanity check");
1808 } else {
1809 g1_par_scrub_rs_task.work(0);
1810 }
1812 double rs_scrub_end = os::elapsedTime();
1813 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1814 _total_rs_scrub_time += this_rs_scrub_time;
1815 }
1817 // this will also free any regions totally full of garbage objects,
1818 // and sort the regions.
1819 g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1821 // Statistics.
1822 double end = os::elapsedTime();
1823 _cleanup_times.add((end - start) * 1000.0);
1825 // G1CollectedHeap::heap()->print();
1826 // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1827 // G1CollectedHeap::heap()->get_gc_time_stamp());
1829 if (PrintGC || PrintGCDetails) {
1830 g1h->print_size_transition(gclog_or_tty,
1831 start_used_bytes,
1832 g1h->used(),
1833 g1h->capacity());
1834 }
1836 size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1837 g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1839 // Clean up will have freed any regions completely full of garbage.
1840 // Update the soft reference policy with the new heap occupancy.
1841 Universe::update_heap_info_at_gc();
1843 // We need to make this be a "collection" so any collection pause that
1844 // races with it goes around and waits for completeCleanup to finish.
1845 g1h->increment_total_collections();
1847 if (VerifyDuringGC) {
1848 HandleMark hm; // handle scope
1849 gclog_or_tty->print(" VerifyDuringGC:(after)");
1850 Universe::heap()->prepare_for_verify();
1851 Universe::verify(/* allow dirty */ true,
1852 /* silent */ false,
1853 /* option */ VerifyOption_G1UsePrevMarking);
1854 }
1856 g1h->verify_region_sets_optional();
1857 }
1859 void ConcurrentMark::completeCleanup() {
1860 if (has_aborted()) return;
1862 G1CollectedHeap* g1h = G1CollectedHeap::heap();
1864 _cleanup_list.verify_optional();
1865 FreeRegionList tmp_free_list("Tmp Free List");
1867 if (G1ConcRegionFreeingVerbose) {
1868 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1869 "cleanup list has "SIZE_FORMAT" entries",
1870 _cleanup_list.length());
1871 }
1873 // No one else should be accessing the _cleanup_list at this point,
1874 // so it's not necessary to take any locks
1875 while (!_cleanup_list.is_empty()) {
1876 HeapRegion* hr = _cleanup_list.remove_head();
1877 assert(hr != NULL, "the list was not empty");
1878 hr->par_clear();
1879 tmp_free_list.add_as_tail(hr);
1881 // Instead of adding one region at a time to the secondary_free_list,
1882 // we accumulate them in the local list and move them a few at a
1883 // time. This also cuts down on the number of notify_all() calls
1884 // we do during this process. We'll also append the local list when
1885 // _cleanup_list is empty (which means we just removed the last
1886 // region from the _cleanup_list).
1887 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1888 _cleanup_list.is_empty()) {
1889 if (G1ConcRegionFreeingVerbose) {
1890 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1891 "appending "SIZE_FORMAT" entries to the "
1892 "secondary_free_list, clean list still has "
1893 SIZE_FORMAT" entries",
1894 tmp_free_list.length(),
1895 _cleanup_list.length());
1896 }
1898 {
1899 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1900 g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1901 SecondaryFreeList_lock->notify_all();
1902 }
1904 if (G1StressConcRegionFreeing) {
1905 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1906 os::sleep(Thread::current(), (jlong) 1, false);
1907 }
1908 }
1909 }
1910 }
1911 assert(tmp_free_list.is_empty(), "post-condition");
1912 }
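// ---------------------------------------------------------------------
// Aside: a minimal, self-contained sketch of the batching done in
// completeCleanup() above -- regions are accumulated in a local list and
// appended to the shared list every G1SecondaryFreeListAppendLength
// entries (and once more at the end), so the lock is taken and waiters
// are notified once per batch instead of once per region. All names
// below are invented; this is not HotSpot code.
#include <condition_variable>
#include <cstddef>
#include <list>
#include <mutex>

namespace example_batched_handoff {
  static std::mutex              shared_lock; // SecondaryFreeList_lock stand-in
  static std::condition_variable shared_cv;
  static std::list<int>          shared_list; // secondary_free_list stand-in
  static const std::size_t       BATCH_LEN = 5;

  void drain(std::list<int>& cleanup_list) {
    std::list<int> local;                     // tmp_free_list stand-in
    while (!cleanup_list.empty()) {
      local.splice(local.end(), cleanup_list, cleanup_list.begin());
      if (local.size() % BATCH_LEN == 0 || cleanup_list.empty()) {
        std::lock_guard<std::mutex> g(shared_lock);
        shared_list.splice(shared_list.end(), local); // empties 'local'
        shared_cv.notify_all();                       // one wakeup per batch
      }
    }
  }
}
// ---------------------------------------------------------------------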
1914 // Support closures for reference processing in G1
1916 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1917 HeapWord* addr = (HeapWord*)obj;
1918 return addr != NULL &&
1919 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1920 }
1922 class G1CMKeepAliveClosure: public OopClosure {
1923 G1CollectedHeap* _g1;
1924 ConcurrentMark* _cm;
1925 CMBitMap* _bitMap;
1926 public:
1927 G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1928 CMBitMap* bitMap) :
1929 _g1(g1), _cm(cm),
1930 _bitMap(bitMap) {}
1932 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1933 virtual void do_oop( oop* p) { do_oop_work(p); }
1935 template <class T> void do_oop_work(T* p) {
1936 oop obj = oopDesc::load_decode_heap_oop(p);
1937 HeapWord* addr = (HeapWord*)obj;
1939 if (_cm->verbose_high()) {
1940 gclog_or_tty->print_cr("\t[0] we're looking at location "
1941 "*"PTR_FORMAT" = "PTR_FORMAT,
1942 p, (void*) obj);
1943 }
1945 if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1946 _bitMap->mark(addr);
1947 _cm->mark_stack_push(obj);
1948 }
1949 }
1950 };
1952 class G1CMDrainMarkingStackClosure: public VoidClosure {
1953 CMMarkStack* _markStack;
1954 CMBitMap* _bitMap;
1955 G1CMKeepAliveClosure* _oopClosure;
1956 public:
1957 G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1958 G1CMKeepAliveClosure* oopClosure) :
1959 _bitMap(bitMap),
1960 _markStack(markStack),
1961 _oopClosure(oopClosure)
1962 {}
1964 void do_void() {
1965 _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1966 }
1967 };
1969 // 'Keep Alive' closure used by parallel reference processing.
1970 // An instance of this closure is used in the parallel reference processing
1971 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1972 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
1973 // only placed onto the discovered ref lists once, so we can mark and push
1974 // with no need to check whether the object has already been marked. Using
1975 // the G1CMKeepAliveClosure would mean, however, having all the worker
1976 // threads operating on the global mark stack. An individual worker would
1977 // then be doing lock-free pushes while it processes its own discovered
1978 // ref list followed by a drain call. If the discovered ref lists
1979 // are unbalanced then this could cause interference with the other
1980 // workers. Using a CMTask (and its embedded local data structures)
1981 // avoids that potential interference.
1982 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
1983 ConcurrentMark* _cm;
1984 CMTask* _task;
1985 CMBitMap* _bitMap;
1986 int _ref_counter_limit;
1987 int _ref_counter;
1988 public:
1989 G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
1990 CMTask* task,
1991 CMBitMap* bitMap) :
1992 _cm(cm), _task(task), _bitMap(bitMap),
1993 _ref_counter_limit(G1RefProcDrainInterval)
1994 {
1995 assert(_ref_counter_limit > 0, "sanity");
1996 _ref_counter = _ref_counter_limit;
1997 }
1999 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2000 virtual void do_oop( oop* p) { do_oop_work(p); }
2002 template <class T> void do_oop_work(T* p) {
2003 if (!_cm->has_overflown()) {
2004 oop obj = oopDesc::load_decode_heap_oop(p);
2005 if (_cm->verbose_high()) {
2006 gclog_or_tty->print_cr("\t[%d] we're looking at location "
2007 "*"PTR_FORMAT" = "PTR_FORMAT,
2008 _task->task_id(), p, (void*) obj);
2009 }
2011 _task->deal_with_reference(obj);
2012 _ref_counter--;
2014 if (_ref_counter == 0) {
2015 // We have dealt with _ref_counter_limit references, pushing them and objects
2016 // reachable from them on to the local stack (and possibly the global stack).
2017 // Call do_marking_step() to process these entries. We call the routine in a
2018 // loop, which we'll exit if there's nothing more to do (i.e. we're done
2019 // with the entries that we've pushed as a result of the deal_with_reference
2020 // calls above) or we overflow.
2021 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2022 // while there may still be some work to do. (See the comment at the
2023 // beginning of CMTask::do_marking_step() for those conditions - one of which
2024 // is reaching the specified time target.) It is only when
2025 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2026 // that the marking has completed.
2027 do {
2028 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2029 _task->do_marking_step(mark_step_duration_ms,
2030 false /* do_stealing */,
2031 false /* do_termination */);
2032 } while (_task->has_aborted() && !_cm->has_overflown());
2033 _ref_counter = _ref_counter_limit;
2034 }
2035 } else {
2036 if (_cm->verbose_high()) {
2037 gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2038 }
2039 }
2040 }
2041 };
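// ---------------------------------------------------------------------
// Aside: the closure above counts _ref_counter down and, every
// G1RefProcDrainInterval references, drains its local structures via
// do_marking_step() so they stay bounded. A generic, self-contained
// version of that "process K items, then drain" rhythm is sketched
// below; all names are invented and nothing here is HotSpot API.
#include <cstddef>
#include <vector>

namespace example_countdown_drain {
  static const std::size_t DRAIN_INTERVAL = 4; // G1RefProcDrainInterval stand-in

  template <typename Item, typename ProcessFn, typename DrainFn>
  void process_all(const std::vector<Item>& items,
                   ProcessFn process, DrainFn drain) {
    std::size_t counter = DRAIN_INTERVAL;
    for (std::size_t i = 0; i < items.size(); ++i) {
      process(items[i]);    // may push follow-up work onto a local stack
      if (--counter == 0) { // dealt with DRAIN_INTERVAL items:
        drain();            // empty the local work before continuing
        counter = DRAIN_INTERVAL;
      }
    }
    drain();                // leave nothing queued at the end
  }
}
// ---------------------------------------------------------------------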
2043 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2044 ConcurrentMark* _cm;
2045 CMTask* _task;
2046 public:
2047 G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2048 _cm(cm), _task(task)
2049 {}
2051 void do_void() {
2052 do {
2053 if (_cm->verbose_high()) {
2054 gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2055 _task->task_id());
2056 }
2058 // We call CMTask::do_marking_step() to completely drain the local and
2059 // global marking stacks. The routine is called in a loop, which we'll
2060 // exit if there's nothing more to do (i.e. we've completely drained the
2061 // entries that were pushed as a result of applying the
2062 // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2063 // lists above) or we overflow the global marking stack.
2064 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2065 // while there may still be some work to do. (See the comment at the
2066 // beginning of CMTask::do_marking_step() for those conditions - one of which
2067 // is reaching the specified time target.) It is only when
2068 // CMTask::do_marking_step() returns without setting the has_aborted() flag
2069 // that the marking has completed.
2071 _task->do_marking_step(1000000000.0 /* something very large */,
2072 true /* do_stealing */,
2073 true /* do_termination */);
2074 } while (_task->has_aborted() && !_cm->has_overflown());
2075 }
2076 };
2078 // Implementation of AbstractRefProcTaskExecutor for parallel
2079 // reference processing at the end of G1 concurrent marking
2081 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2082 private:
2083 G1CollectedHeap* _g1h;
2084 ConcurrentMark* _cm;
2085 CMBitMap* _bitmap;
2086 WorkGang* _workers;
2087 int _active_workers;
2089 public:
2090 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2091 ConcurrentMark* cm,
2092 CMBitMap* bitmap,
2093 WorkGang* workers,
2094 int n_workers) :
2095 _g1h(g1h), _cm(cm), _bitmap(bitmap),
2096 _workers(workers), _active_workers(n_workers)
2097 { }
2099 // Executes the given task using concurrent marking worker threads.
2100 virtual void execute(ProcessTask& task);
2101 virtual void execute(EnqueueTask& task);
2102 };
2104 class G1CMRefProcTaskProxy: public AbstractGangTask {
2105 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2106 ProcessTask& _proc_task;
2107 G1CollectedHeap* _g1h;
2108 ConcurrentMark* _cm;
2109 CMBitMap* _bitmap;
2111 public:
2112 G1CMRefProcTaskProxy(ProcessTask& proc_task,
2113 G1CollectedHeap* g1h,
2114 ConcurrentMark* cm,
2115 CMBitMap* bitmap) :
2116 AbstractGangTask("Process reference objects in parallel"),
2117 _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2118 {}
2120 virtual void work(int i) {
2121 CMTask* marking_task = _cm->task(i);
2122 G1CMIsAliveClosure g1_is_alive(_g1h);
2123 G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2124 G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2126 _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2127 }
2128 };
2130 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2131 assert(_workers != NULL, "Need parallel worker threads.");
2133 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2135 // We need to reset the phase for each task execution so that
2136 // the termination protocol of CMTask::do_marking_step works.
2137 _cm->set_phase(_active_workers, false /* concurrent */);
2138 _g1h->set_par_threads(_active_workers);
2139 _workers->run_task(&proc_task_proxy);
2140 _g1h->set_par_threads(0);
2141 }
2143 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2144 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2145 EnqueueTask& _enq_task;
2147 public:
2148 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2149 AbstractGangTask("Enqueue reference objects in parallel"),
2150 _enq_task(enq_task)
2151 { }
2153 virtual void work(int i) {
2154 _enq_task.work(i);
2155 }
2156 };
2158 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2159 assert(_workers != NULL, "Need parallel worker threads.");
2161 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2163 _g1h->set_par_threads(_active_workers);
2164 _workers->run_task(&enq_task_proxy);
2165 _g1h->set_par_threads(0);
2166 }
2168 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2169 ResourceMark rm;
2170 HandleMark hm;
2172 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2174 // Is alive closure.
2175 G1CMIsAliveClosure g1_is_alive(g1h);
2177 // Inner scope to exclude the cleaning of the string and symbol
2178 // tables from the displayed time.
2179 {
2180 bool verbose = PrintGC && PrintGCDetails;
2181 if (verbose) {
2182 gclog_or_tty->put(' ');
2183 }
2184 TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2186 ReferenceProcessor* rp = g1h->ref_processor_cm();
2188 // See the comment in G1CollectedHeap::ref_processing_init()
2189 // about how reference processing currently works in G1.
2191 // Process weak references.
2192 rp->setup_policy(clear_all_soft_refs);
2193 assert(_markStack.isEmpty(), "mark stack should be empty");
2195 G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2196 G1CMDrainMarkingStackClosure
2197 g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2199 // We use the work gang from the G1CollectedHeap and we utilize all
2200 // the worker threads.
2201 int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2202 active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2204 G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2205 g1h->workers(), active_workers);
2207 if (rp->processing_is_mt()) {
2208 // Set the degree of MT here. If the discovery is done MT, there
2209 // may have been a different number of threads doing the discovery
2210 // and a different number of discovered lists may have Ref objects.
2211 // That is OK as long as the Reference lists are balanced (see
2212 // balance_all_queues() and balance_queues()).
2213 rp->set_active_mt_degree(active_workers);
2215 rp->process_discovered_references(&g1_is_alive,
2216 &g1_keep_alive,
2217 &g1_drain_mark_stack,
2218 &par_task_executor);
2220 // The work routines of the parallel keep_alive and drain_marking_stack
2221 // will set the has_overflown flag if we overflow the global marking
2222 // stack.
2223 } else {
2224 rp->process_discovered_references(&g1_is_alive,
2225 &g1_keep_alive,
2226 &g1_drain_mark_stack,
2227 NULL);
2228 }
2230 assert(_markStack.overflow() || _markStack.isEmpty(),
2231 "mark stack should be empty (unless it overflowed)");
2232 if (_markStack.overflow()) {
2233 // Should have been done already when we tried to push an
2234 // entry on to the global mark stack. But let's do it again.
2235 set_has_overflown();
2236 }
2238 if (rp->processing_is_mt()) {
2239 assert(rp->num_q() == active_workers, "why not");
2240 rp->enqueue_discovered_references(&par_task_executor);
2241 } else {
2242 rp->enqueue_discovered_references();
2243 }
2245 rp->verify_no_references_recorded();
2246 assert(!rp->discovery_enabled(), "Post condition");
2247 }
2249 // Now clean up stale oops in StringTable
2250 StringTable::unlink(&g1_is_alive);
2251 // Clean up unreferenced symbols in symbol table.
2252 SymbolTable::unlink();
2253 }
2255 void ConcurrentMark::swapMarkBitMaps() {
2256 CMBitMapRO* temp = _prevMarkBitMap;
2257 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
2258 _nextMarkBitMap = (CMBitMap*) temp;
2259 }
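// ---------------------------------------------------------------------
// Aside: swapMarkBitMaps() above is a plain double-buffer flip -- the
// bitmap built by the marking cycle that just completed becomes the
// stable "prev" (read-only) view, and the storage of the old prev map is
// recycled as the "next" bitmap to be cleared and rebuilt. In generic
// form (invented names, not HotSpot code):
#include <utility>

namespace example_double_buffer {
  template <typename T>
  void flip(T*& prev, T*& next) {
    std::swap(prev, next); // O(1): only the roles change hands, not the data
  }
}
// ---------------------------------------------------------------------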
2261 class CMRemarkTask: public AbstractGangTask {
2262 private:
2263 ConcurrentMark *_cm;
2265 public:
2266 void work(int worker_i) {
2267 // Since all available tasks are actually started, we should
2268 // only proceed if we're supposed to be active.
2269 if ((size_t)worker_i < _cm->active_tasks()) {
2270 CMTask* task = _cm->task(worker_i);
2271 task->record_start_time();
2272 do {
2273 task->do_marking_step(1000000000.0 /* something very large */,
2274 true /* do_stealing */,
2275 true /* do_termination */);
2276 } while (task->has_aborted() && !_cm->has_overflown());
2277 // If we overflow, then we do not want to restart. We instead
2278 // want to abort remark and do concurrent marking again.
2279 task->record_end_time();
2280 }
2281 }
2283 CMRemarkTask(ConcurrentMark* cm) :
2284 AbstractGangTask("Par Remark"), _cm(cm) { }
2285 };
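// ---------------------------------------------------------------------
// Aside: CMRemarkTask::work() above admits only the first active_tasks()
// workers; any surplus gang threads that were started anyway return
// immediately. That gate is simply (invented names, not HotSpot code):
namespace example_worker_gate {
  void work(int worker_id, int active_workers) {
    if (worker_id >= active_workers) {
      return; // surplus worker: bail out at once
    }
    // ... perform this worker's share ...
  }
}
// ---------------------------------------------------------------------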
2287 void ConcurrentMark::checkpointRootsFinalWork() {
2288 ResourceMark rm;
2289 HandleMark hm;
2290 G1CollectedHeap* g1h = G1CollectedHeap::heap();
2292 g1h->ensure_parsability(false);
2294 if (G1CollectedHeap::use_parallel_gc_threads()) {
2295 G1CollectedHeap::StrongRootsScope srs(g1h);
2296 // this is remark, so we'll use up all available threads
2297 int active_workers = ParallelGCThreads;
2298 set_phase(active_workers, false /* concurrent */);
2300 CMRemarkTask remarkTask(this);
2301 // We will start all available threads, even if we decide that the
2302 // active_workers will be fewer. The extra ones will just bail out
2303 // immediately.
2304 int n_workers = g1h->workers()->total_workers();
2305 g1h->set_par_threads(n_workers);
2306 g1h->workers()->run_task(&remarkTask);
2307 g1h->set_par_threads(0);
2308 } else {
2309 G1CollectedHeap::StrongRootsScope srs(g1h);
2310 // this is remark, so we'll use up all available threads
2311 int active_workers = 1;
2312 set_phase(active_workers, false /* concurrent */);
2314 CMRemarkTask remarkTask(this);
2315 // We will start all available threads, even if we decide that the
2316 // active_workers will be fewer. The extra ones will just bail out
2317 // immediately.
2318 remarkTask.work(0);
2319 }
2320 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2321 guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2323 print_stats();
2325 #if VERIFY_OBJS_PROCESSED
2326 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2327 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2328 _scan_obj_cl.objs_processed,
2329 ThreadLocalObjQueue::objs_enqueued);
2330 guarantee(_scan_obj_cl.objs_processed ==
2331 ThreadLocalObjQueue::objs_enqueued,
2332 "Different number of objs processed and enqueued.");
2333 }
2334 #endif
2335 }
2337 #ifndef PRODUCT
2339 class PrintReachableOopClosure: public OopClosure {
2340 private:
2341 G1CollectedHeap* _g1h;
2342 outputStream* _out;
2343 VerifyOption _vo;
2344 bool _all;
2346 public:
2347 PrintReachableOopClosure(outputStream* out,
2348 VerifyOption vo,
2349 bool all) :
2350 _g1h(G1CollectedHeap::heap()),
2351 _out(out), _vo(vo), _all(all) { }
2353 void do_oop(narrowOop* p) { do_oop_work(p); }
2354 void do_oop( oop* p) { do_oop_work(p); }
2356 template <class T> void do_oop_work(T* p) {
2357 oop obj = oopDesc::load_decode_heap_oop(p);
2358 const char* str = NULL;
2359 const char* str2 = "";
2361 if (obj == NULL) {
2362 str = "";
2363 } else if (!_g1h->is_in_g1_reserved(obj)) {
2364 str = " O";
2365 } else {
2366 HeapRegion* hr = _g1h->heap_region_containing(obj);
2367 guarantee(hr != NULL, "invariant");
2368 bool over_tams = false;
2369 bool marked = false;
2371 switch (_vo) {
2372 case VerifyOption_G1UsePrevMarking:
2373 over_tams = hr->obj_allocated_since_prev_marking(obj);
2374 marked = _g1h->isMarkedPrev(obj);
2375 break;
2376 case VerifyOption_G1UseNextMarking:
2377 over_tams = hr->obj_allocated_since_next_marking(obj);
2378 marked = _g1h->isMarkedNext(obj);
2379 break;
2380 case VerifyOption_G1UseMarkWord:
2381 marked = obj->is_gc_marked();
2382 break;
2383 default:
2384 ShouldNotReachHere();
2385 }
2387 if (over_tams) {
2388 str = " >";
2389 if (marked) {
2390 str2 = " AND MARKED";
2391 }
2392 } else if (marked) {
2393 str = " M";
2394 } else {
2395 str = " NOT";
2396 }
2397 }
2399 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
2400 p, (void*) obj, str, str2);
2401 }
2402 };
2404 class PrintReachableObjectClosure : public ObjectClosure {
2405 private:
2406 G1CollectedHeap* _g1h;
2407 outputStream* _out;
2408 VerifyOption _vo;
2409 bool _all;
2410 HeapRegion* _hr;
2412 public:
2413 PrintReachableObjectClosure(outputStream* out,
2414 VerifyOption vo,
2415 bool all,
2416 HeapRegion* hr) :
2417 _g1h(G1CollectedHeap::heap()),
2418 _out(out), _vo(vo), _all(all), _hr(hr) { }
2420 void do_object(oop o) {
2421 bool over_tams = false;
2422 bool marked = false;
2424 switch (_vo) {
2425 case VerifyOption_G1UsePrevMarking:
2426 over_tams = _hr->obj_allocated_since_prev_marking(o);
2427 marked = _g1h->isMarkedPrev(o);
2428 break;
2429 case VerifyOption_G1UseNextMarking:
2430 over_tams = _hr->obj_allocated_since_next_marking(o);
2431 marked = _g1h->isMarkedNext(o);
2432 break;
2433 case VerifyOption_G1UseMarkWord:
2434 marked = o->is_gc_marked();
2435 break;
2436 default:
2437 ShouldNotReachHere();
2438 }
2439 bool print_it = _all || over_tams || marked;
2441 if (print_it) {
2442 _out->print_cr(" "PTR_FORMAT"%s",
2443 o, (over_tams) ? " >" : (marked) ? " M" : "");
2444 PrintReachableOopClosure oopCl(_out, _vo, _all);
2445 o->oop_iterate(&oopCl);
2446 }
2447 }
2448 };
2450 class PrintReachableRegionClosure : public HeapRegionClosure {
2451 private:
2452 outputStream* _out;
2453 VerifyOption _vo;
2454 bool _all;
2456 public:
2457 bool doHeapRegion(HeapRegion* hr) {
2458 HeapWord* b = hr->bottom();
2459 HeapWord* e = hr->end();
2460 HeapWord* t = hr->top();
2461 HeapWord* p = NULL;
2463 switch (_vo) {
2464 case VerifyOption_G1UsePrevMarking:
2465 p = hr->prev_top_at_mark_start();
2466 break;
2467 case VerifyOption_G1UseNextMarking:
2468 p = hr->next_top_at_mark_start();
2469 break;
2470 case VerifyOption_G1UseMarkWord:
2471 // When we are verifying marking using the mark word
2472 // TAMS has no relevance.
2473 assert(p == NULL, "post-condition");
2474 break;
2475 default:
2476 ShouldNotReachHere();
2477 }
2478 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2479 "TAMS: "PTR_FORMAT, b, e, t, p);
2480 _out->cr();
2482 HeapWord* from = b;
2483 HeapWord* to = t;
2485 if (to > from) {
2486 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2487 _out->cr();
2488 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2489 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2490 _out->cr();
2491 }
2493 return false;
2494 }
2496 PrintReachableRegionClosure(outputStream* out,
2497 VerifyOption vo,
2498 bool all) :
2499 _out(out), _vo(vo), _all(all) { }
2500 };
2502 static const char* verify_option_to_tams(VerifyOption vo) {
2503 switch (vo) {
2504 case VerifyOption_G1UsePrevMarking:
2505 return "PTAMS";
2506 case VerifyOption_G1UseNextMarking:
2507 return "NTAMS";
2508 default:
2509 return "NONE";
2510 }
2511 }
2513 void ConcurrentMark::print_reachable(const char* str,
2514 VerifyOption vo,
2515 bool all) {
2516 gclog_or_tty->cr();
2517 gclog_or_tty->print_cr("== Doing heap dump... ");
2519 if (G1PrintReachableBaseFile == NULL) {
2520 gclog_or_tty->print_cr(" #### error: no base file defined");
2521 return;
2522 }
2524 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2525 (JVM_MAXPATHLEN - 1)) {
2526 gclog_or_tty->print_cr(" #### error: file name too long");
2527 return;
2528 }
2530 char file_name[JVM_MAXPATHLEN];
2531 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2532 gclog_or_tty->print_cr(" dumping to file %s", file_name);
2534 fileStream fout(file_name);
2535 if (!fout.is_open()) {
2536 gclog_or_tty->print_cr(" #### error: could not open file");
2537 return;
2538 }
2540 outputStream* out = &fout;
2541 out->print_cr("-- USING %s", verify_option_to_tams(vo));
2542 out->cr();
2544 out->print_cr("--- ITERATING OVER REGIONS");
2545 out->cr();
2546 PrintReachableRegionClosure rcl(out, vo, all);
2547 _g1h->heap_region_iterate(&rcl);
2548 out->cr();
2550 gclog_or_tty->print_cr(" done");
2551 gclog_or_tty->flush();
2552 }
2554 #endif // PRODUCT
2556 // This note is for drainAllSATBBuffers and the code in between.
2557 // In the future we could reuse a task to do this work during an
2558 // evacuation pause (since now tasks are not active and can be claimed
2559 // during an evacuation pause). This was a late change to the code and
2560 // is currently not being taken advantage of.
2562 class CMGlobalObjectClosure : public ObjectClosure {
2563 private:
2564 ConcurrentMark* _cm;
2566 public:
2567 void do_object(oop obj) {
2568 _cm->deal_with_reference(obj);
2569 }
2571 CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2572 };
2574 void ConcurrentMark::deal_with_reference(oop obj) {
2575 if (verbose_high()) {
2576 gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2577 (void*) obj);
2578 }
2580 HeapWord* objAddr = (HeapWord*) obj;
2581 assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2582 if (_g1h->is_in_g1_reserved(objAddr)) {
2583 assert(obj != NULL, "null check is implicit");
2584 if (!_nextMarkBitMap->isMarked(objAddr)) {
2585 // Only get the containing region if the object is not marked on the
2586 // bitmap (otherwise, it's a waste of time since we won't do
2587 // anything with it).
2588 HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2589 if (!hr->obj_allocated_since_next_marking(obj)) {
2590 if (verbose_high()) {
2591 gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2592 "marked", (void*) obj);
2593 }
2595 // we need to mark it first
2596 if (_nextMarkBitMap->parMark(objAddr)) {
2597 // No OrderAccess::store_load() is needed. It is implicit in the
2598 // CAS done in parMark(objAddr) above
2599 HeapWord* finger = _finger;
2600 if (objAddr < finger) {
2601 if (verbose_high()) {
2602 gclog_or_tty->print_cr("[global] below the global finger "
2603 "("PTR_FORMAT"), pushing it", finger);
2604 }
2605 if (!mark_stack_push(obj)) {
2606 if (verbose_low()) {
2607 gclog_or_tty->print_cr("[global] global stack overflow during "
2608 "deal_with_reference");
2609 }
2610 }
2611 }
2612 }
2613 }
2614 }
2615 }
2616 }
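// ---------------------------------------------------------------------
// Aside: the rule implemented by deal_with_reference() above is: after
// winning the race to set the mark bit, an object needs an explicit push
// only if it lies below the global finger, since addresses at or above
// the finger will still be reached by the bitmap scan. A toy,
// self-contained model (single-word "bitmap", invented names, and a
// non-MT-safe stack -- the real push is lock-free):
#include <atomic>
#include <cstdint>
#include <vector>

namespace example_grey_below_finger {
  static std::atomic<std::uint64_t> bitmap_word(0);
  static std::atomic<char*>         finger(nullptr);
  static std::vector<char*>         mark_stack;

  // True only for the single thread that actually flips the bit
  // (stands in for _nextMarkBitMap->parMark()).
  bool try_mark(char* addr) {
    std::uint64_t bit =
      std::uint64_t(1) << ((reinterpret_cast<std::uintptr_t>(addr) >> 3) & 63);
    return (bitmap_word.fetch_or(bit) & bit) == 0;
  }

  void deal_with(char* addr) {
    if (try_mark(addr) && addr < finger.load()) {
      mark_stack.push_back(addr); // behind the scan: grey it explicitly
    }
  }
}
// ---------------------------------------------------------------------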
2618 void ConcurrentMark::drainAllSATBBuffers() {
2619 CMGlobalObjectClosure oc(this);
2620 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2621 satb_mq_set.set_closure(&oc);
2623 while (satb_mq_set.apply_closure_to_completed_buffer()) {
2624 if (verbose_medium()) {
2625 gclog_or_tty->print_cr("[global] processed an SATB buffer");
2626 }
2627 }
2629 // no need to check whether we should do this, as this is only
2630 // called during an evacuation pause
2631 satb_mq_set.iterate_closure_all_threads();
2633 satb_mq_set.set_closure(NULL);
2634 assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2635 }
2637 void ConcurrentMark::markPrev(oop p) {
2638 // Note we are overriding the read-only view of the prev map here, via
2639 // the cast.
2640 ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2641 }
2643 void ConcurrentMark::clear(oop p) {
2644 assert(p != NULL && p->is_oop(), "expected an oop");
2645 HeapWord* addr = (HeapWord*)p;
2646 assert(addr >= _nextMarkBitMap->startWord() &&
2647 addr < _nextMarkBitMap->endWord(), "in a region");
2649 _nextMarkBitMap->clear(addr);
2650 }
2652 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2653 // Note we are overriding the read-only view of the prev map here, via
2654 // the cast.
2655 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2656 _nextMarkBitMap->clearRange(mr);
2657 }
2659 HeapRegion*
2660 ConcurrentMark::claim_region(int task_num) {
2661 // "checkpoint" the finger
2662 HeapWord* finger = _finger;
2664 // _heap_end will not change underneath our feet; it only changes at
2665 // yield points.
2666 while (finger < _heap_end) {
2667 assert(_g1h->is_in_g1_reserved(finger), "invariant");
2669 // Note on how this code handles humongous regions. In the
2670 // normal case the finger will reach the start of a "starts
2671 // humongous" (SH) region. Its end will either be the end of the
2672 // last "continues humongous" (CH) region in the sequence, or the
2673 // standard end of the SH region (if the SH is the only region in
2674 // the sequence). That way claim_region() will skip over the CH
2675 // regions. However, there is a subtle race between a CM thread
2676 // executing this method and a mutator thread doing a humongous
2677 // object allocation. The two are not mutually exclusive as the CM
2678 // thread does not need to hold the Heap_lock when it gets
2679 // here. So there is a chance that claim_region() will come across
2680 // a free region that's in the process of becoming a SH or a CH
2681 // region. In the former case, it will either
2682 // a) Miss the update to the region's end, in which case it will
2683 // visit every subsequent CH region, will find their bitmaps
2684 // empty, and do nothing, or
2685 // b) Will observe the update of the region's end (in which case
2686 // it will skip the subsequent CH regions).
2687 // If it comes across a region that suddenly becomes CH, the
2688 // scenario will be similar to b). So, the race between
2689 // claim_region() and a humongous object allocation might force us
2690 // to do a bit of unnecessary work (due to some unnecessary bitmap
2691 // iterations) but it should not introduce any correctness issues.
2692 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2693 HeapWord* bottom = curr_region->bottom();
2694 HeapWord* end = curr_region->end();
2695 HeapWord* limit = curr_region->next_top_at_mark_start();
2697 if (verbose_low()) {
2698 gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2699 "["PTR_FORMAT", "PTR_FORMAT"), "
2700 "limit = "PTR_FORMAT,
2701 task_num, curr_region, bottom, end, limit);
2702 }
2704 // Is the gap between reading the finger and doing the CAS too long?
2705 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2706 if (res == finger) {
2707 // we succeeded
2709 // notice that _finger == end cannot be guaranteed here since
2710 // someone else might have moved the finger even further.
2711 assert(_finger >= end, "the finger should have moved forward");
2713 if (verbose_low()) {
2714 gclog_or_tty->print_cr("[%d] we were successful with region = "
2715 PTR_FORMAT, task_num, curr_region);
2716 }
2718 if (limit > bottom) {
2719 if (verbose_low()) {
2720 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2721 "returning it ", task_num, curr_region);
2722 }
2723 return curr_region;
2724 } else {
2725 assert(limit == bottom,
2726 "the region limit should be at bottom");
2727 if (verbose_low()) {
2728 gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2729 "returning NULL", task_num, curr_region);
2730 }
2731 // we return NULL and the caller should try calling
2732 // claim_region() again.
2733 return NULL;
2734 }
2735 } else {
2736 assert(_finger > finger, "the finger should have moved forward");
2737 if (verbose_low()) {
2738 gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2739 "global finger = "PTR_FORMAT", "
2740 "our finger = "PTR_FORMAT,
2741 task_num, _finger, finger);
2742 }
2744 // read it again
2745 finger = _finger;
2746 }
2747 }
2749 return NULL;
2750 }
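// ---------------------------------------------------------------------
// Aside: the claiming protocol in claim_region() above is a lock-free
// CAS on the global finger -- read the finger, compute the end of the
// region it points into, and try to bump the finger there; the CAS
// winner owns the region and losers re-read and retry. A self-contained
// toy over fixed-size regions (all names invented, not HotSpot code):
#include <atomic>
#include <cstddef>

namespace example_claim_region {
  static const std::ptrdiff_t REGION_BYTES = 4096;

  // Returns the start of a claimed region, or NULL when past the end.
  char* claim(std::atomic<char*>& finger, char* heap_end) {
    char* f = finger.load();
    while (f < heap_end) {
      char* end = f + REGION_BYTES; // end of the region the finger is in
      if (finger.compare_exchange_strong(f, end)) {
        return f;                   // we own [f, end)
      }
      // CAS failed: 'f' was reloaded with the current finger; retry.
    }
    return NULL;
  }
}
// ---------------------------------------------------------------------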
2752 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2753 bool result = false;
2754 for (int i = 0; i < (int)_max_task_num; ++i) {
2755 CMTask* the_task = _tasks[i];
2756 MemRegion mr = the_task->aborted_region();
2757 if (mr.start() != NULL) {
2758 assert(mr.end() != NULL, "invariant");
2759 assert(mr.word_size() > 0, "invariant");
2760 HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2761 assert(hr != NULL, "invariant");
2762 if (hr->in_collection_set()) {
2763 // The region points into the collection set
2764 the_task->set_aborted_region(MemRegion());
2765 result = true;
2766 }
2767 }
2768 }
2769 return result;
2770 }
2772 bool ConcurrentMark::has_aborted_regions() {
2773 for (int i = 0; i < (int)_max_task_num; ++i) {
2774 CMTask* the_task = _tasks[i];
2775 MemRegion mr = the_task->aborted_region();
2776 if (mr.start() != NULL) {
2777 assert(mr.end() != NULL, "invariant");
2778 assert(mr.word_size() > 0, "invariant");
2779 return true;
2780 }
2781 }
2782 return false;
2783 }
2785 void ConcurrentMark::oops_do(OopClosure* cl) {
2786 if (_markStack.size() > 0 && verbose_low()) {
2787 gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2788 "size = %d", _markStack.size());
2789 }
2790 // we first iterate over the contents of the mark stack...
2791 _markStack.oops_do(cl);
2793 for (int i = 0; i < (int)_max_task_num; ++i) {
2794 OopTaskQueue* queue = _task_queues->queue((int)i);
2796 if (queue->size() > 0 && verbose_low()) {
2797 gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2798 "size = %d", i, queue->size());
2799 }
2801 // ...then over the contents of all the task queues.
2802 queue->oops_do(cl);
2803 }
2805 // Invalidate any entries in the region stack that point
2806 // into the collection set.
2807 if (_regionStack.invalidate_entries_into_cset()) {
2808 // otherwise, any gray objects copied during the evacuation pause
2809 // might not be visited.
2810 assert(_should_gray_objects, "invariant");
2811 }
2813 // Invalidate any aborted regions, recorded in the individual CM
2814 // tasks, that point into the collection set.
2815 if (invalidate_aborted_regions_in_cset()) {
2816 // otherwise, any gray objects copied during the evacuation pause
2817 // might not be visited.
2818 assert(_should_gray_objects, "invariant");
2819 }
2821 }
2823 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2824 _markStack.setEmpty();
2825 _markStack.clear_overflow();
2826 _regionStack.setEmpty();
2827 _regionStack.clear_overflow();
2828 if (clear_overflow) {
2829 clear_has_overflown();
2830 } else {
2831 assert(has_overflown(), "pre-condition");
2832 }
2833 _finger = _heap_start;
2835 for (int i = 0; i < (int)_max_task_num; ++i) {
2836 OopTaskQueue* queue = _task_queues->queue(i);
2837 queue->set_empty();
2838 // Clear any partial regions from the CMTasks
2839 _tasks[i]->clear_aborted_region();
2840 }
2841 }
2843 void ConcurrentMark::print_stats() {
2844 if (verbose_stats()) {
2845 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2846 for (size_t i = 0; i < _active_tasks; ++i) {
2847 _tasks[i]->print_stats();
2848 gclog_or_tty->print_cr("---------------------------------------------------------------------");
2849 }
2850 }
2851 }
2853 class CSMarkOopClosure: public OopClosure {
2854 friend class CSMarkBitMapClosure;
2856 G1CollectedHeap* _g1h;
2857 CMBitMap* _bm;
2858 ConcurrentMark* _cm;
2859 oop* _ms;
2860 jint* _array_ind_stack;
2861 int _ms_size;
2862 int _ms_ind;
2863 int _array_increment;
2865 bool push(oop obj, int arr_ind = 0) {
2866 if (_ms_ind == _ms_size) {
2867 gclog_or_tty->print_cr("Mark stack is full.");
2868 return false;
2869 }
2870 _ms[_ms_ind] = obj;
2871 if (obj->is_objArray()) {
2872 _array_ind_stack[_ms_ind] = arr_ind;
2873 }
2874 _ms_ind++;
2875 return true;
2876 }
2878 oop pop() {
2879 if (_ms_ind == 0) {
2880 return NULL;
2881 } else {
2882 _ms_ind--;
2883 return _ms[_ms_ind];
2884 }
2885 }
2887 template <class T> bool drain() {
2888 while (_ms_ind > 0) {
2889 oop obj = pop();
2890 assert(obj != NULL, "Since index was non-zero.");
2891 if (obj->is_objArray()) {
2892 jint arr_ind = _array_ind_stack[_ms_ind];
2893 objArrayOop aobj = objArrayOop(obj);
2894 jint len = aobj->length();
2895 jint next_arr_ind = arr_ind + _array_increment;
2896 if (next_arr_ind < len) {
2897 push(obj, next_arr_ind);
2898 }
2899 // Now process this portion of this one.
2900 int lim = MIN2(next_arr_ind, len);
2901 for (int j = arr_ind; j < lim; j++) {
2902 do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2903 }
2905 } else {
2906 obj->oop_iterate(this);
2907 }
2908 if (abort()) return false;
2909 }
2910 return true;
2911 }
2913 public:
2914 CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2915 _g1h(G1CollectedHeap::heap()),
2916 _cm(cm),
2917 _bm(cm->nextMarkBitMap()),
2918 _ms_size(ms_size), _ms_ind(0),
2919 _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2920 _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2921 _array_increment(MAX2(ms_size/8, 16))
2922 {}
2924 ~CSMarkOopClosure() {
2925 FREE_C_HEAP_ARRAY(oop, _ms);
2926 FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2927 }
2929 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2930 virtual void do_oop( oop* p) { do_oop_work(p); }
2932 template <class T> void do_oop_work(T* p) {
2933 T heap_oop = oopDesc::load_heap_oop(p);
2934 if (oopDesc::is_null(heap_oop)) return;
2935 oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2936 if (obj->is_forwarded()) {
2937 // If the object has already been forwarded, we have to make sure
2938 // that it's marked. So follow the forwarding pointer. Note that
2939 // this does the right thing for self-forwarding pointers in the
2940 // evacuation failure case.
2941 obj = obj->forwardee();
2942 }
2943 HeapRegion* hr = _g1h->heap_region_containing(obj);
2944 if (hr != NULL) {
2945 if (hr->in_collection_set()) {
2946 if (_g1h->is_obj_ill(obj)) {
2947 _bm->mark((HeapWord*)obj);
2948 if (!push(obj)) {
2949 gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2950 set_abort();
2951 }
2952 }
2953 } else {
2954 // Outside the collection set; we need to gray it
2955 _cm->deal_with_reference(obj);
2956 }
2957 }
2958 }
2959 };
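// ---------------------------------------------------------------------
// Aside: CSMarkOopClosure::drain() above bounds the work done per object
// array by scanning only _array_increment slots at a time and re-pushing
// the array together with the next start index. The same chunked
// traversal, self-contained and with invented names (not HotSpot code):
#include <stack>
#include <utility>
#include <vector>

namespace example_chunked_arrays {
  typedef std::vector<int> Arr;
  static const int CHUNK = 16; // _array_increment stand-in

  long sum_all(std::stack<std::pair<const Arr*, int> >& work) {
    long total = 0;
    while (!work.empty()) {
      const Arr* a = work.top().first;
      int ind      = work.top().second;
      work.pop();
      int len  = (int) a->size();
      int next = ind + CHUNK;
      if (next < len) {
        work.push(std::make_pair(a, next)); // continuation for the rest
      }
      int lim = next < len ? next : len;    // MIN2(next, len)
      for (int j = ind; j < lim; j++) {     // process just this chunk
        total += (*a)[j];
      }
    }
    return total;
  }
}
// ---------------------------------------------------------------------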
2961 class CSMarkBitMapClosure: public BitMapClosure {
2962 G1CollectedHeap* _g1h;
2963 CMBitMap* _bitMap;
2964 ConcurrentMark* _cm;
2965 CSMarkOopClosure _oop_cl;
2966 public:
2967 CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2968 _g1h(G1CollectedHeap::heap()),
2969 _bitMap(cm->nextMarkBitMap()),
2970 _oop_cl(cm, ms_size)
2971 {}
2973 ~CSMarkBitMapClosure() {}
2975 bool do_bit(size_t offset) {
2976 // convert offset into a HeapWord*
2977 HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2978 assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2979 "address out of range");
2980 assert(_bitMap->isMarked(addr), "tautology");
2981 oop obj = oop(addr);
2982 if (!obj->is_forwarded()) {
2983 if (!_oop_cl.push(obj)) return false;
2984 if (UseCompressedOops) {
2985 if (!_oop_cl.drain<narrowOop>()) return false;
2986 } else {
2987 if (!_oop_cl.drain<oop>()) return false;
2988 }
2989 }
2990 // Otherwise...
2991 return true;
2992 }
2993 };
2996 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2997 CMBitMap* _bm;
2998 CSMarkBitMapClosure _bit_cl;
2999 enum SomePrivateConstants {
3000 MSSize = 1000
3001 };
3002 bool _completed;
3003 public:
3004 CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
3005 _bm(cm->nextMarkBitMap()),
3006 _bit_cl(cm, MSSize),
3007 _completed(true)
3008 {}
3010 ~CompleteMarkingInCSHRClosure() {}
3012 bool doHeapRegion(HeapRegion* r) {
3013 if (!r->evacuation_failed()) {
3014 MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3015 if (!mr.is_empty()) {
3016 if (!_bm->iterate(&_bit_cl, mr)) {
3017 _completed = false;
3018 return true;
3019 }
3020 }
3021 }
3022 return false;
3023 }
3025 bool completed() { return _completed; }
3026 };
3028 class ClearMarksInHRClosure: public HeapRegionClosure {
3029 CMBitMap* _bm;
3030 public:
3031 ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3033 bool doHeapRegion(HeapRegion* r) {
3034 if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3035 MemRegion usedMR = r->used_region();
3036 _bm->clearRange(usedMR);
3037 }
3038 return false;
3039 }
3040 };
3042 void ConcurrentMark::complete_marking_in_collection_set() {
3043 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3045 if (!g1h->mark_in_progress()) {
3046 g1h->g1_policy()->record_mark_closure_time(0.0);
3047 return;
3048 }
3050 int i = 1;
3051 double start = os::elapsedTime();
3052 while (true) {
3053 i++;
3054 CompleteMarkingInCSHRClosure cmplt(this);
3055 g1h->collection_set_iterate(&cmplt);
3056 if (cmplt.completed()) break;
3057 }
3058 double end_time = os::elapsedTime();
3059 double elapsed_time_ms = (end_time - start) * 1000.0;
3060 g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3062 ClearMarksInHRClosure clr(nextMarkBitMap());
3063 g1h->collection_set_iterate(&clr);
3064 }
3066 // The next two methods deal with the following optimisation. Some
3067 // objects are gray by being marked and located above the finger. If
3068 // they are copied, during an evacuation pause, below the finger then
3069 // they need to be pushed on the stack. The observation is that, if
3070 // there are no regions in the collection set located above the
3071 // finger, then the above cannot happen, hence we do not need to
3072 // explicitly gray any objects when copying them to below the
3073 // finger. The global stack will be scanned to ensure that, if it
3074 // points to objects being copied, it will update their
3075 // location. There is a tricky situation with the gray objects in
3076 // region stack that are being copied, however. See the comment in
3077 // newCSet().
3079 void ConcurrentMark::newCSet() {
3080 if (!concurrent_marking_in_progress()) {
3081 // nothing to do if marking is not in progress
3082 return;
3083 }
3085 // find what the lowest finger is among the global and local fingers
3086 _min_finger = _finger;
3087 for (int i = 0; i < (int)_max_task_num; ++i) {
3088 CMTask* task = _tasks[i];
3089 HeapWord* task_finger = task->finger();
3090 if (task_finger != NULL && task_finger < _min_finger) {
3091 _min_finger = task_finger;
3092 }
3093 }
3095 _should_gray_objects = false;
3097 // This fixes a very subtle and frustrating bug. It might be the case
3098 // that, during an evacuation pause, heap regions that contain
3099 // objects that are gray (by being in regions contained in the
3100 // region stack) are included in the collection set. Since such gray
3101 // objects will be moved, and because it's not easy to redirect
3102 // region stack entries to point to a new location (because objects
3103 // in one region might be scattered to multiple regions after they
3104 // are copied), one option is to ensure that all marked objects
3105 // copied during a pause are pushed on the stack. Notice, however,
3106 // that this problem can only happen when the region stack is not
3107 // empty during an evacuation pause. So, we make the fix a bit less
3108 // conservative and ensure that regions are pushed on the stack,
3109 // irrespective of whether all collection set regions are below the
3110 // finger, if the region stack is not empty. This is expected to be
3111 // a rare case, so I don't think it's necessary to be smarter about it.
3112 if (!region_stack_empty() || has_aborted_regions()) {
3113 _should_gray_objects = true;
3114 }
3115 }
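// ---------------------------------------------------------------------
// Aside: newCSet() above takes the minimum over the global finger and
// every task's local finger; only collection set regions extending above
// that minimum can hold marked objects that would need explicit
// re-graying when moved. The computation in isolation (invented names,
// not HotSpot code):
#include <cstddef>

namespace example_min_finger {
  char* min_finger(char* global, char* const* task_fingers, std::size_t n) {
    char* m = global;
    for (std::size_t i = 0; i < n; ++i) {
      if (task_fingers[i] != NULL && task_fingers[i] < m) {
        m = task_fingers[i]; // some task is scanning below the global finger
      }
    }
    return m;
  }
}
// ---------------------------------------------------------------------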
3117 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3118 if (!concurrent_marking_in_progress()) return;
3120 HeapWord* region_end = hr->end();
3121 if (region_end > _min_finger) {
3122 _should_gray_objects = true;
3123 }
3124 }
3126 // Resets the region fields of active CMTasks whose values point
3127 // into the collection set.
3128 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3129 assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3130 assert(parallel_marking_threads() <= _max_task_num, "sanity");
3132 for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3133 CMTask* task = _tasks[i];
3134 HeapWord* task_finger = task->finger();
3135 if (task_finger != NULL) {
3136 assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3137 HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3138 if (finger_region->in_collection_set()) {
3139 // The task's current region is in the collection set.
3140 // This region will be evacuated in the current GC and
3141 // the region fields in the task will be stale.
3142 task->giveup_current_region();
3143 }
3144 }
3145 }
3146 }
3148 // abandon current marking iteration due to a Full GC
3149 void ConcurrentMark::abort() {
3150 // Clear all marks to force marking thread to do nothing
3151 _nextMarkBitMap->clearAll();
3152 // Empty mark stack
3153 clear_marking_state();
3154 for (int i = 0; i < (int)_max_task_num; ++i) {
3155 _tasks[i]->clear_region_fields();
3156 }
3157 _has_aborted = true;
3159 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3160 satb_mq_set.abandon_partial_marking();
3161 // This can be called either during or outside marking, we'll read
3162 // the expected_active value from the SATB queue set.
3163 satb_mq_set.set_active_all_threads(
3164 false, /* new active value */
3165 satb_mq_set.is_active() /* expected_active */);
3166 }
3168 static void print_ms_time_info(const char* prefix, const char* name,
3169 NumberSeq& ns) {
3170 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3171 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3172 if (ns.num() > 0) {
3173 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
3174 prefix, ns.sd(), ns.maximum());
3175 }
3176 }
3178 void ConcurrentMark::print_summary_info() {
3179 gclog_or_tty->print_cr(" Concurrent marking:");
3180 print_ms_time_info(" ", "init marks", _init_times);
3181 print_ms_time_info(" ", "remarks", _remark_times);
3182 {
3183 print_ms_time_info(" ", "final marks", _remark_mark_times);
3184 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
3186 }
3187 print_ms_time_info(" ", "cleanups", _cleanup_times);
3188 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
3189 _total_counting_time,
3190 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3191 (double)_cleanup_times.num()
3192 : 0.0));
3193 if (G1ScrubRemSets) {
3194 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
3195 _total_rs_scrub_time,
3196 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3197 (double)_cleanup_times.num()
3198 : 0.0));
3199 }
3200 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
3201 (_init_times.sum() + _remark_times.sum() +
3202 _cleanup_times.sum())/1000.0);
3203 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
3204 "(%8.2f s marking, %8.2f s counting).",
3205 cmThread()->vtime_accum(),
3206 cmThread()->vtime_mark_accum(),
3207 cmThread()->vtime_count_accum());
3208 }
3210 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3211 _parallel_workers->print_worker_threads_on(st);
3212 }
3214 // Closures
3215 // XXX: there seems to be a lot of code duplication here;
3216 // should refactor and consolidate the shared code.
3221 // We take a break if someone is trying to stop the world.
3222 bool ConcurrentMark::do_yield_check(int worker_i) {
3223 if (should_yield()) {
3224 if (worker_i == 0) {
3225 _g1h->g1_policy()->record_concurrent_pause();
3226 }
3227 cmThread()->yield();
3228 if (worker_i == 0) {
3229 _g1h->g1_policy()->record_concurrent_pause_end();
3230 }
3231 return true;
3232 } else {
3233 return false;
3234 }
3235 }
3237 bool ConcurrentMark::should_yield() {
3238 return cmThread()->should_yield();
3239 }
3241 bool ConcurrentMark::containing_card_is_marked(void* p) {
3242 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3243 return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3244 }
3246 bool ConcurrentMark::containing_cards_are_marked(void* start,
3247 void* last) {
3248 return containing_card_is_marked(start) &&
3249 containing_card_is_marked(last);
3250 }
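// ---------------------------------------------------------------------
// Aside: containing_card_is_marked() above maps an address to a card
// index by taking its byte offset from the start of the reserved heap
// and shifting right by CardTableModRefBS::card_shift (cards are
// 2^card_shift bytes, typically 512 in HotSpot). The arithmetic in
// isolation (invented names):
#include <cstddef>

namespace example_card_index {
  static const int CARD_SHIFT = 9; // 512-byte cards

  std::size_t card_index(const void* p, const void* heap_start) {
    std::size_t offset =
      (std::size_t)((const char*) p - (const char*) heap_start);
    return offset >> CARD_SHIFT;   // which card this address falls on
  }
}
// ---------------------------------------------------------------------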
3252 #ifndef PRODUCT
3253 // for debugging purposes
3254 void ConcurrentMark::print_finger() {
3255 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3256 _heap_start, _heap_end, _finger);
3257 for (int i = 0; i < (int) _max_task_num; ++i) {
3258 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger());
3259 }
3260 gclog_or_tty->print_cr("");
3261 }
3262 #endif
3264 void CMTask::scan_object(oop obj) {
3265 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3267 if (_cm->verbose_high()) {
3268 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3269 _task_id, (void*) obj);
3270 }
3272 size_t obj_size = obj->size();
3273 _words_scanned += obj_size;
3275 obj->oop_iterate(_cm_oop_closure);
3276 statsOnly( ++_objs_scanned );
3277 check_limits();
3278 }
3280 // Closure for iteration over bitmaps
3281 class CMBitMapClosure : public BitMapClosure {
3282 private:
3283 // the bitmap that is being iterated over
3284 CMBitMap* _nextMarkBitMap;
3285 ConcurrentMark* _cm;
3286 CMTask* _task;
3287 // true if we're scanning a heap region claimed by the task (so that
3288 // we move the finger along), false if we're not, i.e. currently when
3289 // scanning a heap region popped from the region stack (so that we
3290 // do not move the task finger along; it'd be a mistake if we did so).
3291 bool _scanning_heap_region;
3293 public:
3294 CMBitMapClosure(CMTask *task,
3295 ConcurrentMark* cm,
3296 CMBitMap* nextMarkBitMap)
3297 : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3299 void set_scanning_heap_region(bool scanning_heap_region) {
3300 _scanning_heap_region = scanning_heap_region;
3301 }
3303 bool do_bit(size_t offset) {
3304 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3305 assert(_nextMarkBitMap->isMarked(addr), "invariant");
3306 assert( addr < _cm->finger(), "invariant");
3308 if (_scanning_heap_region) {
3309 statsOnly( _task->increase_objs_found_on_bitmap() );
3310 assert(addr >= _task->finger(), "invariant");
3311 // We move the task's local finger along.
3312 _task->move_finger_to(addr);
3313 } else {
3314 // We move the task's region finger along.
3315 _task->move_region_finger_to(addr);
3316 }
3318 _task->scan_object(oop(addr));
3319 // we only partially drain the local queue and global stack
3320 _task->drain_local_queue(true);
3321 _task->drain_global_stack(true);
3323 // if the has_aborted flag has been raised, we need to bail out of
3324 // the iteration
3325 return !_task->has_aborted();
3326 }
3327 };
3329 // Closure for iterating over objects, currently only used for
3330 // processing SATB buffers.
3331 class CMObjectClosure : public ObjectClosure {
3332 private:
3333 CMTask* _task;
3335 public:
3336 void do_object(oop obj) {
3337 _task->deal_with_reference(obj);
3338 }
3340 CMObjectClosure(CMTask* task) : _task(task) { }
3341 };
3343 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3344 ConcurrentMark* cm,
3345 CMTask* task)
3346 : _g1h(g1h), _cm(cm), _task(task) {
3347 assert(_ref_processor == NULL, "should be initialized to NULL");
3349 if (G1UseConcMarkReferenceProcessing) {
3350 _ref_processor = g1h->ref_processor_cm();
3351 assert(_ref_processor != NULL, "should not be NULL");
3352 }
3353 }
3355 void CMTask::setup_for_region(HeapRegion* hr) {
3356 // Separated the asserts so that we know which one fires.
3357 assert(hr != NULL,
3358 "claim_region() should have filtered out continues humongous regions");
3359 assert(!hr->continuesHumongous(),
3360 "claim_region() should have filtered out continues humongous regions");
3362 if (_cm->verbose_low()) {
3363 gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3364 _task_id, hr);
3365 }
3367 _curr_region = hr;
3368 _finger = hr->bottom();
3369 update_region_limit();
3370 }
3372 void CMTask::update_region_limit() {
3373 HeapRegion* hr = _curr_region;
3374 HeapWord* bottom = hr->bottom();
3375 HeapWord* limit = hr->next_top_at_mark_start();
3377 if (limit == bottom) {
3378 if (_cm->verbose_low()) {
3379 gclog_or_tty->print_cr("[%d] found an empty region "
3380 "["PTR_FORMAT", "PTR_FORMAT")",
3381 _task_id, bottom, limit);
3382 }
3383 // The region was collected underneath our feet.
3384 // We set the finger to bottom to ensure that the bitmap
3385 // iteration that will follow this will not do anything.
3386 // (this is not a condition that holds when we set the region up,
3387 // as the region is not supposed to be empty in the first place)
3388 _finger = bottom;
3389 } else if (limit >= _region_limit) {
3390 assert(limit >= _finger, "peace of mind");
3391 } else {
3392 assert(limit < _region_limit, "only way to get here");
3393 // This can happen under some pretty unusual circumstances. An
3394 // evacuation pause empties the region underneath our feet (NTAMS
3395 // at bottom). We then do some allocation in the region (NTAMS
3396 // stays at bottom), followed by the region being used as a GC
3397 // alloc region (NTAMS will move to top() and the objects
3398 // originally below it will be grayed). All objects now marked in
3399 // the region are explicitly grayed, if below the global finger,
3400 // and in fact we do not need to scan anything else. So, we simply
3401 // set _finger to limit to ensure that the bitmap iteration
3402 // doesn't do anything.
3403 _finger = limit;
3404 }
3406 _region_limit = limit;
3407 }
3409 void CMTask::giveup_current_region() {
3410 assert(_curr_region != NULL, "invariant");
3411 if (_cm->verbose_low()) {
3412 gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3413 _task_id, _curr_region);
3414 }
3415 clear_region_fields();
3416 }
3418 void CMTask::clear_region_fields() {
3419 // Values for these three fields that indicate that we're not
3420 // holding on to a region.
3421 _curr_region = NULL;
3422 _finger = NULL;
3423 _region_limit = NULL;
3425 _region_finger = NULL;
3426 }
3428 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3429 if (cm_oop_closure == NULL) {
3430 assert(_cm_oop_closure != NULL, "invariant");
3431 } else {
3432 assert(_cm_oop_closure == NULL, "invariant");
3433 }
3434 _cm_oop_closure = cm_oop_closure;
3435 }
3437 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3438 guarantee(nextMarkBitMap != NULL, "invariant");
3440 if (_cm->verbose_low()) {
3441 gclog_or_tty->print_cr("[%d] resetting", _task_id);
3442 }
3444 _nextMarkBitMap = nextMarkBitMap;
3445 clear_region_fields();
3446 assert(_aborted_region.is_empty(), "should have been cleared");
3448 _calls = 0;
3449 _elapsed_time_ms = 0.0;
3450 _termination_time_ms = 0.0;
3451 _termination_start_time_ms = 0.0;
3453 #if _MARKING_STATS_
3454 _local_pushes = 0;
3455 _local_pops = 0;
3456 _local_max_size = 0;
3457 _objs_scanned = 0;
3458 _global_pushes = 0;
3459 _global_pops = 0;
3460 _global_max_size = 0;
3461 _global_transfers_to = 0;
3462 _global_transfers_from = 0;
3463 _region_stack_pops = 0;
3464 _regions_claimed = 0;
3465 _objs_found_on_bitmap = 0;
3466 _satb_buffers_processed = 0;
3467 _steal_attempts = 0;
3468 _steals = 0;
3469 _aborted = 0;
3470 _aborted_overflow = 0;
3471 _aborted_cm_aborted = 0;
3472 _aborted_yield = 0;
3473 _aborted_timed_out = 0;
3474 _aborted_satb = 0;
3475 _aborted_termination = 0;
3476 #endif // _MARKING_STATS_
3477 }
3479 bool CMTask::should_exit_termination() {
3480 regular_clock_call();
3481 // This is called when we are in the termination protocol. We should
3482 // quit if, for some reason, this task wants to abort or the global
3483 // stack is not empty (this means that we can get work from it).
3484 return !_cm->mark_stack_empty() || has_aborted();
3485 }
3487 void CMTask::reached_limit() {
3488 assert(_words_scanned >= _words_scanned_limit ||
3489 _refs_reached >= _refs_reached_limit,
3490 "shouldn't have been called otherwise");
3491 regular_clock_call();
3492 }
3494 void CMTask::regular_clock_call() {
3495 if (has_aborted()) return;
3497 // First, we need to recalculate the words scanned and refs reached
3498 // limits for the next clock call.
3499 recalculate_limits();
3501 // During the regular clock call we do the following:
3503 // (1) If an overflow has been flagged, then we abort.
3504 if (_cm->has_overflown()) {
3505 set_has_aborted();
3506 return;
3507 }
3509 // If we are not concurrent (i.e. we're doing remark) we don't need
3510 // to check anything else. The other steps are only needed during
3511 // the concurrent marking phase.
3512 if (!concurrent()) return;
3514 // (2) If marking has been aborted for Full GC, then we also abort.
3515 if (_cm->has_aborted()) {
3516 set_has_aborted();
3517 statsOnly( ++_aborted_cm_aborted );
3518 return;
3519 }
3521 double curr_time_ms = os::elapsedVTime() * 1000.0;
3523 // (3) If marking stats are enabled, then we update the clock call statistics.
3524 #if _MARKING_STATS_
3525 if (_words_scanned >= _words_scanned_limit) {
3526 ++_clock_due_to_scanning;
3527 }
3528 if (_refs_reached >= _refs_reached_limit) {
3529 ++_clock_due_to_marking;
3530 }
3532 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3533 _interval_start_time_ms = curr_time_ms;
3534 _all_clock_intervals_ms.add(last_interval_ms);
3536 if (_cm->verbose_medium()) {
3537 gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3538 "scanned = %d%s, refs reached = %d%s",
3539 _task_id, last_interval_ms,
3540 _words_scanned,
3541 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3542 _refs_reached,
3543 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3544 }
3545 #endif // _MARKING_STATS_
3547 // (4) We check whether we should yield. If we have to, then we abort.
3548 if (_cm->should_yield()) {
3549 // We should yield. To do this we abort the task. The caller is
3550 // responsible for yielding.
3551 set_has_aborted();
3552 statsOnly( ++_aborted_yield );
3553 return;
3554 }
3556 // (5) We check whether we've reached our time quota. If we have,
3557 // then we abort.
3558 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3559 if (elapsed_time_ms > _time_target_ms) {
3560 set_has_aborted();
3561 _has_timed_out = true;
3562 statsOnly( ++_aborted_timed_out );
3563 return;
3564 }
3566 // (6) Finally, we check whether there are enough completed SATB
3567 // buffers available for processing. If there are, we abort.
3568 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3569 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3570 if (_cm->verbose_low()) {
3571 gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3572 _task_id);
3573 }
3574 // we do need to process SATB buffers, so we'll abort and restart
3575 // the marking task to do so
3576 set_has_aborted();
3577 statsOnly( ++_aborted_satb );
3578 return;
3579 }
3580 }
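// [Editor's note] A standalone sketch of the work-based scheme that triggers
// regular_clock_call(): the scan/ref counters advance as work is done, and
// the comparatively expensive clock checks only run once a counter crosses
// its limit. The names and the period value are illustrative assumptions.
//
//   struct WorkClockSketch {
//     size_t words_scanned;
//     size_t words_scanned_limit;
//     static const size_t period = 12 * 1024;   // assumed period, in words
//
//     WorkClockSketch() : words_scanned(0), words_scanned_limit(period) { }
//
//     void clock_call() {
//       // ...check overflow / abort / yield / time quota / SATB here...
//       words_scanned_limit = words_scanned + period;  // recalculate_limits()
//     }
//
//     void note_words_scanned(size_t n) {
//       words_scanned += n;
//       if (words_scanned >= words_scanned_limit) {
//         clock_call();  // only now do we pay for the expensive checks
//       }
//     }
//   };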
3582 void CMTask::recalculate_limits() {
3583 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3584 _words_scanned_limit = _real_words_scanned_limit;
3586 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3587 _refs_reached_limit = _real_refs_reached_limit;
3588 }
3590 void CMTask::decrease_limits() {
3591 // This is called when we believe that we're going to do an infrequent
3592 // operation which will increase the per-byte scanned cost (i.e. move
3593 // entries to/from the global stack). It basically tries to decrease the
3594 // scanning limit so that the clock is called earlier.
3596 if (_cm->verbose_medium()) {
3597 gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3598 }
3600 _words_scanned_limit = _real_words_scanned_limit -
3601 3 * words_scanned_period / 4;
3602 _refs_reached_limit = _real_refs_reached_limit -
3603 3 * refs_reached_period / 4;
3604 }
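// [Editor's note] To make the arithmetic above concrete, assuming (for
// illustration only) a words_scanned_period of 12K words: a freshly
// recalculated limit of _words_scanned + 12K is pulled back to
// _words_scanned + 3K, so the next clock call fires after only a quarter
// of the usual amount of scanning work.
//
//   const size_t period = 12 * 1024;                     // assumed value
//   size_t real_limit   = words_scanned + period;        // recalculate_limits()
//   size_t limit        = real_limit - 3 * period / 4;   // decrease_limits()
//   assert(limit == words_scanned + period / 4, "sketch");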
3606 void CMTask::move_entries_to_global_stack() {
3607 // local array where we'll store the entries that will be popped
3608 // from the local queue
3609 oop buffer[global_stack_transfer_size];
3611 int n = 0;
3612 oop obj;
3613 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3614 buffer[n] = obj;
3615 ++n;
3616 }
3618 if (n > 0) {
3619 // we popped at least one entry from the local queue
3621 statsOnly( ++_global_transfers_to; _local_pops += n );
3623 if (!_cm->mark_stack_push(buffer, n)) {
3624 if (_cm->verbose_low()) {
3625 gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3626 _task_id);
3627 }
3628 set_has_aborted();
3629 } else {
3630 // the transfer was successful
3632 if (_cm->verbose_medium()) {
3633 gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3634 _task_id, n);
3635 }
3636 statsOnly( int tmp_size = _cm->mark_stack_size();
3637 if (tmp_size > _global_max_size) {
3638 _global_max_size = tmp_size;
3639 }
3640 _global_pushes += n );
3641 }
3642 }
3644 // this operation was quite expensive, so decrease the limits
3645 decrease_limits();
3646 }
3648 void CMTask::get_entries_from_global_stack() {
3649 // local array where we'll store the entries that will be popped
3650 // from the global stack.
3651 oop buffer[global_stack_transfer_size];
3652 int n;
3653 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3654 assert(n <= global_stack_transfer_size,
3655 "we should not pop more than the given limit");
3656 if (n > 0) {
3657 // yes, we did actually pop at least one entry
3659 statsOnly( ++_global_transfers_from; _global_pops += n );
3660 if (_cm->verbose_medium()) {
3661 gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3662 _task_id, n);
3663 }
3664 for (int i = 0; i < n; ++i) {
3665 bool success = _task_queue->push(buffer[i]);
3666 // We only call this when the local queue is empty or under a
3667 // given target limit. So, we do not expect this push to fail.
3668 assert(success, "invariant");
3669 }
3671 statsOnly( int tmp_size = _task_queue->size();
3672 if (tmp_size > _local_max_size) {
3673 _local_max_size = tmp_size;
3674 }
3675 _local_pushes += n );
3676 }
3678 // this operation was quite expensive, so decrease the limits
3679 decrease_limits();
3680 }
3682 void CMTask::drain_local_queue(bool partially) {
3683 if (has_aborted()) return;
3685 // Decide what the target size is, depending on whether we're going to
3686 // drain it partially (so that other tasks can steal if they run out
3687 // of things to do) or totally (at the very end).
3688 size_t target_size;
3689 if (partially) {
3690 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3691 } else {
3692 target_size = 0;
3693 }
3695 if (_task_queue->size() > target_size) {
3696 if (_cm->verbose_high()) {
3697 gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3698 _task_id, target_size);
3699 }
3701 oop obj;
3702 bool ret = _task_queue->pop_local(obj);
3703 while (ret) {
3704 statsOnly( ++_local_pops );
3706 if (_cm->verbose_high()) {
3707 gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3708 (void*) obj);
3709 }
3711 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3712 assert(!_g1h->is_on_master_free_list(
3713 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3715 scan_object(obj);
3717 if (_task_queue->size() <= target_size || has_aborted()) {
3718 ret = false;
3719 } else {
3720 ret = _task_queue->pop_local(obj);
3721 }
3722 }
3724 if (_cm->verbose_high()) {
3725 gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3726 _task_id, _task_queue->size());
3727 }
3728 }
3729 }
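// [Editor's note] A sketch of the partial- vs. total-drain targets used by
// drain_local_queue() above and drain_global_stack() below. The constant
// here is an illustrative assumption, not the actual default of the
// GCDrainStackTargetSize flag.
//
//   size_t drain_target_sketch(bool partially, size_t max_elems) {
//     const size_t stack_target = 64;    // assumed flag value
//     if (partially) {
//       // leave a few entries behind so that other tasks can steal them
//       return MIN2(max_elems / 3, stack_target);
//     }
//     return 0;                          // at the very end, drain everything
//   }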
3731 void CMTask::drain_global_stack(bool partially) {
3732 if (has_aborted()) return;
3734 // We have a policy to drain the local queue before we attempt to
3735 // drain the global stack.
3736 assert(partially || _task_queue->size() == 0, "invariant");
3738 // Decide what the target size is, depending on whether we're going to
3739 // drain it partially (so that other tasks can steal if they run out
3740 // of things to do) or totally (at the very end). Notice that,
3741 // because we move entries from the global stack in chunks or
3742 // because another task might be doing the same, we might in fact
3743 // drop below the target. But this is not a problem.
3744 size_t target_size;
3745 if (partially) {
3746 target_size = _cm->partial_mark_stack_size_target();
3747 } else {
3748 target_size = 0;
3749 }
3751 if (_cm->mark_stack_size() > target_size) {
3752 if (_cm->verbose_low()) {
3753 gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3754 _task_id, target_size);
3755 }
3757 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3758 get_entries_from_global_stack();
3759 drain_local_queue(partially);
3760 }
3762 if (_cm->verbose_low()) {
3763 gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3764 _task_id, _cm->mark_stack_size());
3765 }
3766 }
3767 }
3769 // The SATB queue set makes several assumptions about whether the par
3770 // or non-par versions of its methods are called. This is why some of
3771 // the code is replicated. We should really get rid of the
3772 // single-threaded version of the code to simplify things.
3773 void CMTask::drain_satb_buffers() {
3774 if (has_aborted()) return;
3776 // We set this so that the regular clock knows that we're in the
3777 // middle of draining buffers and doesn't set the abort flag when it
3778 // notices that SATB buffers are available for draining. It'd be
3779 // very counterproductive if it did that. :-)
3780 _draining_satb_buffers = true;
3782 CMObjectClosure oc(this);
3783 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3784 if (G1CollectedHeap::use_parallel_gc_threads()) {
3785 satb_mq_set.set_par_closure(_task_id, &oc);
3786 } else {
3787 satb_mq_set.set_closure(&oc);
3788 }
3790 // This keeps claiming and applying the closure to completed buffers
3791 // until we run out of buffers or we need to abort.
3792 if (G1CollectedHeap::use_parallel_gc_threads()) {
3793 while (!has_aborted() &&
3794 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3795 if (_cm->verbose_medium()) {
3796 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3797 }
3798 statsOnly( ++_satb_buffers_processed );
3799 regular_clock_call();
3800 }
3801 } else {
3802 while (!has_aborted() &&
3803 satb_mq_set.apply_closure_to_completed_buffer()) {
3804 if (_cm->verbose_medium()) {
3805 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3806 }
3807 statsOnly( ++_satb_buffers_processed );
3808 regular_clock_call();
3809 }
3810 }
3812 if (!concurrent() && !has_aborted()) {
3813 // We should only do this during remark.
3814 if (G1CollectedHeap::use_parallel_gc_threads()) {
3815 satb_mq_set.par_iterate_closure_all_threads(_task_id);
3816 } else {
3817 satb_mq_set.iterate_closure_all_threads();
3818 }
3819 }
3821 _draining_satb_buffers = false;
3823 assert(has_aborted() ||
3824 concurrent() ||
3825 satb_mq_set.completed_buffers_num() == 0, "invariant");
3827 if (G1CollectedHeap::use_parallel_gc_threads()) {
3828 satb_mq_set.set_par_closure(_task_id, NULL);
3829 } else {
3830 satb_mq_set.set_closure(NULL);
3831 }
3833 // again, this was a potentially expensive operation, so decrease
3834 // the limits to get the regular clock call early
3835 decrease_limits();
3836 }
3838 void CMTask::drain_region_stack(BitMapClosure* bc) {
3839 if (has_aborted()) return;
3841 assert(_region_finger == NULL,
3842 "it should be NULL when we're not scanning a region");
3844 if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3845 if (_cm->verbose_low()) {
3846 gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3847 _task_id, _cm->region_stack_size());
3848 }
3850 MemRegion mr;
3852 if (!_aborted_region.is_empty()) {
3853 mr = _aborted_region;
3854 _aborted_region = MemRegion();
3856 if (_cm->verbose_low()) {
3857 gclog_or_tty->print_cr("[%d] scanning aborted region "
3858 "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3859 _task_id, mr.start(), mr.end());
3860 }
3861 } else {
3862 mr = _cm->region_stack_pop_lock_free();
3863 // it returns MemRegion() if the pop fails
3864 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3865 }
3867 while (mr.start() != NULL) {
3868 if (_cm->verbose_medium()) {
3869 gclog_or_tty->print_cr("[%d] we are scanning region "
3870 "["PTR_FORMAT", "PTR_FORMAT")",
3871 _task_id, mr.start(), mr.end());
3872 }
3874 assert(mr.end() <= _cm->finger(),
3875 "otherwise the region shouldn't be on the stack");
3876 assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3877 if (_nextMarkBitMap->iterate(bc, mr)) {
3878 assert(!has_aborted(),
3879 "cannot abort the task without aborting the bitmap iteration");
3881 // We finished iterating over the region without aborting.
3882 regular_clock_call();
3883 if (has_aborted()) {
3884 mr = MemRegion();
3885 } else {
3886 mr = _cm->region_stack_pop_lock_free();
3887 // it returns MemRegion() if the pop fails
3888 statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3889 }
3890 } else {
3891 assert(has_aborted(), "currently the only way to do so");
3893 // The only way to abort the bitmap iteration is to return
3894 // false from the do_bit() method. However, inside the
3895 // do_bit() method we move the _region_finger to point to the
3896 // object currently being looked at. So, if we bail out, we
3897 // have definitely set _region_finger to something non-null.
3898 assert(_region_finger != NULL, "invariant");
3900 // Make sure that any previously aborted region has been
3901 // cleared.
3902 assert(_aborted_region.is_empty(), "aborted region not cleared");
3904 // The iteration was actually aborted. So now _region_finger
3905 // points to the address of the object we last scanned. If we
3906 // leave it there, when we restart this task, we will rescan
3907 // the object. It is easy to avoid this. We move the finger by
3908 // enough to point to the next possible object header (the
3909 // bitmap knows by how much we need to move it as it knows its
3910 // granularity).
3911 MemRegion newRegion =
3912 MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3914 if (!newRegion.is_empty()) {
3915 if (_cm->verbose_low()) {
3916 gclog_or_tty->print_cr("[%d] recording unscanned region "
3917 "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3918 _task_id,
3919 newRegion.start(), newRegion.end());
3920 }
3921 // Now record the part of the region we didn't scan to
3922 // make sure this task scans it later.
3923 _aborted_region = newRegion;
3924 }
3925 // break from while
3926 mr = MemRegion();
3927 }
3928 _region_finger = NULL;
3929 }
3931 if (_cm->verbose_low()) {
3932 gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3933 _task_id, _cm->region_stack_size());
3934 }
3935 }
3936 }
3938 void CMTask::print_stats() {
3939 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3940 _task_id, _calls);
3941 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3942 _elapsed_time_ms, _termination_time_ms);
3943 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3944 _step_times_ms.num(), _step_times_ms.avg(),
3945 _step_times_ms.sd());
3946 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3947 _step_times_ms.maximum(), _step_times_ms.sum());
3949 #if _MARKING_STATS_
3950 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3951 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3952 _all_clock_intervals_ms.sd());
3953 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
3954 _all_clock_intervals_ms.maximum(),
3955 _all_clock_intervals_ms.sum());
3956 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
3957 _clock_due_to_scanning, _clock_due_to_marking);
3958 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
3959 _objs_scanned, _objs_found_on_bitmap);
3960 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
3961 _local_pushes, _local_pops, _local_max_size);
3962 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
3963 _global_pushes, _global_pops, _global_max_size);
3964 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
3965 _global_transfers_to, _global_transfers_from);
3966 gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d",
3967 _regions_claimed, _region_stack_pops);
3968 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
3969 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
3970 _steal_attempts, _steals);
3971 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
3972 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
3973 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3974 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
3975 _aborted_timed_out, _aborted_satb, _aborted_termination);
3976 #endif // _MARKING_STATS_
3977 }
3979 /*****************************************************************************
3981 The do_marking_step(time_target_ms) method is the building block
3982 of the parallel marking framework. It can be called in parallel
3983 with other invocations of do_marking_step() on different tasks
3984 (but only one per task, obviously) and concurrently with the
3985 mutator threads, or during remark; hence it eliminates the need
3986 for two versions of the code. When called during remark, it will
3987 pick up from where the task left off during the concurrent marking
3988 phase. Interestingly, tasks are also claimable during evacuation
3989 pauses, since do_marking_step() ensures that it aborts before
3990 it needs to yield.
3992 The data structures that it uses to do marking work are the
3993 following:
3995 (1) Marking Bitmap. If there are gray objects that appear only
3996 on the bitmap (this happens either when dealing with an overflow
3997 or when the initial marking phase has simply marked the roots
3998 and didn't push them on the stack), then tasks claim heap
3999 regions whose bitmap they then scan to find gray objects. A
4000 global finger indicates where the end of the last claimed region
4001 is. A local finger indicates how far into the region a task has
4002 scanned. The two fingers are used to determine how to gray an
4003 object (i.e. whether simply marking it is OK, as it will be
4004 visited by a task in the future, or whether it needs to be also
4005 pushed on a stack).
4007 (2) Local Queue. The local queue of the task, which its owner can
4008 access reasonably efficiently. Other tasks can steal from
4009 it when they run out of work. Throughout the marking phase, a
4010 task attempts to keep its local queue short but not totally
4011 empty, so that entries are available for stealing by other
4012 tasks. Only when there is no more work will a task totally
4013 drain its local queue.
4015 (3) Global Mark Stack. This handles local queue overflow. During
4016 marking, only sets of entries are moved between it and the local
4017 queues, as access to it requires a mutex and more fine-grained
4018 interaction with it might cause contention. If it
4019 overflows, then the marking phase should restart and iterate
4020 over the bitmap to identify gray objects. Throughout the marking
4021 phase, tasks attempt to keep the global mark stack at a small
4022 length but not totally empty, so that entries are available for
4023 popping by other tasks. Only when there is no more work will tasks
4024 totally drain the global mark stack.
4026 (4) Global Region Stack. Entries on it correspond to areas of
4027 the bitmap that need to be scanned since they contain gray
4028 objects. Pushes on the region stack only happen during
4029 evacuation pauses and typically correspond to areas covered by
4030 GC LABs. If it overflows, then the marking phase should restart
4031 and iterate over the bitmap to identify gray objects. Tasks will
4032 try to totally drain the region stack as soon as possible.
4034 (5) SATB Buffer Queue. This is where completed SATB buffers are
4035 made available. Buffers are regularly removed from this queue
4036 and scanned for roots, so that the queue doesn't get too
4037 long. During remark, all completed buffers are processed, as
4038 well as the filled-in parts of any uncompleted buffers.
4040 The do_marking_step() method tries to abort when the time target
4041 has been reached. There are a few other cases when the
4042 do_marking_step() method also aborts:
4044 (1) When the marking phase has been aborted (after a Full GC).
4046 (2) When a global overflow (either on the global stack or the
4047 region stack) has been triggered. Before the task aborts, it
4048 will actually sync up with the other tasks to ensure that all
4049 the marking data structures (local queues, stacks, fingers etc.)
4050 are re-initialised so that when do_marking_step() completes,
4051 the marking phase can immediately restart.
4053 (3) When enough completed SATB buffers are available. The
4054 do_marking_step() method only tries to drain SATB buffers right
4055 at the beginning. So, if enough buffers are available, the
4056 marking step aborts and the SATB buffers are processed at
4057 the beginning of the next invocation.
4059 (4) To yield. When we have to yield then we abort and yield
4060 right at the end of do_marking_step(). This saves us from a lot
4061 of hassle as, by yielding, we might allow a Full GC. If this
4062 happens then objects will be compacted underneath our feet, the
4063 heap might shrink, etc. We avoid having to check for this by just
4064 aborting and doing the yield right at the end.
4066 From the above it follows that the do_marking_step() method should
4067 be called in a loop (or, otherwise, regularly) until it completes.
4069 If a marking step completes without its has_aborted() flag being
4070 true, it means it has completed the current marking phase (and
4071 also all other marking tasks have done so and have all synced up).
4073 A method called regular_clock_call() is invoked "regularly" (in
4074 sub ms intervals) throughout marking. It is this clock method that
4075 checks all the abort conditions which were mentioned above and
4076 decides when the task should abort. A work-based scheme is used to
4077 trigger this clock method: when the number of object words the
4078 marking phase has scanned or the number of references the marking
4079 phase has visited reach a given limit. Additional invocations of
4080 the clock method have been planted in a few other strategic places
4081 too. The initial reason for the clock method was to avoid calling
4082 vtime too regularly, as it is quite expensive. So, once it was in
4083 place, it was natural to piggy-back all the other conditions on it
4084 too and not constantly check them throughout the code.
4086 *****************************************************************************/
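// [Editor's note] A minimal sketch of the call-in-a-loop protocol described
// above. MarkingTaskSketch is a hypothetical stand-in, not the real CMTask;
// in HotSpot the equivalent loops live in the concurrent marking and remark
// drivers.
//
//   struct MarkingTaskSketch {
//     bool _aborted;
//     MarkingTaskSketch() : _aborted(false) { }
//     void do_marking_step(double target_ms) {
//       // ...do up to target_ms worth of work; set _aborted if we had to
//       // bail out (overflow, yield, time quota, SATB buffers, ...)...
//     }
//     bool has_aborted() const { return _aborted; }
//   };
//
//   void marking_driver_sketch(MarkingTaskSketch* task) {
//     do {
//       task->do_marking_step(10.0 /* ms time target */);
//       // a real driver yields / checks for safepoints here
//     } while (task->has_aborted());
//   }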
4088 void CMTask::do_marking_step(double time_target_ms,
4089 bool do_stealing,
4090 bool do_termination) {
4091 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4092 assert(concurrent() == _cm->concurrent(), "they should be the same");
4094 assert(concurrent() || _cm->region_stack_empty(),
4095 "the region stack should have been cleared before remark");
4096 assert(concurrent() || !_cm->has_aborted_regions(),
4097 "aborted regions should have been cleared before remark");
4098 assert(_region_finger == NULL,
4099 "this should be non-null only when a region is being scanned");
4101 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4102 assert(_task_queues != NULL, "invariant");
4103 assert(_task_queue != NULL, "invariant");
4104 assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4106 assert(!_claimed,
4107 "only one thread should claim this task at any one time");
4109 // OK, this doesn't safeguard against all possible scenarios, as it is
4110 // possible for two threads to set the _claimed flag at the same
4111 // time. But it is only for debugging purposes anyway and it will
4112 // catch most problems.
4113 _claimed = true;
4115 _start_time_ms = os::elapsedVTime() * 1000.0;
4116 statsOnly( _interval_start_time_ms = _start_time_ms );
4118 double diff_prediction_ms =
4119 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4120 _time_target_ms = time_target_ms - diff_prediction_ms;
4122 // set up the variables that are used in the work-based scheme to
4123 // call the regular clock method
4124 _words_scanned = 0;
4125 _refs_reached = 0;
4126 recalculate_limits();
4128 // clear all flags
4129 clear_has_aborted();
4130 _has_timed_out = false;
4131 _draining_satb_buffers = false;
4133 ++_calls;
4135 if (_cm->verbose_low()) {
4136 gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4137 "target = %1.2lfms >>>>>>>>>>",
4138 _task_id, _calls, _time_target_ms);
4139 }
4141 // Set up the bitmap and oop closures. Anything that uses them is
4142 // eventually called from this method, so it is OK to allocate these
4143 // on the stack.
4144 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4145 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4146 set_cm_oop_closure(&cm_oop_closure);
4148 if (_cm->has_overflown()) {
4149 // This can happen if the region stack or the mark stack overflows
4150 // during a GC pause and this task, after a yield point,
4151 // restarts. We have to abort as we need to get into the overflow
4152 // protocol which happens right at the end of this task.
4153 set_has_aborted();
4154 }
4156 // First drain any available SATB buffers. After this, we will not
4157 // look at SATB buffers before the next invocation of this method.
4158 // If enough completed SATB buffers are queued up, the regular clock
4159 // will abort this task so that it restarts.
4160 drain_satb_buffers();
4161 // ...then partially drain the local queue and the global stack
4162 drain_local_queue(true);
4163 drain_global_stack(true);
4165 // Then totally drain the region stack. We will not look at
4166 // it again before the next invocation of this method. Entries on
4167 // the region stack are only added during evacuation pauses, for
4168 // which we have to yield. When we do, we abort the task anyway so
4169 // it will look at the region stack again when it restarts.
4170 bitmap_closure.set_scanning_heap_region(false);
4171 drain_region_stack(&bitmap_closure);
4172 // ...then partially drain the local queue and the global stack
4173 drain_local_queue(true);
4174 drain_global_stack(true);
4176 do {
4177 if (!has_aborted() && _curr_region != NULL) {
4178 // This means that we're already holding on to a region.
4179 assert(_finger != NULL, "if region is not NULL, then the finger "
4180 "should not be NULL either");
4182 // We might have restarted this task after an evacuation pause
4183 // which might have evacuated the region we're holding on to
4184 // underneath our feet. Let's read its limit again to make sure
4185 // that we do not iterate over a region of the heap that
4186 // contains garbage (update_region_limit() will also move
4187 // _finger to the start of the region if it is found empty).
4188 update_region_limit();
4189 // We will start from _finger not from the start of the region,
4190 // as we might be restarting this task after aborting half-way
4191 // through scanning this region. In this case, _finger points to
4192 // the address where we last found a marked object. If this is a
4193 // fresh region, _finger points to start().
4194 MemRegion mr = MemRegion(_finger, _region_limit);
4196 if (_cm->verbose_low()) {
4197 gclog_or_tty->print_cr("[%d] we're scanning part "
4198 "["PTR_FORMAT", "PTR_FORMAT") "
4199 "of region "PTR_FORMAT,
4200 _task_id, _finger, _region_limit, _curr_region);
4201 }
4203 // Let's iterate over the bitmap of the part of the
4204 // region that is left.
4205 bitmap_closure.set_scanning_heap_region(true);
4206 if (mr.is_empty() ||
4207 _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4208 // We successfully completed iterating over the region. Now,
4209 // let's give up the region.
4210 giveup_current_region();
4211 regular_clock_call();
4212 } else {
4213 assert(has_aborted(), "currently the only way to do so");
4214 // The only way to abort the bitmap iteration is to return
4215 // false from the do_bit() method. However, inside the
4216 // do_bit() method we move the _finger to point to the
4217 // object currently being looked at. So, if we bail out, we
4218 // have definitely set _finger to something non-null.
4219 assert(_finger != NULL, "invariant");
4221 // Region iteration was actually aborted. So now _finger
4222 // points to the address of the object we last scanned. If we
4223 // leave it there, when we restart this task, we will rescan
4224 // the object. It is easy to avoid this. We move the finger by
4225 // enough to point to the next possible object header (the
4226 // bitmap knows by how much we need to move it as it knows its
4227 // granularity).
4228 assert(_finger < _region_limit, "invariant");
4229 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4230 // Check if bitmap iteration was aborted while scanning the last object
4231 if (new_finger >= _region_limit) {
4232 giveup_current_region();
4233 } else {
4234 move_finger_to(new_finger);
4235 }
4236 }
4237 }
4238 // At this point we have either completed iterating over the
4239 // region we were holding on to, or we have aborted.
4241 // We then partially drain the local queue and the global stack.
4242 // (Do we really need this?)
4243 drain_local_queue(true);
4244 drain_global_stack(true);
4246 // Read the note on the claim_region() method on why it might
4247 // return NULL with potentially more regions available for
4248 // claiming and why we have to check out_of_regions() to determine
4249 // whether we're done or not.
4250 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4251 // We are going to try to claim a new region. We should have
4252 // given up on the previous one.
4253 // Separated the asserts so that we know which one fires.
4254 assert(_curr_region == NULL, "invariant");
4255 assert(_finger == NULL, "invariant");
4256 assert(_region_limit == NULL, "invariant");
4257 if (_cm->verbose_low()) {
4258 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4259 }
4260 HeapRegion* claimed_region = _cm->claim_region(_task_id);
4261 if (claimed_region != NULL) {
4262 // Yes, we managed to claim one
4263 statsOnly( ++_regions_claimed );
4265 if (_cm->verbose_low()) {
4266 gclog_or_tty->print_cr("[%d] we successfully claimed "
4267 "region "PTR_FORMAT,
4268 _task_id, claimed_region);
4269 }
4271 setup_for_region(claimed_region);
4272 assert(_curr_region == claimed_region, "invariant");
4273 }
4274 // It is important to call the regular clock here. It might take
4275 // a while to claim a region if, for example, we hit a large
4276 // block of empty regions. So we need to call the regular clock
4277 // method once round the loop to make sure it's called
4278 // frequently enough.
4279 regular_clock_call();
4280 }
4282 if (!has_aborted() && _curr_region == NULL) {
4283 assert(_cm->out_of_regions(),
4284 "at this point we should be out of regions");
4285 }
4286 } while (_curr_region != NULL && !has_aborted());
4288 if (!has_aborted()) {
4289 // We cannot check whether the global stack is empty, since other
4290 // tasks might be pushing objects to it concurrently. We also cannot
4291 // check if the region stack is empty because if a thread is aborting
4292 // it can push a partially done region back.
4293 assert(_cm->out_of_regions(),
4294 "at this point we should be out of regions");
4296 if (_cm->verbose_low()) {
4297 gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4298 }
4300 // Try to reduce the number of available SATB buffers so that
4301 // remark has less work to do.
4302 drain_satb_buffers();
4303 }
4305 // Since we've done everything else, we can now totally drain the
4306 // local queue and global stack.
4307 drain_local_queue(false);
4308 drain_global_stack(false);
4310 // Attempt to steal work from other tasks' queues.
4311 if (do_stealing && !has_aborted()) {
4312 // We have not aborted. This means that we have finished all that
4313 // we could. Let's try to do some stealing...
4315 // We cannot check whether the global stack is empty, since other
4316 // tasks might be pushing objects to it concurrently. We also cannot
4317 // check if the region stack is empty because if a thread is aborting
4318 // it can push a partially done region back.
4319 assert(_cm->out_of_regions() && _task_queue->size() == 0,
4320 "only way to reach here");
4322 if (_cm->verbose_low()) {
4323 gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4324 }
4326 while (!has_aborted()) {
4327 oop obj;
4328 statsOnly( ++_steal_attempts );
4330 if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4331 if (_cm->verbose_medium()) {
4332 gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4333 _task_id, (void*) obj);
4334 }
4336 statsOnly( ++_steals );
4338 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4339 "any stolen object should be marked");
4340 scan_object(obj);
4342 // And since we're towards the end, let's totally drain the
4343 // local queue and global stack.
4344 drain_local_queue(false);
4345 drain_global_stack(false);
4346 } else {
4347 break;
4348 }
4349 }
4350 }
4352 // If we are about to wrap up and go into termination, check if we
4353 // should raise the overflow flag.
4354 if (do_termination && !has_aborted()) {
4355 if (_cm->force_overflow()->should_force()) {
4356 _cm->set_has_overflown();
4357 regular_clock_call();
4358 }
4359 }
4361 // We still haven't aborted. Now, let's try to get into the
4362 // termination protocol.
4363 if (do_termination && !has_aborted()) {
4364 // We cannot check whether the global stack is empty, since other
4365 // tasks might be concurrently pushing objects on it. We also cannot
4366 // check if the region stack is empty because if a thread is aborting
4367 // it can push a partially done region back.
4368 // Separated the asserts so that we know which one fires.
4369 assert(_cm->out_of_regions(), "only way to reach here");
4370 assert(_task_queue->size() == 0, "only way to reach here");
4372 if (_cm->verbose_low()) {
4373 gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4374 }
4376 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4377 // The CMTask class also extends the TerminatorTerminator class,
4378 // hence its should_exit_termination() method will also decide
4379 // whether to exit the termination protocol or not.
4380 bool finished = _cm->terminator()->offer_termination(this);
4381 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4382 _termination_time_ms +=
4383 termination_end_time_ms - _termination_start_time_ms;
4385 if (finished) {
4386 // We're all done.
4388 if (_task_id == 0) {
4389 // let's allow task 0 to do this
4390 if (concurrent()) {
4391 assert(_cm->concurrent_marking_in_progress(), "invariant");
4392 // we need to set this to false before the next
4393 // safepoint. This way we ensure that the marking phase
4394 // doesn't observe any more heap expansions.
4395 _cm->clear_concurrent_marking_in_progress();
4396 }
4397 }
4399 // We can now guarantee that the global stack is empty, since
4400 // all other tasks have finished. We separated the guarantees so
4401 // that, if a condition is false, we can immediately find out
4402 // which one.
4403 guarantee(_cm->out_of_regions(), "only way to reach here");
4404 guarantee(_aborted_region.is_empty(), "only way to reach here");
4405 guarantee(_cm->region_stack_empty(), "only way to reach here");
4406 guarantee(_cm->mark_stack_empty(), "only way to reach here");
4407 guarantee(_task_queue->size() == 0, "only way to reach here");
4408 guarantee(!_cm->has_overflown(), "only way to reach here");
4409 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4410 guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4412 if (_cm->verbose_low()) {
4413 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4414 }
4415 } else {
4416 // Apparently there's more work to do. Let's abort this task. The
4417 // caller will restart it and we can hopefully find more things to do.
4419 if (_cm->verbose_low()) {
4420 gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4421 _task_id);
4422 }
4424 set_has_aborted();
4425 statsOnly( ++_aborted_termination );
4426 }
4427 }
4429 // Mainly for debugging purposes to make sure that a pointer to the
4430 // closure which was stack-allocated in this frame doesn't
4431 // escape it by accident.
4432 set_cm_oop_closure(NULL);
4433 double end_time_ms = os::elapsedVTime() * 1000.0;
4434 double elapsed_time_ms = end_time_ms - _start_time_ms;
4435 // Update the step history.
4436 _step_times_ms.add(elapsed_time_ms);
4438 if (has_aborted()) {
4439 // The task was aborted for some reason.
4441 statsOnly( ++_aborted );
4443 if (_has_timed_out) {
4444 double diff_ms = elapsed_time_ms - _time_target_ms;
4445 // Keep statistics of how well we did with respect to hitting
4446 // our target only if we actually timed out (if we aborted for
4447 // other reasons, then the results might get skewed).
4448 _marking_step_diffs_ms.add(diff_ms);
4449 }
4451 if (_cm->has_overflown()) {
4452 // This is the interesting one. We aborted because a global
4453 // overflow was raised. This means we have to restart the
4454 // marking phase and start iterating over regions. However, in
4455 // order to do this we have to make sure that all tasks stop
4456 // what they are doing and re-initialise in a safe manner. We
4457 // will achieve this with the use of two barrier sync points.
4459 if (_cm->verbose_low()) {
4460 gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4461 }
4463 _cm->enter_first_sync_barrier(_task_id);
4464 // When we exit this sync barrier we know that all tasks have
4465 // stopped doing marking work. So, it's now safe to
4466 // re-initialise our data structures. At the end of this method,
4467 // task 0 will clear the global data structures.
4469 statsOnly( ++_aborted_overflow );
4471 // We clear the local state of this task...
4472 clear_region_fields();
4474 // ...and enter the second barrier.
4475 _cm->enter_second_sync_barrier(_task_id);
4476 // At this point everything has been re-initialised and we're
4477 // ready to restart.
4478 }
4480 if (_cm->verbose_low()) {
4481 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4482 "elapsed = %1.2lfms <<<<<<<<<<",
4483 _task_id, _time_target_ms, elapsed_time_ms);
4484 if (_cm->has_aborted()) {
4485 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4486 _task_id);
4487 }
4488 }
4489 } else {
4490 if (_cm->verbose_low()) {
4491 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4492 "elapsed = %1.2lfms <<<<<<<<<<",
4493 _task_id, _time_target_ms, elapsed_time_ms);
4494 }
4495 }
4497 _claimed = false;
4498 }
4500 CMTask::CMTask(int task_id,
4501 ConcurrentMark* cm,
4502 CMTaskQueue* task_queue,
4503 CMTaskQueueSet* task_queues)
4504 : _g1h(G1CollectedHeap::heap()),
4505 _task_id(task_id), _cm(cm),
4506 _claimed(false),
4507 _nextMarkBitMap(NULL), _hash_seed(17),
4508 _task_queue(task_queue),
4509 _task_queues(task_queues),
4510 _cm_oop_closure(NULL),
4511 _aborted_region(MemRegion()) {
4512 guarantee(task_queue != NULL, "invariant");
4513 guarantee(task_queues != NULL, "invariant");
4515 statsOnly( _clock_due_to_scanning = 0;
4516 _clock_due_to_marking = 0 );
4518 _marking_step_diffs_ms.add(0.5);
4519 }
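// [Editor's note] On the _marking_step_diffs_ms.add(0.5) seeding above:
// do_marking_step() shortens its time target by a prediction of how much a
// step usually overshoots (_time_target_ms = time_target_ms -
// diff_prediction_ms). Seeding the sequence with 0.5 gives the predictor a
// small, conservative initial overshoot estimate before any real samples
// exist. For illustration:
//
//   double target_ms   = 10.0;                   // caller's time target
//   double predicted   = 0.5;                    // initial overshoot estimate
//   double adjusted_ms = target_ms - predicted;  // aim for 9.5ms of work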
4521 // These are formatting macros that are used below to ensure
4522 // consistent formatting. The *_H_* versions are used to format the
4523 // header for a particular value and they should be kept consistent
4524 // with the corresponding macro. Also note that most of the macros add
4525 // the necessary white space (as a prefix) which makes them a bit
4526 // easier to compose.
4528 // All the output lines are prefixed with this string to be able to
4529 // identify them easily in a large log file.
4530 #define G1PPRL_LINE_PREFIX "###"
4532 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
4533 #ifdef _LP64
4534 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
4535 #else // _LP64
4536 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
4537 #endif // _LP64
4539 // For per-region info
4540 #define G1PPRL_TYPE_FORMAT " %-4s"
4541 #define G1PPRL_TYPE_H_FORMAT " %4s"
4542 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
4543 #define G1PPRL_BYTE_H_FORMAT " %9s"
4544 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
4545 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
4547 // For summary info
4548 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
4549 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
4550 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
4551 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4553 G1PrintRegionLivenessInfoClosure::
4554 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4555 : _out(out),
4556 _total_used_bytes(0), _total_capacity_bytes(0),
4557 _total_prev_live_bytes(0), _total_next_live_bytes(0),
4558 _hum_used_bytes(0), _hum_capacity_bytes(0),
4559 _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4560 G1CollectedHeap* g1h = G1CollectedHeap::heap();
4561 MemRegion g1_committed = g1h->g1_committed();
4562 MemRegion g1_reserved = g1h->g1_reserved();
4563 double now = os::elapsedTime();
4565 // Print the header of the output.
4566 _out->cr();
4567 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4568 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4569 G1PPRL_SUM_ADDR_FORMAT("committed")
4570 G1PPRL_SUM_ADDR_FORMAT("reserved")
4571 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4572 g1_committed.start(), g1_committed.end(),
4573 g1_reserved.start(), g1_reserved.end(),
4574 HeapRegion::GrainBytes);
4575 _out->print_cr(G1PPRL_LINE_PREFIX);
4576 _out->print_cr(G1PPRL_LINE_PREFIX
4577 G1PPRL_TYPE_H_FORMAT
4578 G1PPRL_ADDR_BASE_H_FORMAT
4579 G1PPRL_BYTE_H_FORMAT
4580 G1PPRL_BYTE_H_FORMAT
4581 G1PPRL_BYTE_H_FORMAT
4582 G1PPRL_DOUBLE_H_FORMAT,
4583 "type", "address-range",
4584 "used", "prev-live", "next-live", "gc-eff");
4585 _out->print_cr(G1PPRL_LINE_PREFIX
4586 G1PPRL_TYPE_H_FORMAT
4587 G1PPRL_ADDR_BASE_H_FORMAT
4588 G1PPRL_BYTE_H_FORMAT
4589 G1PPRL_BYTE_H_FORMAT
4590 G1PPRL_BYTE_H_FORMAT
4591 G1PPRL_DOUBLE_H_FORMAT,
4592 "", "",
4593 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4594 }
4596 // It takes as a parameter a pointer to one of the _hum_* fields. It
4597 // deduces the corresponding value for a region in a humongous region
4598 // series (either the region size, or what's left if the _hum_* field
4599 // is < the region size), and updates the _hum_* field accordingly.
4600 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4601 size_t bytes = 0;
4602 // The > 0 check is to deal with the prev and next live bytes which
4603 // could be 0.
4604 if (*hum_bytes > 0) {
4605 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4606 *hum_bytes -= bytes;
4607 }
4608 return bytes;
4609 }
4611 // It deduces the values for a region in a humongous region series
4612 // from the _hum_* fields and updates those accordingly. It assumes
4613 // that the _hum_* fields have already been set up from the "starts
4614 // humongous" region and we visit the regions in address order.
4615 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4616 size_t* capacity_bytes,
4617 size_t* prev_live_bytes,
4618 size_t* next_live_bytes) {
4619 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4620 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4621 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4622 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4623 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4624 }
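// [Editor's note] To illustrate the spreading logic above, assume (for the
// sake of the example) a 1 MB region size: a humongous series with 2.5 MB
// used is then reported as 1 MB + 1 MB + 0.5 MB across its three regions.
//
//   size_t grain = 1 * M;                                    // assumed region size
//   size_t hum_used = 2 * grain + grain / 2;                 // set up by the "HUMS" region
//   assert(get_hum_bytes(&hum_used) == grain, "sketch");     // starts humongous
//   assert(get_hum_bytes(&hum_used) == grain, "sketch");     // continues humongous
//   assert(get_hum_bytes(&hum_used) == grain / 2, "sketch"); // last region, partial
//   assert(hum_used == 0, "sketch: series fully accounted for");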
4626 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4627 const char* type = "";
4628 HeapWord* bottom = r->bottom();
4629 HeapWord* end = r->end();
4630 size_t capacity_bytes = r->capacity();
4631 size_t used_bytes = r->used();
4632 size_t prev_live_bytes = r->live_bytes();
4633 size_t next_live_bytes = r->next_live_bytes();
4634 double gc_eff = r->gc_efficiency();
4635 if (r->used() == 0) {
4636 type = "FREE";
4637 } else if (r->is_survivor()) {
4638 type = "SURV";
4639 } else if (r->is_young()) {
4640 type = "EDEN";
4641 } else if (r->startsHumongous()) {
4642 type = "HUMS";
4644 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4645 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4646 "they should have been zeroed after the last time we used them");
4647 // Set up the _hum_* fields.
4648 _hum_capacity_bytes = capacity_bytes;
4649 _hum_used_bytes = used_bytes;
4650 _hum_prev_live_bytes = prev_live_bytes;
4651 _hum_next_live_bytes = next_live_bytes;
4652 get_hum_bytes(&used_bytes, &capacity_bytes,
4653 &prev_live_bytes, &next_live_bytes);
4654 end = bottom + HeapRegion::GrainWords;
4655 } else if (r->continuesHumongous()) {
4656 type = "HUMC";
4657 get_hum_bytes(&used_bytes, &capacity_bytes,
4658 &prev_live_bytes, &next_live_bytes);
4659 assert(end == bottom + HeapRegion::GrainWords, "invariant");
4660 } else {
4661 type = "OLD";
4662 }
4664 _total_used_bytes += used_bytes;
4665 _total_capacity_bytes += capacity_bytes;
4666 _total_prev_live_bytes += prev_live_bytes;
4667 _total_next_live_bytes += next_live_bytes;
4669 // Print a line for this particular region.
4670 _out->print_cr(G1PPRL_LINE_PREFIX
4671 G1PPRL_TYPE_FORMAT
4672 G1PPRL_ADDR_BASE_FORMAT
4673 G1PPRL_BYTE_FORMAT
4674 G1PPRL_BYTE_FORMAT
4675 G1PPRL_BYTE_FORMAT
4676 G1PPRL_DOUBLE_FORMAT,
4677 type, bottom, end,
4678 used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4680 return false;
4681 }
4683 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4684 // Print the footer of the output.
4685 _out->print_cr(G1PPRL_LINE_PREFIX);
4686 _out->print_cr(G1PPRL_LINE_PREFIX
4687 " SUMMARY"
4688 G1PPRL_SUM_MB_FORMAT("capacity")
4689 G1PPRL_SUM_MB_PERC_FORMAT("used")
4690 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4691 G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4692 bytes_to_mb(_total_capacity_bytes),
4693 bytes_to_mb(_total_used_bytes),
4694 perc(_total_used_bytes, _total_capacity_bytes),
4695 bytes_to_mb(_total_prev_live_bytes),
4696 perc(_total_prev_live_bytes, _total_capacity_bytes),
4697 bytes_to_mb(_total_next_live_bytes),
4698 perc(_total_next_live_bytes, _total_capacity_bytes));
4699 _out->cr();
4700 }