src/share/vm/gc_implementation/g1/concurrentMark.cpp

changeset:   7535:7ae4e26cb1e0 (merge)
author:      aoqi
date:        Thu, 12 Oct 2017 21:27:07 +0800
parents:     7370:8d27d6113625, 6876:710a3c8b516e
child:       7994:04ff2f6cd0eb
permissions: -rw-r--r--
aoqi@0 1 /*
aoqi@0 2 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
aoqi@0 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
aoqi@0 4 *
aoqi@0 5 * This code is free software; you can redistribute it and/or modify it
aoqi@0 6 * under the terms of the GNU General Public License version 2 only, as
aoqi@0 7 * published by the Free Software Foundation.
aoqi@0 8 *
aoqi@0 9 * This code is distributed in the hope that it will be useful, but WITHOUT
aoqi@0 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
aoqi@0 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
aoqi@0 12 * version 2 for more details (a copy is included in the LICENSE file that
aoqi@0 13 * accompanied this code).
aoqi@0 14 *
aoqi@0 15 * You should have received a copy of the GNU General Public License version
aoqi@0 16 * 2 along with this work; if not, write to the Free Software Foundation,
aoqi@0 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
aoqi@0 18 *
aoqi@0 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
aoqi@0 20 * or visit www.oracle.com if you need additional information or have any
aoqi@0 21 * questions.
aoqi@0 22 *
aoqi@0 23 */
aoqi@0 24
aoqi@0 25 #include "precompiled.hpp"
stefank@7333 26 #include "classfile/metadataOnStackMark.hpp"
aoqi@0 27 #include "classfile/symbolTable.hpp"
stefank@6992 28 #include "code/codeCache.hpp"
aoqi@0 29 #include "gc_implementation/g1/concurrentMark.inline.hpp"
aoqi@0 30 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
aoqi@0 31 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
aoqi@0 32 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
aoqi@0 33 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
aoqi@0 34 #include "gc_implementation/g1/g1Log.hpp"
aoqi@0 35 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
aoqi@0 36 #include "gc_implementation/g1/g1RemSet.hpp"
aoqi@0 37 #include "gc_implementation/g1/heapRegion.inline.hpp"
tschatzl@7091 38 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
aoqi@0 39 #include "gc_implementation/g1/heapRegionRemSet.hpp"
tschatzl@7051 40 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
aoqi@0 41 #include "gc_implementation/shared/vmGCOperations.hpp"
aoqi@0 42 #include "gc_implementation/shared/gcTimer.hpp"
aoqi@0 43 #include "gc_implementation/shared/gcTrace.hpp"
aoqi@0 44 #include "gc_implementation/shared/gcTraceTime.hpp"
stefank@6992 45 #include "memory/allocation.hpp"
aoqi@0 46 #include "memory/genOopClosures.inline.hpp"
aoqi@0 47 #include "memory/referencePolicy.hpp"
aoqi@0 48 #include "memory/resourceArea.hpp"
aoqi@0 49 #include "oops/oop.inline.hpp"
aoqi@0 50 #include "runtime/handles.inline.hpp"
aoqi@0 51 #include "runtime/java.hpp"
goetz@6912 52 #include "runtime/prefetch.inline.hpp"
aoqi@0 53 #include "services/memTracker.hpp"
aoqi@0 54
aoqi@0 55 // Concurrent marking bit map wrapper
aoqi@0 56
aoqi@0 57 CMBitMapRO::CMBitMapRO(int shifter) :
aoqi@0 58 _bm(),
aoqi@0 59 _shifter(shifter) {
aoqi@0 60 _bmStartWord = 0;
aoqi@0 61 _bmWordSize = 0;
aoqi@0 62 }
aoqi@0 63
stefank@6992 64 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
stefank@6992 65 const HeapWord* limit) const {
aoqi@0 66 // First we must round addr *up* to a possible object boundary.
aoqi@0 67 addr = (HeapWord*)align_size_up((intptr_t)addr,
aoqi@0 68 HeapWordSize << _shifter);
aoqi@0 69 size_t addrOffset = heapWordToOffset(addr);
aoqi@0 70 if (limit == NULL) {
aoqi@0 71 limit = _bmStartWord + _bmWordSize;
aoqi@0 72 }
aoqi@0 73 size_t limitOffset = heapWordToOffset(limit);
aoqi@0 74 size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
aoqi@0 75 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
aoqi@0 76 assert(nextAddr >= addr, "get_next_one postcondition");
aoqi@0 77 assert(nextAddr == limit || isMarked(nextAddr),
aoqi@0 78 "get_next_one postcondition");
aoqi@0 79 return nextAddr;
aoqi@0 80 }
aoqi@0 81
stefank@6992 82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
stefank@6992 83 const HeapWord* limit) const {
aoqi@0 84 size_t addrOffset = heapWordToOffset(addr);
aoqi@0 85 if (limit == NULL) {
aoqi@0 86 limit = _bmStartWord + _bmWordSize;
aoqi@0 87 }
aoqi@0 88 size_t limitOffset = heapWordToOffset(limit);
aoqi@0 89 size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
aoqi@0 90 HeapWord* nextAddr = offsetToHeapWord(nextOffset);
aoqi@0 91 assert(nextAddr >= addr, "get_next_zero postcondition");
aoqi@0 92 assert(nextAddr == limit || !isMarked(nextAddr),
aoqi@0 93 "get_next_zero postcondition");
aoqi@0 94 return nextAddr;
aoqi@0 95 }
aoqi@0 96
aoqi@0 97 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
aoqi@0 98 assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
aoqi@0 99 return (int) (diff >> _shifter);
aoqi@0 100 }
aoqi@0 101
aoqi@0 102 #ifndef PRODUCT
tschatzl@7051 103 bool CMBitMapRO::covers(MemRegion heap_rs) const {
aoqi@0 104 // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
aoqi@0 105 assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
aoqi@0 106 "size inconsistency");
tschatzl@7051 107 return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
tschatzl@7051 108 _bmWordSize == heap_rs.word_size();
aoqi@0 109 }
aoqi@0 110 #endif
aoqi@0 111
aoqi@0 112 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
aoqi@0 113 _bm.print_on_error(st, prefix);
aoqi@0 114 }
aoqi@0 115
tschatzl@7051 116 size_t CMBitMap::compute_size(size_t heap_size) {
tschatzl@7051 117 return heap_size / mark_distance();
tschatzl@7051 118 }
tschatzl@7051 119
tschatzl@7051 120 size_t CMBitMap::mark_distance() {
tschatzl@7051 121 return MinObjAlignmentInBytes * BitsPerByte;
tschatzl@7051 122 }
tschatzl@7051 123
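// A minimal standalone sketch (not HotSpot code; the constants are assumed
// example values) of the sizing arithmetic above: each bitmap bit covers one
// MinObjAlignmentInBytes-sized chunk of heap, so a bitmap byte covers
// MinObjAlignmentInBytes * BitsPerByte heap bytes and compute_size() returns
// the bitmap size in bytes.
#include <cstddef>
#include <cstdio>

int main() {
  const size_t min_obj_alignment_bytes = 8;  // assumed 8-byte object alignment
  const size_t bits_per_byte           = 8;
  const size_t mark_distance = min_obj_alignment_bytes * bits_per_byte;  // 64
  const size_t heap_size     = size_t(1) << 30;                          // 1 GiB heap
  // 2^30 / 64 = 16 MiB of bitmap for a 1 GiB heap.
  std::printf("bitmap size = %zu bytes\n", heap_size / mark_distance);
  return 0;
}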
tschatzl@7051 124 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
tschatzl@7051 125 _bmStartWord = heap.start();
tschatzl@7051 126 _bmWordSize = heap.word_size();
tschatzl@7051 127
tschatzl@7051 128 _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
tschatzl@7051 129 _bm.set_size(_bmWordSize >> _shifter);
tschatzl@7051 130
tschatzl@7051 131 storage->set_mapping_changed_listener(&_listener);
tschatzl@7051 132 }
tschatzl@7051 133
tschatzl@7257 134 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
tschatzl@7257 135 if (zero_filled) {
tschatzl@7257 136 return;
tschatzl@7257 137 }
tschatzl@7051 138 // We need to clear the bitmap on commit, removing any existing information.
tschatzl@7051 139 MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
tschatzl@7051 140 _bm->clearRange(mr);
tschatzl@7051 141 }
tschatzl@7051 142
tschatzl@7051 143 // Closure used for clearing the given mark bitmap.
tschatzl@7051 144 class ClearBitmapHRClosure : public HeapRegionClosure {
tschatzl@7051 145 private:
tschatzl@7051 146 ConcurrentMark* _cm;
tschatzl@7051 147 CMBitMap* _bitmap;
tschatzl@7051 148 bool _may_yield; // The closure may yield during iteration. After yielding, the iteration aborts if marking has been aborted.
tschatzl@7051 149 public:
tschatzl@7051 150 ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
tschatzl@7051 151 assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
tschatzl@7051 152 }
tschatzl@7051 153
tschatzl@7051 154 virtual bool doHeapRegion(HeapRegion* r) {
tschatzl@7051 155 size_t const chunk_size_in_words = M / HeapWordSize;
tschatzl@7051 156
tschatzl@7051 157 HeapWord* cur = r->bottom();
tschatzl@7051 158 HeapWord* const end = r->end();
tschatzl@7051 159
tschatzl@7051 160 while (cur < end) {
tschatzl@7051 161 MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
tschatzl@7051 162 _bitmap->clearRange(mr);
tschatzl@7051 163
tschatzl@7051 164 cur += chunk_size_in_words;
tschatzl@7051 165
tschatzl@7051 166 // Abort iteration if after yielding the marking has been aborted.
tschatzl@7051 167 if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
tschatzl@7051 168 return true;
tschatzl@7051 169 }
tschatzl@7051 170 // Repeat the asserts from before the start of the closure. We will do them
tschatzl@7051 171 // as asserts here to minimize their overhead on the product. However, we
tschatzl@7051 172 // will have them as guarantees at the beginning / end of the bitmap
tschatzl@7051 173 // clearing to get some checking in the product.
tschatzl@7051 174 assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
tschatzl@7051 175 assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
tschatzl@7051 176 }
tschatzl@7051 177
aoqi@0 178 return false;
aoqi@0 179 }
tschatzl@7051 180 };
aoqi@0 181
aoqi@0 182 void CMBitMap::clearAll() {
tschatzl@7051 183 ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
tschatzl@7051 184 G1CollectedHeap::heap()->heap_region_iterate(&cl);
tschatzl@7051 185 guarantee(cl.complete(), "Must have completed iteration.");
aoqi@0 186 return;
aoqi@0 187 }
aoqi@0 188
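// Standalone sketch (illustrative names, not HotSpot API) of the pattern
// ClearBitmapHRClosure uses above: clear a large range one fixed-size chunk
// at a time and check an abort condition between chunks, so a concurrent
// clearer can back out quickly instead of committing to the whole range.
#include <algorithm>
#include <cstddef>
#include <vector>

bool clear_in_chunks(std::vector<bool>& bits, size_t begin, size_t end,
                     size_t chunk, bool (*should_abort)()) {
  for (size_t cur = begin; cur < end; cur += chunk) {
    size_t limit = std::min(cur + chunk, end);
    std::fill(bits.begin() + cur, bits.begin() + limit, false);  // clear one chunk
    if (should_abort != nullptr && should_abort()) {
      return false;  // aborted between chunks, like the has_aborted() check above
    }
  }
  return true;       // whole range cleared
}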
aoqi@0 189 void CMBitMap::markRange(MemRegion mr) {
aoqi@0 190 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
aoqi@0 191 assert(!mr.is_empty(), "unexpected empty region");
aoqi@0 192 assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
aoqi@0 193 ((HeapWord *) mr.end())),
aoqi@0 194 "markRange memory region end is not card aligned");
aoqi@0 195 // convert address range into offset range
aoqi@0 196 _bm.at_put_range(heapWordToOffset(mr.start()),
aoqi@0 197 heapWordToOffset(mr.end()), true);
aoqi@0 198 }
aoqi@0 199
aoqi@0 200 void CMBitMap::clearRange(MemRegion mr) {
aoqi@0 201 mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
aoqi@0 202 assert(!mr.is_empty(), "unexpected empty region");
aoqi@0 203 // convert address range into offset range
aoqi@0 204 _bm.at_put_range(heapWordToOffset(mr.start()),
aoqi@0 205 heapWordToOffset(mr.end()), false);
aoqi@0 206 }
aoqi@0 207
aoqi@0 208 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
aoqi@0 209 HeapWord* end_addr) {
aoqi@0 210 HeapWord* start = getNextMarkedWordAddress(addr);
aoqi@0 211 start = MIN2(start, end_addr);
aoqi@0 212 HeapWord* end = getNextUnmarkedWordAddress(start);
aoqi@0 213 end = MIN2(end, end_addr);
aoqi@0 214 assert(start <= end, "Consistency check");
aoqi@0 215 MemRegion mr(start, end);
aoqi@0 216 if (!mr.is_empty()) {
aoqi@0 217 clearRange(mr);
aoqi@0 218 }
aoqi@0 219 return mr;
aoqi@0 220 }
aoqi@0 221
aoqi@0 222 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
aoqi@0 223 _base(NULL), _cm(cm)
aoqi@0 224 #ifdef ASSERT
aoqi@0 225 , _drain_in_progress(false)
aoqi@0 226 , _drain_in_progress_yields(false)
aoqi@0 227 #endif
aoqi@0 228 {}
aoqi@0 229
aoqi@0 230 bool CMMarkStack::allocate(size_t capacity) {
aoqi@0 231 // allocate a stack of the requisite depth
aoqi@0 232 ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
aoqi@0 233 if (!rs.is_reserved()) {
aoqi@0 234 warning("ConcurrentMark MarkStack allocation failure");
aoqi@0 235 return false;
aoqi@0 236 }
aoqi@0 237 MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
aoqi@0 238 if (!_virtual_space.initialize(rs, rs.size())) {
aoqi@0 239 warning("ConcurrentMark MarkStack backing store failure");
aoqi@0 240 // Release the virtual memory reserved for the marking stack
aoqi@0 241 rs.release();
aoqi@0 242 return false;
aoqi@0 243 }
aoqi@0 244 assert(_virtual_space.committed_size() == rs.size(),
aoqi@0 245 "Didn't reserve backing store for all of ConcurrentMark stack?");
aoqi@0 246 _base = (oop*) _virtual_space.low();
aoqi@0 247 setEmpty();
aoqi@0 248 _capacity = (jint) capacity;
aoqi@0 249 _saved_index = -1;
aoqi@0 250 _should_expand = false;
aoqi@0 251 NOT_PRODUCT(_max_depth = 0);
aoqi@0 252 return true;
aoqi@0 253 }
aoqi@0 254
aoqi@0 255 void CMMarkStack::expand() {
aoqi@0 256 // Called during remark if we've overflown the marking stack during marking.
aoqi@0 257 assert(isEmpty(), "stack should have been emptied while handling overflow");
aoqi@0 258 assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
aoqi@0 259 // Clear expansion flag
aoqi@0 260 _should_expand = false;
aoqi@0 261 if (_capacity == (jint) MarkStackSizeMax) {
aoqi@0 262 if (PrintGCDetails && Verbose) {
aoqi@0 263 gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
aoqi@0 264 }
aoqi@0 265 return;
aoqi@0 266 }
aoqi@0 267 // Double capacity if possible
aoqi@0 268 jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
aoqi@0 269 // Do not give up the existing stack until we have managed to
aoqi@0 270 // get the doubled capacity that we want.
aoqi@0 271 ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
aoqi@0 272 sizeof(oop)));
aoqi@0 273 if (rs.is_reserved()) {
aoqi@0 274 // Release the backing store associated with old stack
aoqi@0 275 _virtual_space.release();
aoqi@0 276 // Reinitialize virtual space for new stack
aoqi@0 277 if (!_virtual_space.initialize(rs, rs.size())) {
aoqi@0 278 fatal("Not enough swap for expanded marking stack capacity");
aoqi@0 279 }
aoqi@0 280 _base = (oop*)(_virtual_space.low());
aoqi@0 281 _index = 0;
aoqi@0 282 _capacity = new_capacity;
aoqi@0 283 } else {
aoqi@0 284 if (PrintGCDetails && Verbose) {
aoqi@0 285 // Failed to double capacity, continue;
aoqi@0 286 gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
aoqi@0 287 SIZE_FORMAT"K to " SIZE_FORMAT"K",
aoqi@0 288 _capacity / K, new_capacity / K);
aoqi@0 289 }
aoqi@0 290 }
aoqi@0 291 }
aoqi@0 292
aoqi@0 293 void CMMarkStack::set_should_expand() {
aoqi@0 294 // If we're resetting the marking state because of a
aoqi@0 295 // marking stack overflow, record that we should, if
aoqi@0 296 // possible, expand the stack.
aoqi@0 297 _should_expand = _cm->has_overflown();
aoqi@0 298 }
aoqi@0 299
aoqi@0 300 CMMarkStack::~CMMarkStack() {
aoqi@0 301 if (_base != NULL) {
aoqi@0 302 _base = NULL;
aoqi@0 303 _virtual_space.release();
aoqi@0 304 }
aoqi@0 305 }
aoqi@0 306
aoqi@0 307 void CMMarkStack::par_push(oop ptr) {
aoqi@0 308 while (true) {
aoqi@0 309 if (isFull()) {
aoqi@0 310 _overflow = true;
aoqi@0 311 return;
aoqi@0 312 }
aoqi@0 313 // Otherwise...
aoqi@0 314 jint index = _index;
aoqi@0 315 jint next_index = index+1;
aoqi@0 316 jint res = Atomic::cmpxchg(next_index, &_index, index);
aoqi@0 317 if (res == index) {
aoqi@0 318 _base[index] = ptr;
aoqi@0 319 // Note that we don't maintain this atomically. We could, but it
aoqi@0 320 // doesn't seem necessary.
aoqi@0 321 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
aoqi@0 322 return;
aoqi@0 323 }
aoqi@0 324 // Otherwise, we need to try again.
aoqi@0 325 }
aoqi@0 326 }
aoqi@0 327
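// Standalone C++11 sketch (assumed names, not the VM's Atomic API) of the
// claim-then-write pattern par_push() uses above: bump the index with a
// compare-and-swap to claim a slot, then fill the slot. Like the original,
// it only coordinates concurrent pushers; draining is assumed to happen
// under a lock or at a safepoint.
#include <atomic>

struct ParStack {
  void**           _slots;      // backing array of _capacity entries (caller-provided)
  int              _capacity;
  std::atomic<int> _index;

  ParStack(void** slots, int capacity)
    : _slots(slots), _capacity(capacity), _index(0) {}

  bool par_push(void* p) {
    int idx = _index.load(std::memory_order_relaxed);
    while (true) {
      if (idx >= _capacity) {
        return false;                          // overflow; caller must handle it
      }
      // On failure compare_exchange_weak reloads idx, so we simply retry.
      if (_index.compare_exchange_weak(idx, idx + 1)) {
        _slots[idx] = p;                       // we now own slot idx
        return true;
      }
    }
  }
};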
aoqi@0 328 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
aoqi@0 329 while (true) {
aoqi@0 330 if (isFull()) {
aoqi@0 331 _overflow = true;
aoqi@0 332 return;
aoqi@0 333 }
aoqi@0 334 // Otherwise...
aoqi@0 335 jint index = _index;
aoqi@0 336 jint next_index = index + n;
aoqi@0 337 if (next_index > _capacity) {
aoqi@0 338 _overflow = true;
aoqi@0 339 return;
aoqi@0 340 }
aoqi@0 341 jint res = Atomic::cmpxchg(next_index, &_index, index);
aoqi@0 342 if (res == index) {
aoqi@0 343 for (int i = 0; i < n; i++) {
aoqi@0 344 int ind = index + i;
aoqi@0 345 assert(ind < _capacity, "By overflow test above.");
aoqi@0 346 _base[ind] = ptr_arr[i];
aoqi@0 347 }
aoqi@0 348 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
aoqi@0 349 return;
aoqi@0 350 }
aoqi@0 351 // Otherwise, we need to try again.
aoqi@0 352 }
aoqi@0 353 }
aoqi@0 354
aoqi@0 355 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
aoqi@0 356 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
aoqi@0 357 jint start = _index;
aoqi@0 358 jint next_index = start + n;
aoqi@0 359 if (next_index > _capacity) {
aoqi@0 360 _overflow = true;
aoqi@0 361 return;
aoqi@0 362 }
aoqi@0 363 // Otherwise.
aoqi@0 364 _index = next_index;
aoqi@0 365 for (int i = 0; i < n; i++) {
aoqi@0 366 int ind = start + i;
aoqi@0 367 assert(ind < _capacity, "By overflow test above.");
aoqi@0 368 _base[ind] = ptr_arr[i];
aoqi@0 369 }
aoqi@0 370 NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
aoqi@0 371 }
aoqi@0 372
aoqi@0 373 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
aoqi@0 374 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
aoqi@0 375 jint index = _index;
aoqi@0 376 if (index == 0) {
aoqi@0 377 *n = 0;
aoqi@0 378 return false;
aoqi@0 379 } else {
aoqi@0 380 int k = MIN2(max, index);
aoqi@0 381 jint new_ind = index - k;
aoqi@0 382 for (int j = 0; j < k; j++) {
aoqi@0 383 ptr_arr[j] = _base[new_ind + j];
aoqi@0 384 }
aoqi@0 385 _index = new_ind;
aoqi@0 386 *n = k;
aoqi@0 387 return true;
aoqi@0 388 }
aoqi@0 389 }
aoqi@0 390
aoqi@0 391 template<class OopClosureClass>
aoqi@0 392 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
aoqi@0 393 assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
aoqi@0 394 || SafepointSynchronize::is_at_safepoint(),
aoqi@0 395 "Drain recursion must be yield-safe.");
aoqi@0 396 bool res = true;
aoqi@0 397 debug_only(_drain_in_progress = true);
aoqi@0 398 debug_only(_drain_in_progress_yields = yield_after);
aoqi@0 399 while (!isEmpty()) {
aoqi@0 400 oop newOop = pop();
aoqi@0 401 assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
aoqi@0 402 assert(newOop->is_oop(), "Expected an oop");
aoqi@0 403 assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
aoqi@0 404 "only grey objects on this stack");
aoqi@0 405 newOop->oop_iterate(cl);
aoqi@0 406 if (yield_after && _cm->do_yield_check()) {
aoqi@0 407 res = false;
aoqi@0 408 break;
aoqi@0 409 }
aoqi@0 410 }
aoqi@0 411 debug_only(_drain_in_progress = false);
aoqi@0 412 return res;
aoqi@0 413 }
aoqi@0 414
aoqi@0 415 void CMMarkStack::note_start_of_gc() {
aoqi@0 416 assert(_saved_index == -1,
aoqi@0 417 "note_start_of_gc()/end_of_gc() bracketed incorrectly");
aoqi@0 418 _saved_index = _index;
aoqi@0 419 }
aoqi@0 420
aoqi@0 421 void CMMarkStack::note_end_of_gc() {
aoqi@0 422 // This is intentionally a guarantee, instead of an assert. If we
aoqi@0 423 // accidentally add something to the mark stack during GC, it
aoqi@0 424 // will be a correctness issue, so it's better if we crash. We'll
aoqi@0 425 // only check this once per GC anyway, so it won't be a performance
aoqi@0 426 // issue in any way.
aoqi@0 427 guarantee(_saved_index == _index,
aoqi@0 428 err_msg("saved index: %d index: %d", _saved_index, _index));
aoqi@0 429 _saved_index = -1;
aoqi@0 430 }
aoqi@0 431
aoqi@0 432 void CMMarkStack::oops_do(OopClosure* f) {
aoqi@0 433 assert(_saved_index == _index,
aoqi@0 434 err_msg("saved index: %d index: %d", _saved_index, _index));
aoqi@0 435 for (int i = 0; i < _index; i += 1) {
aoqi@0 436 f->do_oop(&_base[i]);
aoqi@0 437 }
aoqi@0 438 }
aoqi@0 439
aoqi@0 440 CMRootRegions::CMRootRegions() :
aoqi@0 441 _young_list(NULL), _cm(NULL), _scan_in_progress(false),
aoqi@0 442 _should_abort(false), _next_survivor(NULL) { }
aoqi@0 443
aoqi@0 444 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
aoqi@0 445 _young_list = g1h->young_list();
aoqi@0 446 _cm = cm;
aoqi@0 447 }
aoqi@0 448
aoqi@0 449 void CMRootRegions::prepare_for_scan() {
aoqi@0 450 assert(!scan_in_progress(), "pre-condition");
aoqi@0 451
aoqi@0 452 // Currently, only survivors can be root regions.
aoqi@0 453 assert(_next_survivor == NULL, "pre-condition");
aoqi@0 454 _next_survivor = _young_list->first_survivor_region();
aoqi@0 455 _scan_in_progress = (_next_survivor != NULL);
aoqi@0 456 _should_abort = false;
aoqi@0 457 }
aoqi@0 458
aoqi@0 459 HeapRegion* CMRootRegions::claim_next() {
aoqi@0 460 if (_should_abort) {
aoqi@0 461 // If someone has set the should_abort flag, we return NULL to
aoqi@0 462 // force the caller to bail out of their loop.
aoqi@0 463 return NULL;
aoqi@0 464 }
aoqi@0 465
aoqi@0 466 // Currently, only survivors can be root regions.
aoqi@0 467 HeapRegion* res = _next_survivor;
aoqi@0 468 if (res != NULL) {
aoqi@0 469 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
aoqi@0 470 // Read it again in case it changed while we were waiting for the lock.
aoqi@0 471 res = _next_survivor;
aoqi@0 472 if (res != NULL) {
aoqi@0 473 if (res == _young_list->last_survivor_region()) {
aoqi@0 474 // We just claimed the last survivor so store NULL to indicate
aoqi@0 475 // that we're done.
aoqi@0 476 _next_survivor = NULL;
aoqi@0 477 } else {
aoqi@0 478 _next_survivor = res->get_next_young_region();
aoqi@0 479 }
aoqi@0 480 } else {
aoqi@0 481 // Someone else claimed the last survivor while we were trying
aoqi@0 482 // to take the lock so nothing else to do.
aoqi@0 483 }
aoqi@0 484 }
aoqi@0 485 assert(res == NULL || res->is_survivor(), "post-condition");
aoqi@0 486
aoqi@0 487 return res;
aoqi@0 488 }
aoqi@0 489
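// Standalone sketch (illustrative types, not the VM's list or Mutex classes)
// of the claim protocol in claim_next() above: read the shared cursor without
// the lock, and only if it looks non-null take the lock, re-read it, and
// advance it, so the common "nothing left to claim" case stays lock-free.
#include <mutex>

struct Node { Node* next; };

class Claimer {
  std::mutex _lock;
  Node*      _next = nullptr;     // shared cursor; nullptr means nothing left

public:
  void reset(Node* head) { _next = head; }

  Node* claim_next() {
    Node* res = _next;                         // racy unlocked read, as above
    if (res != nullptr) {
      std::lock_guard<std::mutex> g(_lock);
      res = _next;                             // re-read under the lock
      if (res != nullptr) {
        _next = res->next;                     // advance; nullptr once exhausted
      }
    }
    return res;
  }
};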
aoqi@0 490 void CMRootRegions::scan_finished() {
aoqi@0 491 assert(scan_in_progress(), "pre-condition");
aoqi@0 492
aoqi@0 493 // Currently, only survivors can be root regions.
aoqi@0 494 if (!_should_abort) {
aoqi@0 495 assert(_next_survivor == NULL, "we should have claimed all survivors");
aoqi@0 496 }
aoqi@0 497 _next_survivor = NULL;
aoqi@0 498
aoqi@0 499 {
aoqi@0 500 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
aoqi@0 501 _scan_in_progress = false;
aoqi@0 502 RootRegionScan_lock->notify_all();
aoqi@0 503 }
aoqi@0 504 }
aoqi@0 505
aoqi@0 506 bool CMRootRegions::wait_until_scan_finished() {
aoqi@0 507 if (!scan_in_progress()) return false;
aoqi@0 508
aoqi@0 509 {
aoqi@0 510 MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
aoqi@0 511 while (scan_in_progress()) {
aoqi@0 512 RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
aoqi@0 513 }
aoqi@0 514 }
aoqi@0 515 return true;
aoqi@0 516 }
aoqi@0 517
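// Standalone sketch (std::mutex / std::condition_variable instead of the VM's
// Monitor; names are illustrative) of the completion handshake implemented by
// scan_finished() and wait_until_scan_finished() above: the scanner clears a
// flag and notifies under the lock, and waiters re-check the flag in a loop.
#include <condition_variable>
#include <mutex>

class ScanProgress {
  std::mutex              _lock;
  std::condition_variable _cv;
  bool                    _scan_in_progress = false;

public:
  void start() {
    std::lock_guard<std::mutex> g(_lock);
    _scan_in_progress = true;
  }

  void scan_finished() {
    std::lock_guard<std::mutex> g(_lock);
    _scan_in_progress = false;
    _cv.notify_all();                          // wake every waiter, like notify_all()
  }

  void wait_until_scan_finished() {
    std::unique_lock<std::mutex> g(_lock);
    _cv.wait(g, [this] { return !_scan_in_progress; });  // loop until the flag drops
  }
};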
aoqi@0 518 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
aoqi@0 519 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
aoqi@0 520 #endif // _MSC_VER
aoqi@0 521
aoqi@0 522 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
aoqi@0 523 return MAX2((n_par_threads + 2) / 4, 1U);
aoqi@0 524 }
aoqi@0 525
tschatzl@7051 526 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
aoqi@0 527 _g1h(g1h),
tschatzl@7051 528 _markBitMap1(),
tschatzl@7051 529 _markBitMap2(),
aoqi@0 530 _parallel_marking_threads(0),
aoqi@0 531 _max_parallel_marking_threads(0),
aoqi@0 532 _sleep_factor(0.0),
aoqi@0 533 _marking_task_overhead(1.0),
aoqi@0 534 _cleanup_sleep_factor(0.0),
aoqi@0 535 _cleanup_task_overhead(1.0),
aoqi@0 536 _cleanup_list("Cleanup List"),
aoqi@0 537 _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
tschatzl@7051 538 _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
aoqi@0 539 CardTableModRefBS::card_shift,
aoqi@0 540 false /* in_resource_area*/),
aoqi@0 541
aoqi@0 542 _prevMarkBitMap(&_markBitMap1),
aoqi@0 543 _nextMarkBitMap(&_markBitMap2),
aoqi@0 544
aoqi@0 545 _markStack(this),
aoqi@0 546 // _finger set in set_non_marking_state
aoqi@0 547
aoqi@0 548 _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
aoqi@0 549 // _active_tasks set in set_non_marking_state
aoqi@0 550 // _tasks set inside the constructor
aoqi@0 551 _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
aoqi@0 552 _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
aoqi@0 553
aoqi@0 554 _has_overflown(false),
aoqi@0 555 _concurrent(false),
aoqi@0 556 _has_aborted(false),
brutisso@6904 557 _aborted_gc_id(GCId::undefined()),
aoqi@0 558 _restart_for_overflow(false),
aoqi@0 559 _concurrent_marking_in_progress(false),
aoqi@0 560
aoqi@0 561 // _verbose_level set below
aoqi@0 562
aoqi@0 563 _init_times(),
aoqi@0 564 _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
aoqi@0 565 _cleanup_times(),
aoqi@0 566 _total_counting_time(0.0),
aoqi@0 567 _total_rs_scrub_time(0.0),
aoqi@0 568
aoqi@0 569 _parallel_workers(NULL),
aoqi@0 570
aoqi@0 571 _count_card_bitmaps(NULL),
aoqi@0 572 _count_marked_bytes(NULL),
aoqi@0 573 _completed_initialization(false) {
aoqi@0 574 CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
aoqi@0 575 if (verbose_level < no_verbose) {
aoqi@0 576 verbose_level = no_verbose;
aoqi@0 577 }
aoqi@0 578 if (verbose_level > high_verbose) {
aoqi@0 579 verbose_level = high_verbose;
aoqi@0 580 }
aoqi@0 581 _verbose_level = verbose_level;
aoqi@0 582
aoqi@0 583 if (verbose_low()) {
aoqi@0 584 gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
aoqi@0 585 "heap end = " INTPTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
aoqi@0 586 }
aoqi@0 587
tschatzl@7051 588 _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
tschatzl@7051 589 _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
aoqi@0 590
aoqi@0 591 // Create & start a ConcurrentMark thread.
aoqi@0 592 _cmThread = new ConcurrentMarkThread(this);
aoqi@0 593 assert(cmThread() != NULL, "CM Thread should have been created");
aoqi@0 594 assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
aoqi@0 595 if (_cmThread->osthread() == NULL) {
aoqi@0 596 vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
aoqi@0 597 }
aoqi@0 598
aoqi@0 599 assert(CGC_lock != NULL, "Where's the CGC_lock?");
tschatzl@7051 600 assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
tschatzl@7051 601 assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
aoqi@0 602
aoqi@0 603 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
aoqi@0 604 satb_qs.set_buffer_size(G1SATBBufferSize);
aoqi@0 605
aoqi@0 606 _root_regions.init(_g1h, this);
aoqi@0 607
aoqi@0 608 if (ConcGCThreads > ParallelGCThreads) {
aoqi@0 609 warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
aoqi@0 610 "than ParallelGCThreads (" UINTX_FORMAT ").",
aoqi@0 611 ConcGCThreads, ParallelGCThreads);
aoqi@0 612 return;
aoqi@0 613 }
aoqi@0 614 if (ParallelGCThreads == 0) {
aoqi@0 615 // if we are not running with any parallel GC threads we will not
aoqi@0 616 // spawn any marking threads either
aoqi@0 617 _parallel_marking_threads = 0;
aoqi@0 618 _max_parallel_marking_threads = 0;
aoqi@0 619 _sleep_factor = 0.0;
aoqi@0 620 _marking_task_overhead = 1.0;
aoqi@0 621 } else {
aoqi@0 622 if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
aoqi@0 623 // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
aoqi@0 624 // if both are set
aoqi@0 625 _sleep_factor = 0.0;
aoqi@0 626 _marking_task_overhead = 1.0;
aoqi@0 627 } else if (G1MarkingOverheadPercent > 0) {
aoqi@0 628 // We will calculate the number of parallel marking threads based
aoqi@0 629 // on a target overhead with respect to the soft real-time goal
aoqi@0 630 double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
aoqi@0 631 double overall_cm_overhead =
aoqi@0 632 (double) MaxGCPauseMillis * marking_overhead /
aoqi@0 633 (double) GCPauseIntervalMillis;
aoqi@0 634 double cpu_ratio = 1.0 / (double) os::processor_count();
aoqi@0 635 double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
aoqi@0 636 double marking_task_overhead =
aoqi@0 637 overall_cm_overhead / marking_thread_num *
aoqi@0 638 (double) os::processor_count();
aoqi@0 639 double sleep_factor =
aoqi@0 640 (1.0 - marking_task_overhead) / marking_task_overhead;
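      // Worked example (assumed, illustrative flag values, not defaults):
      // G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
      // GCPauseIntervalMillis = 1000, 8 processors.
      //   marking_overhead      = 10 / 100           = 0.10
      //   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
      //   cpu_ratio             = 1 / 8              = 0.125
      //   marking_thread_num    = ceil(0.02 / 0.125) = 1
      //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
      //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
      // i.e. one marking thread that works roughly 16% of the time and sleeps
      // about 5.25x as long as each marking step took.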
aoqi@0 641
aoqi@0 642 FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
aoqi@0 643 _sleep_factor = sleep_factor;
aoqi@0 644 _marking_task_overhead = marking_task_overhead;
aoqi@0 645 } else {
aoqi@0 646 // Calculate the number of parallel marking threads by scaling
aoqi@0 647 // the number of parallel GC threads.
aoqi@0 648 uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
aoqi@0 649 FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
aoqi@0 650 _sleep_factor = 0.0;
aoqi@0 651 _marking_task_overhead = 1.0;
aoqi@0 652 }
aoqi@0 653
aoqi@0 654 assert(ConcGCThreads > 0, "Should have been set");
aoqi@0 655 _parallel_marking_threads = (uint) ConcGCThreads;
aoqi@0 656 _max_parallel_marking_threads = _parallel_marking_threads;
aoqi@0 657
aoqi@0 658 if (parallel_marking_threads() > 1) {
aoqi@0 659 _cleanup_task_overhead = 1.0;
aoqi@0 660 } else {
aoqi@0 661 _cleanup_task_overhead = marking_task_overhead();
aoqi@0 662 }
aoqi@0 663 _cleanup_sleep_factor =
aoqi@0 664 (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
aoqi@0 665
aoqi@0 666 #if 0
aoqi@0 667 gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads());
aoqi@0 668 gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
aoqi@0 669 gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor());
aoqi@0 670 gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
aoqi@0 671 gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor());
aoqi@0 672 #endif
aoqi@0 673
aoqi@0 674 guarantee(parallel_marking_threads() > 0, "peace of mind");
aoqi@0 675 _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
aoqi@0 676 _max_parallel_marking_threads, false, true);
aoqi@0 677 if (_parallel_workers == NULL) {
aoqi@0 678 vm_exit_during_initialization("Failed necessary allocation.");
aoqi@0 679 } else {
aoqi@0 680 _parallel_workers->initialize_workers();
aoqi@0 681 }
aoqi@0 682 }
aoqi@0 683
aoqi@0 684 if (FLAG_IS_DEFAULT(MarkStackSize)) {
aoqi@0 685 uintx mark_stack_size =
aoqi@0 686 MIN2(MarkStackSizeMax,
aoqi@0 687 MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
aoqi@0 688 // Verify that the calculated value for MarkStackSize is in range.
aoqi@0 689 // It would be nice to use the private utility routine from Arguments.
aoqi@0 690 if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
aoqi@0 691 warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
aoqi@0 692 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
aoqi@0 693 mark_stack_size, (uintx) 1, MarkStackSizeMax);
aoqi@0 694 return;
aoqi@0 695 }
aoqi@0 696 FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
aoqi@0 697 } else {
aoqi@0 698 // Verify MarkStackSize is in range.
aoqi@0 699 if (FLAG_IS_CMDLINE(MarkStackSize)) {
aoqi@0 700 if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
aoqi@0 701 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
aoqi@0 702 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
aoqi@0 703 "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
aoqi@0 704 MarkStackSize, (uintx) 1, MarkStackSizeMax);
aoqi@0 705 return;
aoqi@0 706 }
aoqi@0 707 } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
aoqi@0 708 if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
aoqi@0 709 warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
aoqi@0 710 " or for MarkStackSizeMax (" UINTX_FORMAT ")",
aoqi@0 711 MarkStackSize, MarkStackSizeMax);
aoqi@0 712 return;
aoqi@0 713 }
aoqi@0 714 }
aoqi@0 715 }
aoqi@0 716 }
aoqi@0 717
aoqi@0 718 if (!_markStack.allocate(MarkStackSize)) {
aoqi@0 719 warning("Failed to allocate CM marking stack");
aoqi@0 720 return;
aoqi@0 721 }
aoqi@0 722
aoqi@0 723 _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
aoqi@0 724 _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
aoqi@0 725
aoqi@0 726 _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
aoqi@0 727 _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
aoqi@0 728
aoqi@0 729 BitMap::idx_t card_bm_size = _card_bm.size();
aoqi@0 730
aoqi@0 731 // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
aoqi@0 732 _active_tasks = _max_worker_id;
aoqi@0 733
aoqi@0 734 size_t max_regions = (size_t) _g1h->max_regions();
aoqi@0 735 for (uint i = 0; i < _max_worker_id; ++i) {
aoqi@0 736 CMTaskQueue* task_queue = new CMTaskQueue();
aoqi@0 737 task_queue->initialize();
aoqi@0 738 _task_queues->register_queue(i, task_queue);
aoqi@0 739
aoqi@0 740 _count_card_bitmaps[i] = BitMap(card_bm_size, false);
aoqi@0 741 _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
aoqi@0 742
aoqi@0 743 _tasks[i] = new CMTask(i, this,
aoqi@0 744 _count_marked_bytes[i],
aoqi@0 745 &_count_card_bitmaps[i],
aoqi@0 746 task_queue, _task_queues);
aoqi@0 747
aoqi@0 748 _accum_task_vtime[i] = 0.0;
aoqi@0 749 }
aoqi@0 750
aoqi@0 751 // Calculate the card number for the bottom of the heap. Used
aoqi@0 752 // in biasing indexes into the accounting card bitmaps.
aoqi@0 753 _heap_bottom_card_num =
aoqi@0 754 intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
aoqi@0 755 CardTableModRefBS::card_shift);
aoqi@0 756
aoqi@0 757 // Clear all the liveness counting data
aoqi@0 758 clear_all_count_data();
aoqi@0 759
aoqi@0 760 // so that the call below can read a sensible value
tschatzl@7051 761 _heap_start = g1h->reserved_region().start();
aoqi@0 762 set_non_marking_state();
aoqi@0 763 _completed_initialization = true;
aoqi@0 764 }
aoqi@0 765
aoqi@0 766 void ConcurrentMark::reset() {
aoqi@0 767 // Starting values for these two. This should be called in a STW
tschatzl@7051 768 // phase.
tschatzl@7051 769 MemRegion reserved = _g1h->g1_reserved();
tschatzl@7051 770 _heap_start = reserved.start();
tschatzl@7051 771 _heap_end = reserved.end();
aoqi@0 772
aoqi@0 773 // Separated the asserts so that we know which one fires.
aoqi@0 774 assert(_heap_start != NULL, "heap bounds should look ok");
aoqi@0 775 assert(_heap_end != NULL, "heap bounds should look ok");
aoqi@0 776 assert(_heap_start < _heap_end, "heap bounds should look ok");
aoqi@0 777
aoqi@0 778 // Reset all the marking data structures and any necessary flags
aoqi@0 779 reset_marking_state();
aoqi@0 780
aoqi@0 781 if (verbose_low()) {
aoqi@0 782 gclog_or_tty->print_cr("[global] resetting");
aoqi@0 783 }
aoqi@0 784
aoqi@0 785 // We do reset all of them, since different phases will use
aoqi@0 786 // different numbers of active threads. So, it's easiest to have all
aoqi@0 787 // of them ready.
aoqi@0 788 for (uint i = 0; i < _max_worker_id; ++i) {
aoqi@0 789 _tasks[i]->reset(_nextMarkBitMap);
aoqi@0 790 }
aoqi@0 791
aoqi@0 792 // we need this to make sure that the flag is on during the evac
aoqi@0 793 // pause with initial mark piggy-backed
aoqi@0 794 set_concurrent_marking_in_progress();
aoqi@0 795 }
aoqi@0 796
aoqi@0 797
aoqi@0 798 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
aoqi@0 799 _markStack.set_should_expand();
aoqi@0 800 _markStack.setEmpty(); // Also clears the _markStack overflow flag
aoqi@0 801 if (clear_overflow) {
aoqi@0 802 clear_has_overflown();
aoqi@0 803 } else {
aoqi@0 804 assert(has_overflown(), "pre-condition");
aoqi@0 805 }
aoqi@0 806 _finger = _heap_start;
aoqi@0 807
aoqi@0 808 for (uint i = 0; i < _max_worker_id; ++i) {
aoqi@0 809 CMTaskQueue* queue = _task_queues->queue(i);
aoqi@0 810 queue->set_empty();
aoqi@0 811 }
aoqi@0 812 }
aoqi@0 813
aoqi@0 814 void ConcurrentMark::set_concurrency(uint active_tasks) {
aoqi@0 815 assert(active_tasks <= _max_worker_id, "we should not have more");
aoqi@0 816
aoqi@0 817 _active_tasks = active_tasks;
aoqi@0 818 // Need to update the three data structures below according to the
aoqi@0 819 // number of active threads for this phase.
aoqi@0 820 _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
aoqi@0 821 _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
aoqi@0 822 _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
aoqi@0 823 }
aoqi@0 824
aoqi@0 825 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
aoqi@0 826 set_concurrency(active_tasks);
aoqi@0 827
aoqi@0 828 _concurrent = concurrent;
aoqi@0 829 // We propagate this to all tasks, not just the active ones.
aoqi@0 830 for (uint i = 0; i < _max_worker_id; ++i)
aoqi@0 831 _tasks[i]->set_concurrent(concurrent);
aoqi@0 832
aoqi@0 833 if (concurrent) {
aoqi@0 834 set_concurrent_marking_in_progress();
aoqi@0 835 } else {
aoqi@0 836 // We currently assume that the concurrent flag has been set to
aoqi@0 837 // false before we start remark. At this point we should also be
aoqi@0 838 // in a STW phase.
aoqi@0 839 assert(!concurrent_marking_in_progress(), "invariant");
aoqi@0 840 assert(out_of_regions(),
aoqi@0 841 err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
aoqi@0 842 p2i(_finger), p2i(_heap_end)));
aoqi@0 843 }
aoqi@0 844 }
aoqi@0 845
aoqi@0 846 void ConcurrentMark::set_non_marking_state() {
aoqi@0 847 // We set the global marking state to some default values when we're
aoqi@0 848 // not doing marking.
aoqi@0 849 reset_marking_state();
aoqi@0 850 _active_tasks = 0;
aoqi@0 851 clear_concurrent_marking_in_progress();
aoqi@0 852 }
aoqi@0 853
aoqi@0 854 ConcurrentMark::~ConcurrentMark() {
aoqi@0 855 // The ConcurrentMark instance is never freed.
aoqi@0 856 ShouldNotReachHere();
aoqi@0 857 }
aoqi@0 858
aoqi@0 859 void ConcurrentMark::clearNextBitmap() {
aoqi@0 860 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 861
aoqi@0 862 // Make sure that the concurrent mark thread still appears to be in
aoqi@0 863 // the current cycle.
aoqi@0 864 guarantee(cmThread()->during_cycle(), "invariant");
aoqi@0 865
aoqi@0 866 // We are finishing up the current cycle by clearing the next
aoqi@0 867 // marking bitmap and getting it ready for the next cycle. During
aoqi@0 868 // this time no other cycle can start. So, let's make sure that this
aoqi@0 869 // is the case.
aoqi@0 870 guarantee(!g1h->mark_in_progress(), "invariant");
aoqi@0 871
tschatzl@7051 872 ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
tschatzl@7051 873 g1h->heap_region_iterate(&cl);
tschatzl@7051 874
tschatzl@7051 875 // Clear the liveness counting data. If the marking has been aborted, the abort()
tschatzl@7051 876 // call already did that.
tschatzl@7051 877 if (cl.complete()) {
tschatzl@7051 878 clear_all_count_data();
aoqi@0 879 }
aoqi@0 880
aoqi@0 881 // Repeat the asserts from above.
aoqi@0 882 guarantee(cmThread()->during_cycle(), "invariant");
aoqi@0 883 guarantee(!g1h->mark_in_progress(), "invariant");
aoqi@0 884 }
aoqi@0 885
tschatzl@7051 886 class CheckBitmapClearHRClosure : public HeapRegionClosure {
tschatzl@7051 887 CMBitMap* _bitmap;
tschatzl@7051 888 bool _error;
tschatzl@7051 889 public:
tschatzl@7051 890 CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
tschatzl@7051 891 }
tschatzl@7051 892
tschatzl@7051 893 virtual bool doHeapRegion(HeapRegion* r) {
tschatzl@7100 894 // This closure can be called concurrently to the mutator, so we must make sure
tschatzl@7100 895 // that the result of the getNextMarkedWordAddress() call is compared to the
tschatzl@7100 896 // value passed to it as limit to detect any found bits.
tschatzl@7100 897 // We can use the region's orig_end() for the limit and the comparison value
tschatzl@7100 898 // as it always contains the "real" end of the region that never changes and
tschatzl@7100 899 // has no side effects.
tschatzl@7100 900 // Due to the latter, there can also be no problem with the compiler generating
tschatzl@7100 901 // reloads of the orig_end() call.
tschatzl@7100 902 HeapWord* end = r->orig_end();
tschatzl@7100 903 return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
tschatzl@7051 904 }
tschatzl@7051 905 };
tschatzl@7051 906
tschatzl@7016 907 bool ConcurrentMark::nextMarkBitmapIsClear() {
tschatzl@7051 908 CheckBitmapClearHRClosure cl(_nextMarkBitMap);
tschatzl@7051 909 _g1h->heap_region_iterate(&cl);
tschatzl@7051 910 return cl.complete();
tschatzl@7016 911 }
tschatzl@7016 912
aoqi@0 913 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
aoqi@0 914 public:
aoqi@0 915 bool doHeapRegion(HeapRegion* r) {
aoqi@0 916 if (!r->continuesHumongous()) {
aoqi@0 917 r->note_start_of_marking();
aoqi@0 918 }
aoqi@0 919 return false;
aoqi@0 920 }
aoqi@0 921 };
aoqi@0 922
aoqi@0 923 void ConcurrentMark::checkpointRootsInitialPre() {
aoqi@0 924 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 925 G1CollectorPolicy* g1p = g1h->g1_policy();
aoqi@0 926
aoqi@0 927 _has_aborted = false;
aoqi@0 928
aoqi@0 929 #ifndef PRODUCT
aoqi@0 930 if (G1PrintReachableAtInitialMark) {
aoqi@0 931 print_reachable("at-cycle-start",
aoqi@0 932 VerifyOption_G1UsePrevMarking, true /* all */);
aoqi@0 933 }
aoqi@0 934 #endif
aoqi@0 935
aoqi@0 936 // Initialise marking structures. This has to be done in a STW phase.
aoqi@0 937 reset();
aoqi@0 938
aoqi@0 939 // For each region note start of marking.
aoqi@0 940 NoteStartOfMarkHRClosure startcl;
aoqi@0 941 g1h->heap_region_iterate(&startcl);
aoqi@0 942 }
aoqi@0 943
aoqi@0 944
aoqi@0 945 void ConcurrentMark::checkpointRootsInitialPost() {
aoqi@0 946 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 947
aoqi@0 948 // If we force an overflow during remark, the remark operation will
aoqi@0 949 // actually abort and we'll restart concurrent marking. If we always
aoqi@0 950 // force an overflow during remark we'll never actually complete the
aoqi@0 951 // marking phase. So, we initialize this here, at the start of the
aoqi@0 952 // cycle, so that the remaining overflow number will decrease at
aoqi@0 953 // every remark and we'll eventually not need to cause one.
aoqi@0 954 force_overflow_stw()->init();
aoqi@0 955
aoqi@0 956 // Start Concurrent Marking weak-reference discovery.
aoqi@0 957 ReferenceProcessor* rp = g1h->ref_processor_cm();
aoqi@0 958 // enable ("weak") refs discovery
aoqi@0 959 rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
aoqi@0 960 rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
aoqi@0 961
aoqi@0 962 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
aoqi@0 963 // This is the start of the marking cycle; we expect all
aoqi@0 964 // threads to have SATB queues with active set to false.
aoqi@0 965 satb_mq_set.set_active_all_threads(true, /* new active value */
aoqi@0 966 false /* expected_active */);
aoqi@0 967
aoqi@0 968 _root_regions.prepare_for_scan();
aoqi@0 969
aoqi@0 970 // update_g1_committed() will be called at the end of an evac pause
aoqi@0 971 // when marking is on. So, it's also called at the end of the
aoqi@0 972 // initial-mark pause to update the heap end, if the heap expands
aoqi@0 973 // during it. No need to call it here.
aoqi@0 974 }
aoqi@0 975
aoqi@0 976 /*
aoqi@0 977 * Notice that in the next two methods, we actually leave the STS
aoqi@0 978 * during the barrier sync and join it immediately afterwards. If we
aoqi@0 979 * do not do this, the following deadlock can occur: one thread could
aoqi@0 980 * be in the barrier sync code, waiting for the other thread to also
aoqi@0 981 * sync up, whereas another one could be trying to yield, while also
aoqi@0 982 * waiting for the other threads to sync up too.
aoqi@0 983 *
aoqi@0 984 * Note, however, that this code is also used during remark and in
aoqi@0 985 * this case we should not attempt to leave / enter the STS, otherwise
aoqi@0 986 * we'll either hit an assert (debug / fastdebug) or deadlock
aoqi@0 987 * (product). So we should only leave / enter the STS if we are
aoqi@0 988 * operating concurrently.
aoqi@0 989 *
aoqi@0 990 * Because the thread that does the sync barrier has left the STS, it
aoqi@0 991 * is possible that it will be suspended for a Full GC or that an
aoqi@0 992 * evacuation pause will occur. This is actually safe, since entering the sync
aoqi@0 993 * barrier is one of the last things do_marking_step() does, and it
aoqi@0 994 * doesn't manipulate any data structures afterwards.
aoqi@0 995 */
aoqi@0 996
aoqi@0 997 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
aoqi@0 998 if (verbose_low()) {
aoqi@0 999 gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
aoqi@0 1000 }
aoqi@0 1001
aoqi@0 1002 if (concurrent()) {
pliden@6906 1003 SuspendibleThreadSet::leave();
aoqi@0 1004 }
aoqi@0 1005
aoqi@0 1006 bool barrier_aborted = !_first_overflow_barrier_sync.enter();
aoqi@0 1007
aoqi@0 1008 if (concurrent()) {
pliden@6906 1009 SuspendibleThreadSet::join();
aoqi@0 1010 }
aoqi@0 1011 // at this point everyone should have synced up and not be doing any
aoqi@0 1012 // more work
aoqi@0 1013
aoqi@0 1014 if (verbose_low()) {
aoqi@0 1015 if (barrier_aborted) {
aoqi@0 1016 gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
aoqi@0 1017 } else {
aoqi@0 1018 gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
aoqi@0 1019 }
aoqi@0 1020 }
aoqi@0 1021
aoqi@0 1022 if (barrier_aborted) {
aoqi@0 1023 // If the barrier aborted we ignore the overflow condition and
aoqi@0 1024 // just abort the whole marking phase as quickly as possible.
aoqi@0 1025 return;
aoqi@0 1026 }
aoqi@0 1027
aoqi@0 1028 // If we're executing the concurrent phase of marking, reset the marking
aoqi@0 1029 // state; otherwise the marking state is reset after reference processing,
aoqi@0 1030 // during the remark pause.
aoqi@0 1031 // If we reset here as a result of an overflow during the remark we will
aoqi@0 1032 // see assertion failures from any subsequent set_concurrency_and_phase()
aoqi@0 1033 // calls.
aoqi@0 1034 if (concurrent()) {
aoqi@0 1035 // let the task associated with worker 0 do this
aoqi@0 1036 if (worker_id == 0) {
aoqi@0 1037 // task 0 is responsible for clearing the global data structures
aoqi@0 1038 // We should be here because of an overflow. During STW we should
aoqi@0 1039 // not clear the overflow flag since we rely on it being true when
aoqi@0 1040 // we exit this method to abort the pause and restart concurrent
aoqi@0 1041 // marking.
aoqi@0 1042 reset_marking_state(true /* clear_overflow */);
aoqi@0 1043 force_overflow()->update();
aoqi@0 1044
aoqi@0 1045 if (G1Log::fine()) {
brutisso@6904 1046 gclog_or_tty->gclog_stamp(concurrent_gc_id());
aoqi@0 1047 gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
aoqi@0 1048 }
aoqi@0 1049 }
aoqi@0 1050 }
aoqi@0 1051
aoqi@0 1052 // after this, each task should reset its own data structures and
aoqi@0 1053 // then go into the second barrier
aoqi@0 1054 }
aoqi@0 1055
aoqi@0 1056 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
aoqi@0 1057 if (verbose_low()) {
aoqi@0 1058 gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
aoqi@0 1059 }
aoqi@0 1060
aoqi@0 1061 if (concurrent()) {
pliden@6906 1062 SuspendibleThreadSet::leave();
aoqi@0 1063 }
aoqi@0 1064
aoqi@0 1065 bool barrier_aborted = !_second_overflow_barrier_sync.enter();
aoqi@0 1066
aoqi@0 1067 if (concurrent()) {
pliden@6906 1068 SuspendibleThreadSet::join();
aoqi@0 1069 }
aoqi@0 1070 // at this point everything should be re-initialized and ready to go
aoqi@0 1071
aoqi@0 1072 if (verbose_low()) {
aoqi@0 1073 if (barrier_aborted) {
aoqi@0 1074 gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
aoqi@0 1075 } else {
aoqi@0 1076 gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
aoqi@0 1077 }
aoqi@0 1078 }
aoqi@0 1079 }
aoqi@0 1080
aoqi@0 1081 #ifndef PRODUCT
aoqi@0 1082 void ForceOverflowSettings::init() {
aoqi@0 1083 _num_remaining = G1ConcMarkForceOverflow;
aoqi@0 1084 _force = false;
aoqi@0 1085 update();
aoqi@0 1086 }
aoqi@0 1087
aoqi@0 1088 void ForceOverflowSettings::update() {
aoqi@0 1089 if (_num_remaining > 0) {
aoqi@0 1090 _num_remaining -= 1;
aoqi@0 1091 _force = true;
aoqi@0 1092 } else {
aoqi@0 1093 _force = false;
aoqi@0 1094 }
aoqi@0 1095 }
aoqi@0 1096
aoqi@0 1097 bool ForceOverflowSettings::should_force() {
aoqi@0 1098 if (_force) {
aoqi@0 1099 _force = false;
aoqi@0 1100 return true;
aoqi@0 1101 } else {
aoqi@0 1102 return false;
aoqi@0 1103 }
aoqi@0 1104 }
aoqi@0 1105 #endif // !PRODUCT
aoqi@0 1106
aoqi@0 1107 class CMConcurrentMarkingTask: public AbstractGangTask {
aoqi@0 1108 private:
aoqi@0 1109 ConcurrentMark* _cm;
aoqi@0 1110 ConcurrentMarkThread* _cmt;
aoqi@0 1111
aoqi@0 1112 public:
aoqi@0 1113 void work(uint worker_id) {
aoqi@0 1114 assert(Thread::current()->is_ConcurrentGC_thread(),
aoqi@0 1115 "this should only be done by a conc GC thread");
aoqi@0 1116 ResourceMark rm;
aoqi@0 1117
aoqi@0 1118 double start_vtime = os::elapsedVTime();
aoqi@0 1119
pliden@6906 1120 SuspendibleThreadSet::join();
aoqi@0 1121
aoqi@0 1122 assert(worker_id < _cm->active_tasks(), "invariant");
aoqi@0 1123 CMTask* the_task = _cm->task(worker_id);
aoqi@0 1124 the_task->record_start_time();
aoqi@0 1125 if (!_cm->has_aborted()) {
aoqi@0 1126 do {
aoqi@0 1127 double start_vtime_sec = os::elapsedVTime();
aoqi@0 1128 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
aoqi@0 1129
aoqi@0 1130 the_task->do_marking_step(mark_step_duration_ms,
aoqi@0 1131 true /* do_termination */,
aoqi@0 1132 false /* is_serial*/);
aoqi@0 1133
aoqi@0 1134 double end_vtime_sec = os::elapsedVTime();
aoqi@0 1135 double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
aoqi@0 1136 _cm->clear_has_overflown();
aoqi@0 1137
tschatzl@7094 1138 _cm->do_yield_check(worker_id);
aoqi@0 1139
aoqi@0 1140 jlong sleep_time_ms;
aoqi@0 1141 if (!_cm->has_aborted() && the_task->has_aborted()) {
aoqi@0 1142 sleep_time_ms =
aoqi@0 1143 (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
pliden@6906 1144 SuspendibleThreadSet::leave();
aoqi@0 1145 os::sleep(Thread::current(), sleep_time_ms, false);
pliden@6906 1146 SuspendibleThreadSet::join();
aoqi@0 1147 }
aoqi@0 1148 } while (!_cm->has_aborted() && the_task->has_aborted());
aoqi@0 1149 }
aoqi@0 1150 the_task->record_end_time();
aoqi@0 1151 guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
aoqi@0 1152
pliden@6906 1153 SuspendibleThreadSet::leave();
aoqi@0 1154
aoqi@0 1155 double end_vtime = os::elapsedVTime();
aoqi@0 1156 _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
aoqi@0 1157 }
aoqi@0 1158
aoqi@0 1159 CMConcurrentMarkingTask(ConcurrentMark* cm,
aoqi@0 1160 ConcurrentMarkThread* cmt) :
aoqi@0 1161 AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
aoqi@0 1162
aoqi@0 1163 ~CMConcurrentMarkingTask() { }
aoqi@0 1164 };
aoqi@0 1165
aoqi@0 1166 // Calculates the number of active workers for a concurrent
aoqi@0 1167 // phase.
aoqi@0 1168 uint ConcurrentMark::calc_parallel_marking_threads() {
aoqi@0 1169 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 1170 uint n_conc_workers = 0;
aoqi@0 1171 if (!UseDynamicNumberOfGCThreads ||
aoqi@0 1172 (!FLAG_IS_DEFAULT(ConcGCThreads) &&
aoqi@0 1173 !ForceDynamicNumberOfGCThreads)) {
aoqi@0 1174 n_conc_workers = max_parallel_marking_threads();
aoqi@0 1175 } else {
aoqi@0 1176 n_conc_workers =
aoqi@0 1177 AdaptiveSizePolicy::calc_default_active_workers(
aoqi@0 1178 max_parallel_marking_threads(),
aoqi@0 1179 1, /* Minimum workers */
aoqi@0 1180 parallel_marking_threads(),
aoqi@0 1181 Threads::number_of_non_daemon_threads());
aoqi@0 1182 // Don't scale down "n_conc_workers" by scale_parallel_threads() because
aoqi@0 1183 // that scaling has already gone into "_max_parallel_marking_threads".
aoqi@0 1184 }
aoqi@0 1185 assert(n_conc_workers > 0, "Always need at least 1");
aoqi@0 1186 return n_conc_workers;
aoqi@0 1187 }
aoqi@0 1188 // If we are not running with any parallel GC threads we will not
aoqi@0 1189 // have spawned any marking threads either. Hence the number of
aoqi@0 1190 // concurrent workers should be 0.
aoqi@0 1191 return 0;
aoqi@0 1192 }
aoqi@0 1193
aoqi@0 1194 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
aoqi@0 1195 // Currently, only survivors can be root regions.
aoqi@0 1196 assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
aoqi@0 1197 G1RootRegionScanClosure cl(_g1h, this, worker_id);
aoqi@0 1198
aoqi@0 1199 const uintx interval = PrefetchScanIntervalInBytes;
aoqi@0 1200 HeapWord* curr = hr->bottom();
aoqi@0 1201 const HeapWord* end = hr->top();
aoqi@0 1202 while (curr < end) {
aoqi@0 1203 Prefetch::read(curr, interval);
aoqi@0 1204 oop obj = oop(curr);
aoqi@0 1205 int size = obj->oop_iterate(&cl);
aoqi@0 1206 assert(size == obj->size(), "sanity");
aoqi@0 1207 curr += size;
aoqi@0 1208 }
aoqi@0 1209 }
aoqi@0 1210
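// Standalone sketch (GCC/Clang __builtin_prefetch; the object layout and
// names are assumed for illustration) of the walk scanRootRegion() performs
// above: visit objects bottom-to-top, prefetch a fixed interval ahead of the
// cursor, and advance by each object's size.
#include <cstddef>

struct FakeObj {
  size_t size_in_words;            // assumed header field giving the object size
};

inline void scan_object(FakeObj* obj) { (void)obj; /* visit the object's fields */ }

void scan_range(char* bottom, char* top, size_t prefetch_interval_bytes) {
  char* curr = bottom;
  while (curr < top) {
    __builtin_prefetch(curr + prefetch_interval_bytes);  // read-ahead, like Prefetch::read
    FakeObj* obj = reinterpret_cast<FakeObj*>(curr);
    scan_object(obj);
    curr += obj->size_in_words * sizeof(void*);          // step to the next object
  }
}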
aoqi@0 1211 class CMRootRegionScanTask : public AbstractGangTask {
aoqi@0 1212 private:
aoqi@0 1213 ConcurrentMark* _cm;
aoqi@0 1214
aoqi@0 1215 public:
aoqi@0 1216 CMRootRegionScanTask(ConcurrentMark* cm) :
aoqi@0 1217 AbstractGangTask("Root Region Scan"), _cm(cm) { }
aoqi@0 1218
aoqi@0 1219 void work(uint worker_id) {
aoqi@0 1220 assert(Thread::current()->is_ConcurrentGC_thread(),
aoqi@0 1221 "this should only be done by a conc GC thread");
aoqi@0 1222
aoqi@0 1223 CMRootRegions* root_regions = _cm->root_regions();
aoqi@0 1224 HeapRegion* hr = root_regions->claim_next();
aoqi@0 1225 while (hr != NULL) {
aoqi@0 1226 _cm->scanRootRegion(hr, worker_id);
aoqi@0 1227 hr = root_regions->claim_next();
aoqi@0 1228 }
aoqi@0 1229 }
aoqi@0 1230 };
aoqi@0 1231
aoqi@0 1232 void ConcurrentMark::scanRootRegions() {
stefank@6992 1233 // Start of concurrent marking.
stefank@6992 1234 ClassLoaderDataGraph::clear_claimed_marks();
stefank@6992 1235
aoqi@0 1236 // scan_in_progress() will have been set to true only if there was
aoqi@0 1237 // at least one root region to scan. So, if it's false, we
aoqi@0 1238 // should not attempt to do any further work.
aoqi@0 1239 if (root_regions()->scan_in_progress()) {
aoqi@0 1240 _parallel_marking_threads = calc_parallel_marking_threads();
aoqi@0 1241 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
aoqi@0 1242 "Maximum number of marking threads exceeded");
aoqi@0 1243 uint active_workers = MAX2(1U, parallel_marking_threads());
aoqi@0 1244
aoqi@0 1245 CMRootRegionScanTask task(this);
aoqi@0 1246 if (use_parallel_marking_threads()) {
aoqi@0 1247 _parallel_workers->set_active_workers((int) active_workers);
aoqi@0 1248 _parallel_workers->run_task(&task);
aoqi@0 1249 } else {
aoqi@0 1250 task.work(0);
aoqi@0 1251 }
aoqi@0 1252
aoqi@0 1253 // It's possible that has_aborted() is true here without actually
aoqi@0 1254 // aborting the survivor scan earlier. This is OK as it's
aoqi@0 1255 // mainly used for sanity checking.
aoqi@0 1256 root_regions()->scan_finished();
aoqi@0 1257 }
aoqi@0 1258 }
aoqi@0 1259
aoqi@0 1260 void ConcurrentMark::markFromRoots() {
aoqi@0 1261 // we might be tempted to assert that:
aoqi@0 1262 // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
aoqi@0 1263 // "inconsistent argument?");
aoqi@0 1264 // However that wouldn't be right, because it's possible that
aoqi@0 1265 // a safepoint is indeed in progress as a younger generation
aoqi@0 1266 // stop-the-world GC happens even as we mark in this generation.
aoqi@0 1267
aoqi@0 1268 _restart_for_overflow = false;
aoqi@0 1269 force_overflow_conc()->init();
aoqi@0 1270
aoqi@0 1271 // _g1h has _n_par_threads
aoqi@0 1272 _parallel_marking_threads = calc_parallel_marking_threads();
aoqi@0 1273 assert(parallel_marking_threads() <= max_parallel_marking_threads(),
aoqi@0 1274 "Maximum number of marking threads exceeded");
aoqi@0 1275
aoqi@0 1276 uint active_workers = MAX2(1U, parallel_marking_threads());
aoqi@0 1277
aoqi@0 1278 // Parallel task terminator is set in "set_concurrency_and_phase()"
aoqi@0 1279 set_concurrency_and_phase(active_workers, true /* concurrent */);
aoqi@0 1280
aoqi@0 1281 CMConcurrentMarkingTask markingTask(this, cmThread());
aoqi@0 1282 if (use_parallel_marking_threads()) {
aoqi@0 1283 _parallel_workers->set_active_workers((int)active_workers);
stefank@6992 1284 // Don't set _n_par_threads because it affects MT in process_roots()
aoqi@0 1285 // and the decisions on that MT processing are made elsewhere.
aoqi@0 1286 assert(_parallel_workers->active_workers() > 0, "Should have been set");
aoqi@0 1287 _parallel_workers->run_task(&markingTask);
aoqi@0 1288 } else {
aoqi@0 1289 markingTask.work(0);
aoqi@0 1290 }
aoqi@0 1291 print_stats();
aoqi@0 1292 }
aoqi@0 1293
aoqi@0 1294 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
aoqi@0 1295 // world is stopped at this checkpoint
aoqi@0 1296 assert(SafepointSynchronize::is_at_safepoint(),
aoqi@0 1297 "world should be stopped");
aoqi@0 1298
aoqi@0 1299 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 1300
aoqi@0 1301 // If a full collection has happened, we shouldn't do this.
aoqi@0 1302 if (has_aborted()) {
aoqi@0 1303 g1h->set_marking_complete(); // So bitmap clearing isn't confused
aoqi@0 1304 return;
aoqi@0 1305 }
aoqi@0 1306
aoqi@0 1307 SvcGCMarker sgcm(SvcGCMarker::OTHER);
aoqi@0 1308
aoqi@0 1309 if (VerifyDuringGC) {
aoqi@0 1310 HandleMark hm; // handle scope
aoqi@0 1311 Universe::heap()->prepare_for_verify();
aoqi@0 1312 Universe::verify(VerifyOption_G1UsePrevMarking,
aoqi@0 1313 " VerifyDuringGC:(before)");
aoqi@0 1314 }
brutisso@7005 1315 g1h->check_bitmaps("Remark Start");
aoqi@0 1316
aoqi@0 1317 G1CollectorPolicy* g1p = g1h->g1_policy();
aoqi@0 1318 g1p->record_concurrent_mark_remark_start();
aoqi@0 1319
aoqi@0 1320 double start = os::elapsedTime();
aoqi@0 1321
aoqi@0 1322 checkpointRootsFinalWork();
aoqi@0 1323
aoqi@0 1324 double mark_work_end = os::elapsedTime();
aoqi@0 1325
aoqi@0 1326 weakRefsWork(clear_all_soft_refs);
aoqi@0 1327
aoqi@0 1328 if (has_overflown()) {
aoqi@0 1329 // Oops. We overflowed. Restart concurrent marking.
aoqi@0 1330 _restart_for_overflow = true;
aoqi@0 1331 if (G1TraceMarkStackOverflow) {
aoqi@0 1332 gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
aoqi@0 1333 }
aoqi@0 1334
aoqi@0 1335 // Verify the heap w.r.t. the previous marking bitmap.
aoqi@0 1336 if (VerifyDuringGC) {
aoqi@0 1337 HandleMark hm; // handle scope
aoqi@0 1338 Universe::heap()->prepare_for_verify();
aoqi@0 1339 Universe::verify(VerifyOption_G1UsePrevMarking,
aoqi@0 1340 " VerifyDuringGC:(overflow)");
aoqi@0 1341 }
aoqi@0 1342
aoqi@0 1343 // Clear the marking state because we will be restarting
aoqi@0 1344 // marking due to overflowing the global mark stack.
aoqi@0 1345 reset_marking_state();
aoqi@0 1346 } else {
aoqi@0 1347 // Aggregate the per-task counting data that we have accumulated
aoqi@0 1348 // while marking.
aoqi@0 1349 aggregate_count_data();
aoqi@0 1350
aoqi@0 1351 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
aoqi@0 1352 // We're done with marking.
aoqi@0 1353 // This is the end of the marking cycle, so we expect all
aoqi@0 1354 // threads to have SATB queues with active set to true.
aoqi@0 1355 satb_mq_set.set_active_all_threads(false, /* new active value */
aoqi@0 1356 true /* expected_active */);
aoqi@0 1357
aoqi@0 1358 if (VerifyDuringGC) {
aoqi@0 1359 HandleMark hm; // handle scope
aoqi@0 1360 Universe::heap()->prepare_for_verify();
aoqi@0 1361 Universe::verify(VerifyOption_G1UseNextMarking,
aoqi@0 1362 " VerifyDuringGC:(after)");
aoqi@0 1363 }
brutisso@7005 1364 g1h->check_bitmaps("Remark End");
aoqi@0 1365 assert(!restart_for_overflow(), "sanity");
aoqi@0 1366 // Completely reset the marking state since marking completed
aoqi@0 1367 set_non_marking_state();
aoqi@0 1368 }
aoqi@0 1369
aoqi@0 1370 // Expand the marking stack, if we have to and if we can.
aoqi@0 1371 if (_markStack.should_expand()) {
aoqi@0 1372 _markStack.expand();
aoqi@0 1373 }
aoqi@0 1374
aoqi@0 1375 // Statistics
aoqi@0 1376 double now = os::elapsedTime();
aoqi@0 1377 _remark_mark_times.add((mark_work_end - start) * 1000.0);
aoqi@0 1378 _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
aoqi@0 1379 _remark_times.add((now - start) * 1000.0);
aoqi@0 1380
aoqi@0 1381 g1p->record_concurrent_mark_remark_end();
aoqi@0 1382
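// Report the post-remark object count to the GC tracer, using the is-alive
// closure (based on the marking information) to decide which objects are live.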
aoqi@0 1383 G1CMIsAliveClosure is_alive(g1h);
aoqi@0 1384 g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
aoqi@0 1385 }
aoqi@0 1386
aoqi@0 1387 // Base class of the closures that finalize and verify the
aoqi@0 1388 // liveness counting data.
aoqi@0 1389 class CMCountDataClosureBase: public HeapRegionClosure {
aoqi@0 1390 protected:
aoqi@0 1391 G1CollectedHeap* _g1h;
aoqi@0 1392 ConcurrentMark* _cm;
aoqi@0 1393 CardTableModRefBS* _ct_bs;
aoqi@0 1394
aoqi@0 1395 BitMap* _region_bm;
aoqi@0 1396 BitMap* _card_bm;
aoqi@0 1397
aoqi@0 1398 // Takes a region that's not empty (i.e., it has at least one
aoqi@0 1399 // live object in it) and sets its corresponding bit on the region
aoqi@0 1400 // bitmap to 1. If the region is "starts humongous" it will also set
aoqi@0 1401 // to 1 the bits on the region bitmap that correspond to its
aoqi@0 1402 // associated "continues humongous" regions.
aoqi@0 1403 void set_bit_for_region(HeapRegion* hr) {
aoqi@0 1404 assert(!hr->continuesHumongous(), "should have filtered those out");
aoqi@0 1405
tschatzl@7091 1406 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
aoqi@0 1407 if (!hr->startsHumongous()) {
aoqi@0 1408 // Normal (non-humongous) case: just set the bit.
aoqi@0 1409 _region_bm->par_at_put(index, true);
aoqi@0 1410 } else {
aoqi@0 1411 // Starts humongous case: calculate how many regions are part of
aoqi@0 1412 // this humongous region and then set the bit range.
aoqi@0 1413 BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
aoqi@0 1414 _region_bm->par_at_put_range(index, end_index, true);
aoqi@0 1415 }
aoqi@0 1416 }
aoqi@0 1417
aoqi@0 1418 public:
aoqi@0 1419 CMCountDataClosureBase(G1CollectedHeap* g1h,
aoqi@0 1420 BitMap* region_bm, BitMap* card_bm):
aoqi@0 1421 _g1h(g1h), _cm(g1h->concurrent_mark()),
aoqi@0 1422 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
aoqi@0 1423 _region_bm(region_bm), _card_bm(card_bm) { }
aoqi@0 1424 };
aoqi@0 1425
aoqi@0 1426 // Closure that calculates the amount of live data (marked bytes) per
aoqi@0 1427 // region. Used for verification purposes during the cleanup pause.
aoqi@0 1428 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
aoqi@0 1429 CMBitMapRO* _bm;
aoqi@0 1430 size_t _region_marked_bytes;
aoqi@0 1431
aoqi@0 1432 public:
aoqi@0 1433 CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
aoqi@0 1434 BitMap* region_bm, BitMap* card_bm) :
aoqi@0 1435 CMCountDataClosureBase(g1h, region_bm, card_bm),
aoqi@0 1436 _bm(bm), _region_marked_bytes(0) { }
aoqi@0 1437
aoqi@0 1438 bool doHeapRegion(HeapRegion* hr) {
aoqi@0 1439
aoqi@0 1440 if (hr->continuesHumongous()) {
aoqi@0 1441 // We will ignore these here and process them when their
aoqi@0 1442 // associated "starts humongous" region is processed (see
aoqi@0 1443 // set_bit_for_heap_region()). Note that we cannot rely on their
aoqi@0 1444 // associated "starts humongous" region to have its bit set to
aoqi@0 1445 // 1 since, due to the region chunking in the parallel region
aoqi@0 1446 // iteration, a "continues humongous" region might be visited
aoqi@0 1447 // before its associated "starts humongous".
aoqi@0 1448 return false;
aoqi@0 1449 }
aoqi@0 1450
aoqi@0 1451 HeapWord* ntams = hr->next_top_at_mark_start();
aoqi@0 1452 HeapWord* start = hr->bottom();
aoqi@0 1453
aoqi@0 1454 assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
aoqi@0 1455 err_msg("Preconditions not met - "
aoqi@0 1456 "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
aoqi@0 1457 p2i(start), p2i(ntams), p2i(hr->end())));
aoqi@0 1458
aoqi@0 1459 // Find the first marked object at or after "start".
aoqi@0 1460 start = _bm->getNextMarkedWordAddress(start, ntams);
aoqi@0 1461
aoqi@0 1462 size_t marked_bytes = 0;
aoqi@0 1463
aoqi@0 1464 while (start < ntams) {
aoqi@0 1465 oop obj = oop(start);
aoqi@0 1466 int obj_sz = obj->size();
aoqi@0 1467 HeapWord* obj_end = start + obj_sz;
aoqi@0 1468
aoqi@0 1469 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
aoqi@0 1470 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
aoqi@0 1471
aoqi@0 1472 // Note: if we're looking at the last region in the heap, obj_end
aoqi@0 1473 // could actually be just beyond the end of the heap; end_idx
aoqi@0 1474 // will then correspond to a (non-existent) card that is also
aoqi@0 1475 // just beyond the heap.
aoqi@0 1476 if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
aoqi@0 1477 // end of object is not card aligned - increment to cover
aoqi@0 1478 // all the cards spanned by the object
aoqi@0 1479 end_idx += 1;
aoqi@0 1480 }
aoqi@0 1481
aoqi@0 1482 // Set the bits in the card BM for the cards spanned by this object.
aoqi@0 1483 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
aoqi@0 1484
aoqi@0 1485 // Add the size of this object to the number of marked bytes.
aoqi@0 1486 marked_bytes += (size_t)obj_sz * HeapWordSize;
aoqi@0 1487
aoqi@0 1488 // Find the next marked object after this one.
aoqi@0 1489 start = _bm->getNextMarkedWordAddress(obj_end, ntams);
aoqi@0 1490 }
aoqi@0 1491
aoqi@0 1492 // Mark the allocated-since-marking portion...
aoqi@0 1493 HeapWord* top = hr->top();
aoqi@0 1494 if (ntams < top) {
aoqi@0 1495 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
aoqi@0 1496 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
aoqi@0 1497
aoqi@0 1498 // Note: if we're looking at the last region in the heap, top
aoqi@0 1499 // could actually be just beyond the end of the heap; end_idx
aoqi@0 1500 // will then correspond to a (non-existent) card that is also
aoqi@0 1501 // just beyond the heap.
aoqi@0 1502 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
aoqi@0 1503 // end of object is not card aligned - increment to cover
aoqi@0 1504 // all the cards spanned by the object
aoqi@0 1505 end_idx += 1;
aoqi@0 1506 }
aoqi@0 1507 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
aoqi@0 1508
aoqi@0 1509 // This definitely means the region has live objects.
aoqi@0 1510 set_bit_for_region(hr);
aoqi@0 1511 }
aoqi@0 1512
aoqi@0 1513 // Update the live region bitmap.
aoqi@0 1514 if (marked_bytes > 0) {
aoqi@0 1515 set_bit_for_region(hr);
aoqi@0 1516 }
aoqi@0 1517
aoqi@0 1518 // Set the marked bytes for the current region so that
aoqi@0 1519 // it can be queried by a calling verification routine.
aoqi@0 1520 _region_marked_bytes = marked_bytes;
aoqi@0 1521
aoqi@0 1522 return false;
aoqi@0 1523 }
aoqi@0 1524
aoqi@0 1525 size_t region_marked_bytes() const { return _region_marked_bytes; }
aoqi@0 1526 };
aoqi@0 1527
aoqi@0 1528 // Heap region closure used for verifying the counting data
aoqi@0 1529 // that was accumulated concurrently and aggregated during
aoqi@0 1530 // the remark pause. This closure is applied to the heap
aoqi@0 1531 // regions during the STW cleanup pause.
aoqi@0 1532
aoqi@0 1533 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
aoqi@0 1534 G1CollectedHeap* _g1h;
aoqi@0 1535 ConcurrentMark* _cm;
aoqi@0 1536 CalcLiveObjectsClosure _calc_cl;
aoqi@0 1537 BitMap* _region_bm; // Region BM to be verified
aoqi@0 1538 BitMap* _card_bm; // Card BM to be verified
aoqi@0 1539 bool _verbose; // verbose output?
aoqi@0 1540
aoqi@0 1541 BitMap* _exp_region_bm; // Expected Region BM values
aoqi@0 1542 BitMap* _exp_card_bm; // Expected card BM values
aoqi@0 1543
aoqi@0 1544 int _failures;
aoqi@0 1545
aoqi@0 1546 public:
aoqi@0 1547 VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
aoqi@0 1548 BitMap* region_bm,
aoqi@0 1549 BitMap* card_bm,
aoqi@0 1550 BitMap* exp_region_bm,
aoqi@0 1551 BitMap* exp_card_bm,
aoqi@0 1552 bool verbose) :
aoqi@0 1553 _g1h(g1h), _cm(g1h->concurrent_mark()),
aoqi@0 1554 _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
aoqi@0 1555 _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
aoqi@0 1556 _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
aoqi@0 1557 _failures(0) { }
aoqi@0 1558
aoqi@0 1559 int failures() const { return _failures; }
aoqi@0 1560
aoqi@0 1561 bool doHeapRegion(HeapRegion* hr) {
aoqi@0 1562 if (hr->continuesHumongous()) {
aoqi@0 1563 // We will ignore these here and process them when their
aoqi@0 1564 // associated "starts humongous" region is processed (see
aoqi@0 1565 // set_bit_for_heap_region()). Note that we cannot rely on their
aoqi@0 1566 // associated "starts humongous" region to have its bit set to
aoqi@0 1567 // 1 since, due to the region chunking in the parallel region
aoqi@0 1568 // iteration, a "continues humongous" region might be visited
aoqi@0 1569 // before its associated "starts humongous".
aoqi@0 1570 return false;
aoqi@0 1571 }
aoqi@0 1572
aoqi@0 1573 int failures = 0;
aoqi@0 1574
aoqi@0 1575 // Call the CalcLiveObjectsClosure to walk the marking bitmap for
aoqi@0 1576 // this region and set the corresponding bits in the expected region
aoqi@0 1577 // and card bitmaps.
aoqi@0 1578 bool res = _calc_cl.doHeapRegion(hr);
aoqi@0 1579 assert(res == false, "should be continuing");
aoqi@0 1580
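// Take the ParGCRareEvent_lock only when verbose output is requested, so
// that per-region log messages from different workers do not interleave.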
aoqi@0 1581 MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
aoqi@0 1582 Mutex::_no_safepoint_check_flag);
aoqi@0 1583
aoqi@0 1584 // Verify the marked bytes for this region.
aoqi@0 1585 size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
aoqi@0 1586 size_t act_marked_bytes = hr->next_marked_bytes();
aoqi@0 1587
aoqi@0 1588 // We're not OK if expected marked bytes > actual marked bytes. It means
aoqi@0 1589 // we have missed accounting for some objects during the actual marking.
aoqi@0 1590 if (exp_marked_bytes > act_marked_bytes) {
aoqi@0 1591 if (_verbose) {
aoqi@0 1592 gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
aoqi@0 1593 "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
tschatzl@7091 1594 hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
aoqi@0 1595 }
aoqi@0 1596 failures += 1;
aoqi@0 1597 }
aoqi@0 1598
aoqi@0 1599 // Verify the bit for this region in both the actual region bitmap
aoqi@0 1600 // and the expected region bitmap (which was just calculated).
aoqi@0 1601 // We're not OK if the bit in the calculated expected region
aoqi@0 1602 // bitmap is set and the bit in the actual region bitmap is not.
tschatzl@7091 1603 BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
aoqi@0 1604
aoqi@0 1605 bool expected = _exp_region_bm->at(index);
aoqi@0 1606 bool actual = _region_bm->at(index);
aoqi@0 1607 if (expected && !actual) {
aoqi@0 1608 if (_verbose) {
aoqi@0 1609 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
aoqi@0 1610 "expected: %s, actual: %s",
tschatzl@7091 1611 hr->hrm_index(),
aoqi@0 1612 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
aoqi@0 1613 }
aoqi@0 1614 failures += 1;
aoqi@0 1615 }
aoqi@0 1616
aoqi@0 1617 // Verify that the card bitmaps for the cards spanned by the current
aoqi@0 1618 // region match. We have an error if we have a set bit in the expected
aoqi@0 1619 // bit map and the corresponding bit in the actual bitmap is not set.
aoqi@0 1620
aoqi@0 1621 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
aoqi@0 1622 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
aoqi@0 1623
aoqi@0 1624 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
aoqi@0 1625 expected = _exp_card_bm->at(i);
aoqi@0 1626 actual = _card_bm->at(i);
aoqi@0 1627
aoqi@0 1628 if (expected && !actual) {
aoqi@0 1629 if (_verbose) {
aoqi@0 1630 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
aoqi@0 1631 "expected: %s, actual: %s",
tschatzl@7091 1632 hr->hrm_index(), i,
aoqi@0 1633 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
aoqi@0 1634 }
aoqi@0 1635 failures += 1;
aoqi@0 1636 }
aoqi@0 1637 }
aoqi@0 1638
aoqi@0 1639 if (failures > 0 && _verbose) {
aoqi@0 1640 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
aoqi@0 1641 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
aoqi@0 1642 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
aoqi@0 1643 _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
aoqi@0 1644 }
aoqi@0 1645
aoqi@0 1646 _failures += failures;
aoqi@0 1647
aoqi@0 1648 // We could stop iteration over the heap when we
aoqi@0 1649 // find the first violating region by returning true.
aoqi@0 1650 return false;
aoqi@0 1651 }
aoqi@0 1652 };
aoqi@0 1653
aoqi@0 1654 class G1ParVerifyFinalCountTask: public AbstractGangTask {
aoqi@0 1655 protected:
aoqi@0 1656 G1CollectedHeap* _g1h;
aoqi@0 1657 ConcurrentMark* _cm;
aoqi@0 1658 BitMap* _actual_region_bm;
aoqi@0 1659 BitMap* _actual_card_bm;
aoqi@0 1660
aoqi@0 1661 uint _n_workers;
aoqi@0 1662
aoqi@0 1663 BitMap* _expected_region_bm;
aoqi@0 1664 BitMap* _expected_card_bm;
aoqi@0 1665
aoqi@0 1666 int _failures;
aoqi@0 1667 bool _verbose;
aoqi@0 1668
aoqi@0 1669 public:
aoqi@0 1670 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
aoqi@0 1671 BitMap* region_bm, BitMap* card_bm,
aoqi@0 1672 BitMap* expected_region_bm, BitMap* expected_card_bm)
aoqi@0 1673 : AbstractGangTask("G1 verify final counting"),
aoqi@0 1674 _g1h(g1h), _cm(_g1h->concurrent_mark()),
aoqi@0 1675 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
aoqi@0 1676 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
aoqi@0 1677 _failures(0), _verbose(false),
aoqi@0 1678 _n_workers(0) {
aoqi@0 1679 assert(VerifyDuringGC, "don't call this otherwise");
aoqi@0 1680
aoqi@0 1681 // Use the value already set as the number of active threads
aoqi@0 1682 // in the call to run_task().
aoqi@0 1683 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 1684 assert( _g1h->workers()->active_workers() > 0,
aoqi@0 1685 "Should have been previously set");
aoqi@0 1686 _n_workers = _g1h->workers()->active_workers();
aoqi@0 1687 } else {
aoqi@0 1688 _n_workers = 1;
aoqi@0 1689 }
aoqi@0 1690
aoqi@0 1691 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
aoqi@0 1692 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
aoqi@0 1693
aoqi@0 1694 _verbose = _cm->verbose_medium();
aoqi@0 1695 }
aoqi@0 1696
aoqi@0 1697 void work(uint worker_id) {
aoqi@0 1698 assert(worker_id < _n_workers, "invariant");
aoqi@0 1699
aoqi@0 1700 VerifyLiveObjectDataHRClosure verify_cl(_g1h,
aoqi@0 1701 _actual_region_bm, _actual_card_bm,
aoqi@0 1702 _expected_region_bm,
aoqi@0 1703 _expected_card_bm,
aoqi@0 1704 _verbose);
aoqi@0 1705
aoqi@0 1706 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 1707 _g1h->heap_region_par_iterate_chunked(&verify_cl,
aoqi@0 1708 worker_id,
aoqi@0 1709 _n_workers,
aoqi@0 1710 HeapRegion::VerifyCountClaimValue);
aoqi@0 1711 } else {
aoqi@0 1712 _g1h->heap_region_iterate(&verify_cl);
aoqi@0 1713 }
aoqi@0 1714
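// Accumulate this worker's verification failures into the task-wide total.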
aoqi@0 1715 Atomic::add(verify_cl.failures(), &_failures);
aoqi@0 1716 }
aoqi@0 1717
aoqi@0 1718 int failures() const { return _failures; }
aoqi@0 1719 };
aoqi@0 1720
aoqi@0 1721 // Closure that finalizes the liveness counting data.
aoqi@0 1722 // Used during the cleanup pause.
aoqi@0 1723 // Sets the bits corresponding to the interval [NTAMS, top]
aoqi@0 1724 // (which contains the implicitly live objects) in the
aoqi@0 1725 // card liveness bitmap. Also sets the bit in the region liveness
aoqi@0 1726 // bitmap for each region that contains live data.
aoqi@0 1727
aoqi@0 1728 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
aoqi@0 1729 public:
aoqi@0 1730 FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
aoqi@0 1731 BitMap* region_bm,
aoqi@0 1732 BitMap* card_bm) :
aoqi@0 1733 CMCountDataClosureBase(g1h, region_bm, card_bm) { }
aoqi@0 1734
aoqi@0 1735 bool doHeapRegion(HeapRegion* hr) {
aoqi@0 1736
aoqi@0 1737 if (hr->continuesHumongous()) {
aoqi@0 1738 // We will ignore these here and process them when their
aoqi@0 1739 // associated "starts humongous" region is processed (see
aoqi@0 1740 // set_bit_for_heap_region()). Note that we cannot rely on their
aoqi@0 1741 // associated "starts humongous" region to have its bit set to
aoqi@0 1742 // 1 since, due to the region chunking in the parallel region
aoqi@0 1743 // iteration, a "continues humongous" region might be visited
aoqi@0 1744 // before its associated "starts humongous".
aoqi@0 1745 return false;
aoqi@0 1746 }
aoqi@0 1747
aoqi@0 1748 HeapWord* ntams = hr->next_top_at_mark_start();
aoqi@0 1749 HeapWord* top = hr->top();
aoqi@0 1750
aoqi@0 1751 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
aoqi@0 1752
aoqi@0 1753 // Mark the allocated-since-marking portion...
aoqi@0 1754 if (ntams < top) {
aoqi@0 1755 // This definitely means the region has live objects.
aoqi@0 1756 set_bit_for_region(hr);
aoqi@0 1757
aoqi@0 1758 // Now set the bits in the card bitmap for [ntams, top)
aoqi@0 1759 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
aoqi@0 1760 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
aoqi@0 1761
aoqi@0 1762 // Note: if we're looking at the last region in the heap, top
aoqi@0 1763 // could actually be just beyond the end of the heap; end_idx
aoqi@0 1764 // will then correspond to a (non-existent) card that is also
aoqi@0 1765 // just beyond the heap.
aoqi@0 1766 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
aoqi@0 1767 // end of object is not card aligned - increment to cover
aoqi@0 1768 // all the cards spanned by the object
aoqi@0 1769 end_idx += 1;
aoqi@0 1770 }
aoqi@0 1771
aoqi@0 1772 assert(end_idx <= _card_bm->size(),
aoqi@0 1773 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
aoqi@0 1774 end_idx, _card_bm->size()));
aoqi@0 1775 assert(start_idx < _card_bm->size(),
aoqi@0 1776 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
aoqi@0 1777 start_idx, _card_bm->size()));
aoqi@0 1778
aoqi@0 1779 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
aoqi@0 1780 }
aoqi@0 1781
aoqi@0 1782 // Set the bit for the region if it contains live data
aoqi@0 1783 if (hr->next_marked_bytes() > 0) {
aoqi@0 1784 set_bit_for_region(hr);
aoqi@0 1785 }
aoqi@0 1786
aoqi@0 1787 return false;
aoqi@0 1788 }
aoqi@0 1789 };
aoqi@0 1790
aoqi@0 1791 class G1ParFinalCountTask: public AbstractGangTask {
aoqi@0 1792 protected:
aoqi@0 1793 G1CollectedHeap* _g1h;
aoqi@0 1794 ConcurrentMark* _cm;
aoqi@0 1795 BitMap* _actual_region_bm;
aoqi@0 1796 BitMap* _actual_card_bm;
aoqi@0 1797
aoqi@0 1798 uint _n_workers;
aoqi@0 1799
aoqi@0 1800 public:
aoqi@0 1801 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
aoqi@0 1802 : AbstractGangTask("G1 final counting"),
aoqi@0 1803 _g1h(g1h), _cm(_g1h->concurrent_mark()),
aoqi@0 1804 _actual_region_bm(region_bm), _actual_card_bm(card_bm),
aoqi@0 1805 _n_workers(0) {
aoqi@0 1806 // Use the value already set as the number of active threads
aoqi@0 1807 // in the call to run_task().
aoqi@0 1808 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 1809 assert( _g1h->workers()->active_workers() > 0,
aoqi@0 1810 "Should have been previously set");
aoqi@0 1811 _n_workers = _g1h->workers()->active_workers();
aoqi@0 1812 } else {
aoqi@0 1813 _n_workers = 1;
aoqi@0 1814 }
aoqi@0 1815 }
aoqi@0 1816
aoqi@0 1817 void work(uint worker_id) {
aoqi@0 1818 assert(worker_id < _n_workers, "invariant");
aoqi@0 1819
aoqi@0 1820 FinalCountDataUpdateClosure final_update_cl(_g1h,
aoqi@0 1821 _actual_region_bm,
aoqi@0 1822 _actual_card_bm);
aoqi@0 1823
aoqi@0 1824 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 1825 _g1h->heap_region_par_iterate_chunked(&final_update_cl,
aoqi@0 1826 worker_id,
aoqi@0 1827 _n_workers,
aoqi@0 1828 HeapRegion::FinalCountClaimValue);
aoqi@0 1829 } else {
aoqi@0 1830 _g1h->heap_region_iterate(&final_update_cl);
aoqi@0 1831 }
aoqi@0 1832 }
aoqi@0 1833 };
aoqi@0 1834
aoqi@0 1835 class G1ParNoteEndTask;
aoqi@0 1836
aoqi@0 1837 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
aoqi@0 1838 G1CollectedHeap* _g1;
aoqi@0 1839 size_t _max_live_bytes;
aoqi@0 1840 uint _regions_claimed;
aoqi@0 1841 size_t _freed_bytes;
aoqi@0 1842 FreeRegionList* _local_cleanup_list;
aoqi@0 1843 HeapRegionSetCount _old_regions_removed;
aoqi@0 1844 HeapRegionSetCount _humongous_regions_removed;
aoqi@0 1845 HRRSCleanupTask* _hrrs_cleanup_task;
aoqi@0 1846 double _claimed_region_time;
aoqi@0 1847 double _max_region_time;
aoqi@0 1848
aoqi@0 1849 public:
aoqi@0 1850 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
aoqi@0 1851 FreeRegionList* local_cleanup_list,
aoqi@0 1852 HRRSCleanupTask* hrrs_cleanup_task) :
aoqi@0 1853 _g1(g1),
aoqi@0 1854 _max_live_bytes(0), _regions_claimed(0),
aoqi@0 1855 _freed_bytes(0),
aoqi@0 1856 _claimed_region_time(0.0), _max_region_time(0.0),
aoqi@0 1857 _local_cleanup_list(local_cleanup_list),
aoqi@0 1858 _old_regions_removed(),
aoqi@0 1859 _humongous_regions_removed(),
aoqi@0 1860 _hrrs_cleanup_task(hrrs_cleanup_task) { }
aoqi@0 1861
aoqi@0 1862 size_t freed_bytes() { return _freed_bytes; }
aoqi@0 1863 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
aoqi@0 1864 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
aoqi@0 1865
aoqi@0 1866 bool doHeapRegion(HeapRegion *hr) {
aoqi@0 1867 if (hr->continuesHumongous()) {
aoqi@0 1868 return false;
aoqi@0 1869 }
aoqi@0 1870 // We use a claim value of zero here because all regions
aoqi@0 1871 // were claimed with value 1 in the FinalCount task.
aoqi@0 1872 _g1->reset_gc_time_stamps(hr);
aoqi@0 1873 double start = os::elapsedTime();
aoqi@0 1874 _regions_claimed++;
aoqi@0 1875 hr->note_end_of_marking();
aoqi@0 1876 _max_live_bytes += hr->max_live_bytes();
aoqi@0 1877
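// A non-young region that has allocated data but no live bytes after
// marking is entirely garbage: free it now and record it in the local
// cleanup list, instead of waiting for a later evacuation pause.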
aoqi@0 1878 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
aoqi@0 1879 _freed_bytes += hr->used();
aoqi@0 1880 hr->set_containing_set(NULL);
aoqi@0 1881 if (hr->isHumongous()) {
aoqi@0 1882 assert(hr->startsHumongous(), "we should only see starts humongous");
aoqi@0 1883 _humongous_regions_removed.increment(1u, hr->capacity());
aoqi@0 1884 _g1->free_humongous_region(hr, _local_cleanup_list, true);
aoqi@0 1885 } else {
aoqi@0 1886 _old_regions_removed.increment(1u, hr->capacity());
aoqi@0 1887 _g1->free_region(hr, _local_cleanup_list, true);
aoqi@0 1888 }
aoqi@0 1889 } else {
aoqi@0 1890 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
aoqi@0 1891 }
aoqi@0 1892
aoqi@0 1893 double region_time = (os::elapsedTime() - start);
aoqi@0 1894 _claimed_region_time += region_time;
aoqi@0 1895 if (region_time > _max_region_time) {
aoqi@0 1896 _max_region_time = region_time;
aoqi@0 1897 }
aoqi@0 1898 return false;
aoqi@0 1899 }
aoqi@0 1900
aoqi@0 1901 size_t max_live_bytes() { return _max_live_bytes; }
aoqi@0 1902 uint regions_claimed() { return _regions_claimed; }
aoqi@0 1903 double claimed_region_time_sec() { return _claimed_region_time; }
aoqi@0 1904 double max_region_time_sec() { return _max_region_time; }
aoqi@0 1905 };
aoqi@0 1906
aoqi@0 1907 class G1ParNoteEndTask: public AbstractGangTask {
aoqi@0 1908 friend class G1NoteEndOfConcMarkClosure;
aoqi@0 1909
aoqi@0 1910 protected:
aoqi@0 1911 G1CollectedHeap* _g1h;
aoqi@0 1912 size_t _max_live_bytes;
aoqi@0 1913 size_t _freed_bytes;
aoqi@0 1914 FreeRegionList* _cleanup_list;
aoqi@0 1915
aoqi@0 1916 public:
aoqi@0 1917 G1ParNoteEndTask(G1CollectedHeap* g1h,
aoqi@0 1918 FreeRegionList* cleanup_list) :
aoqi@0 1919 AbstractGangTask("G1 note end"), _g1h(g1h),
aoqi@0 1920 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
aoqi@0 1921
aoqi@0 1922 void work(uint worker_id) {
aoqi@0 1923 double start = os::elapsedTime();
aoqi@0 1924 FreeRegionList local_cleanup_list("Local Cleanup List");
aoqi@0 1925 HRRSCleanupTask hrrs_cleanup_task;
aoqi@0 1926 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
aoqi@0 1927 &hrrs_cleanup_task);
aoqi@0 1928 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 1929 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
aoqi@0 1930 _g1h->workers()->active_workers(),
aoqi@0 1931 HeapRegion::NoteEndClaimValue);
aoqi@0 1932 } else {
aoqi@0 1933 _g1h->heap_region_iterate(&g1_note_end);
aoqi@0 1934 }
aoqi@0 1935 assert(g1_note_end.complete(), "Shouldn't have yielded!");
aoqi@0 1936
aoqi@0 1937 // Now update the lists
aoqi@0 1938 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
aoqi@0 1939 {
aoqi@0 1940 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
aoqi@0 1941 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
aoqi@0 1942 _max_live_bytes += g1_note_end.max_live_bytes();
aoqi@0 1943 _freed_bytes += g1_note_end.freed_bytes();
aoqi@0 1944
aoqi@0 1945 // If we iterate over the global cleanup list at the end of
aoqi@0 1946 // cleanup to do this printing, we cannot guarantee that we only
aoqi@0 1947 // generate output for the newly-reclaimed regions (the list
aoqi@0 1948 // might not be empty at the beginning of cleanup; we might
aoqi@0 1949 // still be working on its previous contents). So we do the
aoqi@0 1950 // printing here, before we append the new regions to the global
aoqi@0 1951 // cleanup list.
aoqi@0 1952
aoqi@0 1953 G1HRPrinter* hr_printer = _g1h->hr_printer();
aoqi@0 1954 if (hr_printer->is_active()) {
aoqi@0 1955 FreeRegionListIterator iter(&local_cleanup_list);
aoqi@0 1956 while (iter.more_available()) {
aoqi@0 1957 HeapRegion* hr = iter.get_next();
aoqi@0 1958 hr_printer->cleanup(hr);
aoqi@0 1959 }
aoqi@0 1960 }
aoqi@0 1961
aoqi@0 1962 _cleanup_list->add_ordered(&local_cleanup_list);
aoqi@0 1963 assert(local_cleanup_list.is_empty(), "post-condition");
aoqi@0 1964
aoqi@0 1965 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
aoqi@0 1966 }
aoqi@0 1967 }
aoqi@0 1968 size_t max_live_bytes() { return _max_live_bytes; }
aoqi@0 1969 size_t freed_bytes() { return _freed_bytes; }
aoqi@0 1970 };
aoqi@0 1971
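// Task that scrubs the remembered sets: entries referring to cards or
// regions that the liveness bitmaps show contain no live data are removed,
// so they do not have to be scanned during future evacuation pauses.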
aoqi@0 1972 class G1ParScrubRemSetTask: public AbstractGangTask {
aoqi@0 1973 protected:
aoqi@0 1974 G1RemSet* _g1rs;
aoqi@0 1975 BitMap* _region_bm;
aoqi@0 1976 BitMap* _card_bm;
aoqi@0 1977 public:
aoqi@0 1978 G1ParScrubRemSetTask(G1CollectedHeap* g1h,
aoqi@0 1979 BitMap* region_bm, BitMap* card_bm) :
aoqi@0 1980 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
aoqi@0 1981 _region_bm(region_bm), _card_bm(card_bm) { }
aoqi@0 1982
aoqi@0 1983 void work(uint worker_id) {
aoqi@0 1984 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 1985 _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
aoqi@0 1986 HeapRegion::ScrubRemSetClaimValue);
aoqi@0 1987 } else {
aoqi@0 1988 _g1rs->scrub(_region_bm, _card_bm);
aoqi@0 1989 }
aoqi@0 1990 }
aoqi@0 1991
aoqi@0 1992 };
aoqi@0 1993
aoqi@0 1994 void ConcurrentMark::cleanup() {
aoqi@0 1995 // world is stopped at this checkpoint
aoqi@0 1996 assert(SafepointSynchronize::is_at_safepoint(),
aoqi@0 1997 "world should be stopped");
aoqi@0 1998 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 1999
aoqi@0 2000 // If a full collection has happened, we shouldn't do this.
aoqi@0 2001 if (has_aborted()) {
aoqi@0 2002 g1h->set_marking_complete(); // So bitmap clearing isn't confused
aoqi@0 2003 return;
aoqi@0 2004 }
aoqi@0 2005
aoqi@0 2006 g1h->verify_region_sets_optional();
aoqi@0 2007
aoqi@0 2008 if (VerifyDuringGC) {
aoqi@0 2009 HandleMark hm; // handle scope
aoqi@0 2010 Universe::heap()->prepare_for_verify();
aoqi@0 2011 Universe::verify(VerifyOption_G1UsePrevMarking,
aoqi@0 2012 " VerifyDuringGC:(before)");
aoqi@0 2013 }
brutisso@7005 2014 g1h->check_bitmaps("Cleanup Start");
aoqi@0 2015
aoqi@0 2016 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
aoqi@0 2017 g1p->record_concurrent_mark_cleanup_start();
aoqi@0 2018
aoqi@0 2019 double start = os::elapsedTime();
aoqi@0 2020
aoqi@0 2021 HeapRegionRemSet::reset_for_cleanup_tasks();
aoqi@0 2022
aoqi@0 2023 uint n_workers;
aoqi@0 2024
aoqi@0 2025 // Do counting once more with the world stopped for good measure.
aoqi@0 2026 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
aoqi@0 2027
aoqi@0 2028 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 2029 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
aoqi@0 2030 "sanity check");
aoqi@0 2031
aoqi@0 2032 g1h->set_par_threads();
aoqi@0 2033 n_workers = g1h->n_par_threads();
aoqi@0 2034 assert(g1h->n_par_threads() == n_workers,
aoqi@0 2035 "Should not have been reset");
aoqi@0 2036 g1h->workers()->run_task(&g1_par_count_task);
aoqi@0 2037 // Done with the parallel phase so reset to 0.
aoqi@0 2038 g1h->set_par_threads(0);
aoqi@0 2039
aoqi@0 2040 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
aoqi@0 2041 "sanity check");
aoqi@0 2042 } else {
aoqi@0 2043 n_workers = 1;
aoqi@0 2044 g1_par_count_task.work(0);
aoqi@0 2045 }
aoqi@0 2046
aoqi@0 2047 if (VerifyDuringGC) {
aoqi@0 2048 // Verify that the counting data accumulated during marking matches
aoqi@0 2049 // that calculated by walking the marking bitmap.
aoqi@0 2050
aoqi@0 2051 // Bitmaps to hold expected values
mgerdin@6977 2052 BitMap expected_region_bm(_region_bm.size(), true);
mgerdin@6977 2053 BitMap expected_card_bm(_card_bm.size(), true);
aoqi@0 2054
aoqi@0 2055 G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
aoqi@0 2056 &_region_bm,
aoqi@0 2057 &_card_bm,
aoqi@0 2058 &expected_region_bm,
aoqi@0 2059 &expected_card_bm);
aoqi@0 2060
aoqi@0 2061 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 2062 g1h->set_par_threads((int)n_workers);
aoqi@0 2063 g1h->workers()->run_task(&g1_par_verify_task);
aoqi@0 2064 // Done with the parallel phase so reset to 0.
aoqi@0 2065 g1h->set_par_threads(0);
aoqi@0 2066
aoqi@0 2067 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
aoqi@0 2068 "sanity check");
aoqi@0 2069 } else {
aoqi@0 2070 g1_par_verify_task.work(0);
aoqi@0 2071 }
aoqi@0 2072
aoqi@0 2073 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
aoqi@0 2074 }
aoqi@0 2075
aoqi@0 2076 size_t start_used_bytes = g1h->used();
aoqi@0 2077 g1h->set_marking_complete();
aoqi@0 2078
aoqi@0 2079 double count_end = os::elapsedTime();
aoqi@0 2080 double this_final_counting_time = (count_end - start);
aoqi@0 2081 _total_counting_time += this_final_counting_time;
aoqi@0 2082
aoqi@0 2083 if (G1PrintRegionLivenessInfo) {
aoqi@0 2084 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
aoqi@0 2085 _g1h->heap_region_iterate(&cl);
aoqi@0 2086 }
aoqi@0 2087
aoqi@0 2088 // Install the just-completed "next" mark bitmap as the "prev" bitmap.
aoqi@0 2089 swapMarkBitMaps();
aoqi@0 2090
aoqi@0 2091 g1h->reset_gc_time_stamp();
aoqi@0 2092
aoqi@0 2093 // Note end of marking in all heap regions.
aoqi@0 2094 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
aoqi@0 2095 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 2096 g1h->set_par_threads((int)n_workers);
aoqi@0 2097 g1h->workers()->run_task(&g1_par_note_end_task);
aoqi@0 2098 g1h->set_par_threads(0);
aoqi@0 2099
aoqi@0 2100 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
aoqi@0 2101 "sanity check");
aoqi@0 2102 } else {
aoqi@0 2103 g1_par_note_end_task.work(0);
aoqi@0 2104 }
aoqi@0 2105 g1h->check_gc_time_stamps();
aoqi@0 2106
aoqi@0 2107 if (!cleanup_list_is_empty()) {
aoqi@0 2108 // The cleanup list is not empty, so we'll have to process it
aoqi@0 2109 // concurrently. Notify anyone else that might be wanting free
aoqi@0 2110 // regions that there will be more free regions coming soon.
aoqi@0 2111 g1h->set_free_regions_coming();
aoqi@0 2112 }
aoqi@0 2113
aoqi@0 2114 // Scrub the remembered sets (if enabled) before the record_concurrent_mark_cleanup_end()
aoqi@0 2115 // call below, since scrubbing affects the metric by which we sort the heap regions.
aoqi@0 2116 if (G1ScrubRemSets) {
aoqi@0 2117 double rs_scrub_start = os::elapsedTime();
aoqi@0 2118 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
aoqi@0 2119 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 2120 g1h->set_par_threads((int)n_workers);
aoqi@0 2121 g1h->workers()->run_task(&g1_par_scrub_rs_task);
aoqi@0 2122 g1h->set_par_threads(0);
aoqi@0 2123
aoqi@0 2124 assert(g1h->check_heap_region_claim_values(
aoqi@0 2125 HeapRegion::ScrubRemSetClaimValue),
aoqi@0 2126 "sanity check");
aoqi@0 2127 } else {
aoqi@0 2128 g1_par_scrub_rs_task.work(0);
aoqi@0 2129 }
aoqi@0 2130
aoqi@0 2131 double rs_scrub_end = os::elapsedTime();
aoqi@0 2132 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
aoqi@0 2133 _total_rs_scrub_time += this_rs_scrub_time;
aoqi@0 2134 }
aoqi@0 2135
aoqi@0 2136 // this will also free any regions totally full of garbage objects,
aoqi@0 2137 // and sort the regions.
aoqi@0 2138 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
aoqi@0 2139
aoqi@0 2140 // Statistics.
aoqi@0 2141 double end = os::elapsedTime();
aoqi@0 2142 _cleanup_times.add((end - start) * 1000.0);
aoqi@0 2143
aoqi@0 2144 if (G1Log::fine()) {
aoqi@0 2145 g1h->print_size_transition(gclog_or_tty,
aoqi@0 2146 start_used_bytes,
aoqi@0 2147 g1h->used(),
aoqi@0 2148 g1h->capacity());
aoqi@0 2149 }
aoqi@0 2150
aoqi@0 2151 // Clean up will have freed any regions completely full of garbage.
aoqi@0 2152 // Update the soft reference policy with the new heap occupancy.
aoqi@0 2153 Universe::update_heap_info_at_gc();
aoqi@0 2154
aoqi@0 2155 if (VerifyDuringGC) {
aoqi@0 2156 HandleMark hm; // handle scope
aoqi@0 2157 Universe::heap()->prepare_for_verify();
aoqi@0 2158 Universe::verify(VerifyOption_G1UsePrevMarking,
aoqi@0 2159 " VerifyDuringGC:(after)");
aoqi@0 2160 }
brutisso@7005 2161 g1h->check_bitmaps("Cleanup End");
aoqi@0 2162
aoqi@0 2163 g1h->verify_region_sets_optional();
stefank@6992 2164
stefank@6992 2165 // We need to make this count as a "collection" so any collection pause that
stefank@6992 2166 // races with it goes around and waits for completeCleanup to finish.
stefank@6992 2167 g1h->increment_total_collections();
stefank@6992 2168
stefank@6992 2169 // Clean out dead classes and update Metaspace sizes.
stefank@6996 2170 if (ClassUnloadingWithConcurrentMark) {
stefank@6996 2171 ClassLoaderDataGraph::purge();
stefank@6996 2172 }
stefank@6992 2173 MetaspaceGC::compute_new_size();
stefank@6992 2174
stefank@6992 2175 // We reclaimed old regions so we should calculate the sizes to make
stefank@6992 2176 // sure we update the old gen/space data.
stefank@6992 2177 g1h->g1mm()->update_sizes();
sjohanss@7370 2178 g1h->allocation_context_stats().update_after_mark();
stefank@6992 2179
aoqi@0 2180 g1h->trace_heap_after_concurrent_cycle();
aoqi@0 2181 }
aoqi@0 2182
aoqi@0 2183 void ConcurrentMark::completeCleanup() {
aoqi@0 2184 if (has_aborted()) return;
aoqi@0 2185
aoqi@0 2186 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 2187
aoqi@0 2188 _cleanup_list.verify_optional();
aoqi@0 2189 FreeRegionList tmp_free_list("Tmp Free List");
aoqi@0 2190
aoqi@0 2191 if (G1ConcRegionFreeingVerbose) {
aoqi@0 2192 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
aoqi@0 2193 "cleanup list has %u entries",
aoqi@0 2194 _cleanup_list.length());
aoqi@0 2195 }
aoqi@0 2196
tschatzl@7051 2197 // No one else should be accessing the _cleanup_list at this point,
tschatzl@7051 2198 // so it is not necessary to take any locks
aoqi@0 2199 while (!_cleanup_list.is_empty()) {
tschatzl@7050 2200 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
aoqi@0 2201 assert(hr != NULL, "Got NULL from a non-empty list");
aoqi@0 2202 hr->par_clear();
aoqi@0 2203 tmp_free_list.add_ordered(hr);
aoqi@0 2204
aoqi@0 2205 // Instead of adding one region at a time to the secondary_free_list,
aoqi@0 2206 // we accumulate them in the local list and move them a few at a
aoqi@0 2207 // time. This also cuts down on the number of notify_all() calls
aoqi@0 2208 // we do during this process. We'll also append the local list when
aoqi@0 2209 // _cleanup_list is empty (which means we just removed the last
aoqi@0 2210 // region from the _cleanup_list).
aoqi@0 2211 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
aoqi@0 2212 _cleanup_list.is_empty()) {
aoqi@0 2213 if (G1ConcRegionFreeingVerbose) {
aoqi@0 2214 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
aoqi@0 2215 "appending %u entries to the secondary_free_list, "
aoqi@0 2216 "cleanup list still has %u entries",
aoqi@0 2217 tmp_free_list.length(),
aoqi@0 2218 _cleanup_list.length());
aoqi@0 2219 }
aoqi@0 2220
aoqi@0 2221 {
aoqi@0 2222 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
aoqi@0 2223 g1h->secondary_free_list_add(&tmp_free_list);
aoqi@0 2224 SecondaryFreeList_lock->notify_all();
aoqi@0 2225 }
aoqi@0 2226
aoqi@0 2227 if (G1StressConcRegionFreeing) {
aoqi@0 2228 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
aoqi@0 2229 os::sleep(Thread::current(), (jlong) 1, false);
aoqi@0 2230 }
aoqi@0 2231 }
aoqi@0 2232 }
aoqi@0 2233 }
aoqi@0 2234 assert(tmp_free_list.is_empty(), "post-condition");
aoqi@0 2235 }
aoqi@0 2236
aoqi@0 2237 // Supporting Object and Oop closures for reference discovery
aoqi@0 2238 // and processing during marking.
aoqi@0 2239
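// An object is considered alive if it lies outside the G1 reserved space or
// if it is not "ill" with respect to the current marking, i.e. it has been
// marked or was allocated since marking started (above the region's NTAMS).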
aoqi@0 2240 bool G1CMIsAliveClosure::do_object_b(oop obj) {
aoqi@0 2241 HeapWord* addr = (HeapWord*)obj;
aoqi@0 2242 return addr != NULL &&
aoqi@0 2243 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
aoqi@0 2244 }
aoqi@0 2245
aoqi@0 2246 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
aoqi@0 2247 // Uses the CMTask associated with a worker thread (for serial reference
aoqi@0 2248 // processing the CMTask for worker 0 is used) to preserve (mark) and
aoqi@0 2249 // trace referent objects.
aoqi@0 2250 //
aoqi@0 2251 // Using the CMTask and embedded local queues avoids having the worker
aoqi@0 2252 // threads operating on the global mark stack. This reduces the risk
aoqi@0 2253 // of overflowing the stack - which we would rather avoid at this late
aoqi@0 2254 // stage. Also, using the tasks' local queues removes the potential
aoqi@0 2255 // for the workers to interfere with each other, which could occur if
aoqi@0 2256 // they operated on the global stack.
aoqi@0 2257
aoqi@0 2258 class G1CMKeepAliveAndDrainClosure: public OopClosure {
aoqi@0 2259 ConcurrentMark* _cm;
aoqi@0 2260 CMTask* _task;
aoqi@0 2261 int _ref_counter_limit;
aoqi@0 2262 int _ref_counter;
aoqi@0 2263 bool _is_serial;
aoqi@0 2264 public:
aoqi@0 2265 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
aoqi@0 2266 _cm(cm), _task(task), _is_serial(is_serial),
aoqi@0 2267 _ref_counter_limit(G1RefProcDrainInterval) {
aoqi@0 2268 assert(_ref_counter_limit > 0, "sanity");
aoqi@0 2269 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
aoqi@0 2270 _ref_counter = _ref_counter_limit;
aoqi@0 2271 }
aoqi@0 2272
aoqi@0 2273 virtual void do_oop(narrowOop* p) { do_oop_work(p); }
aoqi@0 2274 virtual void do_oop( oop* p) { do_oop_work(p); }
aoqi@0 2275
aoqi@0 2276 template <class T> void do_oop_work(T* p) {
aoqi@0 2277 if (!_cm->has_overflown()) {
aoqi@0 2278 oop obj = oopDesc::load_decode_heap_oop(p);
aoqi@0 2279 if (_cm->verbose_high()) {
aoqi@0 2280 gclog_or_tty->print_cr("\t[%u] we're looking at location "
aoqi@0 2281 "*"PTR_FORMAT" = "PTR_FORMAT,
aoqi@0 2282 _task->worker_id(), p2i(p), p2i((void*) obj));
aoqi@0 2283 }
aoqi@0 2284
aoqi@0 2285 _task->deal_with_reference(obj);
aoqi@0 2286 _ref_counter--;
aoqi@0 2287
aoqi@0 2288 if (_ref_counter == 0) {
aoqi@0 2289 // We have dealt with _ref_counter_limit references, pushing them
aoqi@0 2290 // and objects reachable from them on to the local stack (and
aoqi@0 2291 // possibly the global stack). Call CMTask::do_marking_step() to
aoqi@0 2292 // process these entries.
aoqi@0 2293 //
aoqi@0 2294 // We call CMTask::do_marking_step() in a loop, which we'll exit if
aoqi@0 2295 // there's nothing more to do (i.e. we're done with the entries that
aoqi@0 2296 // were pushed as a result of the CMTask::deal_with_reference() calls
aoqi@0 2297 // above) or we overflow.
aoqi@0 2298 //
aoqi@0 2299 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
aoqi@0 2300 // flag while there may still be some work to do. (See the comment at
aoqi@0 2301 // the beginning of CMTask::do_marking_step() for those conditions -
aoqi@0 2302 // one of which is reaching the specified time target.) It is only
aoqi@0 2303 // when CMTask::do_marking_step() returns without setting the
aoqi@0 2304 // has_aborted() flag that the marking step has completed.
aoqi@0 2305 do {
aoqi@0 2306 double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
aoqi@0 2307 _task->do_marking_step(mark_step_duration_ms,
aoqi@0 2308 false /* do_termination */,
aoqi@0 2309 _is_serial);
aoqi@0 2310 } while (_task->has_aborted() && !_cm->has_overflown());
aoqi@0 2311 _ref_counter = _ref_counter_limit;
aoqi@0 2312 }
aoqi@0 2313 } else {
aoqi@0 2314 if (_cm->verbose_high()) {
aoqi@0 2315 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
aoqi@0 2316 }
aoqi@0 2317 }
aoqi@0 2318 }
aoqi@0 2319 };
aoqi@0 2320
aoqi@0 2321 // 'Drain' closure used by both serial and parallel reference processing.
aoqi@0 2322 // Uses the CMTask associated with a given worker thread (for serial
aoqi@0 2323 // reference processing the CMTask for worker 0 is used). Calls the
aoqi@0 2324 // do_marking_step routine, with an unbelievably large timeout value,
aoqi@0 2325 // to drain the marking data structures of the remaining entries
aoqi@0 2326 // added by the 'keep alive' oop closure above.
aoqi@0 2327
aoqi@0 2328 class G1CMDrainMarkingStackClosure: public VoidClosure {
aoqi@0 2329 ConcurrentMark* _cm;
aoqi@0 2330 CMTask* _task;
aoqi@0 2331 bool _is_serial;
aoqi@0 2332 public:
aoqi@0 2333 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
aoqi@0 2334 _cm(cm), _task(task), _is_serial(is_serial) {
aoqi@0 2335 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
aoqi@0 2336 }
aoqi@0 2337
aoqi@0 2338 void do_void() {
aoqi@0 2339 do {
aoqi@0 2340 if (_cm->verbose_high()) {
aoqi@0 2341 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
aoqi@0 2342 _task->worker_id(), BOOL_TO_STR(_is_serial));
aoqi@0 2343 }
aoqi@0 2344
aoqi@0 2345 // We call CMTask::do_marking_step() to completely drain the local
aoqi@0 2346 // and global marking stacks of entries pushed by the 'keep alive'
aoqi@0 2347 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
aoqi@0 2348 //
aoqi@0 2349 // CMTask::do_marking_step() is called in a loop, which we'll exit
aoqi@0 2350 // if there's nothing more to do (i.e. we've completely drained the
aoqi@0 2351 // entries that were pushed as a result of applying the 'keep alive'
aoqi@0 2352 // closure to the entries on the discovered ref lists) or we overflow
aoqi@0 2353 // the global marking stack.
aoqi@0 2354 //
aoqi@0 2355 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
aoqi@0 2356 // flag while there may still be some work to do. (See the comment at
aoqi@0 2357 // the beginning of CMTask::do_marking_step() for those conditions -
aoqi@0 2358 // one of which is reaching the specified time target.) It is only
aoqi@0 2359 // when CMTask::do_marking_step() returns without setting the
aoqi@0 2360 // has_aborted() flag that the marking step has completed.
aoqi@0 2361
aoqi@0 2362 _task->do_marking_step(1000000000.0 /* something very large */,
aoqi@0 2363 true /* do_termination */,
aoqi@0 2364 _is_serial);
aoqi@0 2365 } while (_task->has_aborted() && !_cm->has_overflown());
aoqi@0 2366 }
aoqi@0 2367 };
aoqi@0 2368
aoqi@0 2369 // Implementation of AbstractRefProcTaskExecutor for parallel
aoqi@0 2370 // reference processing at the end of G1 concurrent marking
aoqi@0 2371
aoqi@0 2372 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
aoqi@0 2373 private:
aoqi@0 2374 G1CollectedHeap* _g1h;
aoqi@0 2375 ConcurrentMark* _cm;
aoqi@0 2376 WorkGang* _workers;
aoqi@0 2377 int _active_workers;
aoqi@0 2378
aoqi@0 2379 public:
aoqi@0 2380 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
aoqi@0 2381 ConcurrentMark* cm,
aoqi@0 2382 WorkGang* workers,
aoqi@0 2383 int n_workers) :
aoqi@0 2384 _g1h(g1h), _cm(cm),
aoqi@0 2385 _workers(workers), _active_workers(n_workers) { }
aoqi@0 2386
aoqi@0 2387 // Executes the given task using concurrent marking worker threads.
aoqi@0 2388 virtual void execute(ProcessTask& task);
aoqi@0 2389 virtual void execute(EnqueueTask& task);
aoqi@0 2390 };
aoqi@0 2391
aoqi@0 2392 class G1CMRefProcTaskProxy: public AbstractGangTask {
aoqi@0 2393 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
aoqi@0 2394 ProcessTask& _proc_task;
aoqi@0 2395 G1CollectedHeap* _g1h;
aoqi@0 2396 ConcurrentMark* _cm;
aoqi@0 2397
aoqi@0 2398 public:
aoqi@0 2399 G1CMRefProcTaskProxy(ProcessTask& proc_task,
aoqi@0 2400 G1CollectedHeap* g1h,
aoqi@0 2401 ConcurrentMark* cm) :
aoqi@0 2402 AbstractGangTask("Process reference objects in parallel"),
aoqi@0 2403 _proc_task(proc_task), _g1h(g1h), _cm(cm) {
aoqi@0 2404 ReferenceProcessor* rp = _g1h->ref_processor_cm();
aoqi@0 2405 assert(rp->processing_is_mt(), "shouldn't be here otherwise");
aoqi@0 2406 }
aoqi@0 2407
aoqi@0 2408 virtual void work(uint worker_id) {
mdoerr@7020 2409 ResourceMark rm;
mdoerr@7020 2410 HandleMark hm;
aoqi@0 2411 CMTask* task = _cm->task(worker_id);
aoqi@0 2412 G1CMIsAliveClosure g1_is_alive(_g1h);
aoqi@0 2413 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
aoqi@0 2414 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
aoqi@0 2415
aoqi@0 2416 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
aoqi@0 2417 }
aoqi@0 2418 };
aoqi@0 2419
aoqi@0 2420 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
aoqi@0 2421 assert(_workers != NULL, "Need parallel worker threads.");
aoqi@0 2422 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
aoqi@0 2423
aoqi@0 2424 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
aoqi@0 2425
aoqi@0 2426 // We need to reset the concurrency level before each
aoqi@0 2427 // proxy task execution, so that the termination protocol
aoqi@0 2428 // and overflow handling in CMTask::do_marking_step() knows
aoqi@0 2429 // how many workers to wait for.
aoqi@0 2430 _cm->set_concurrency(_active_workers);
aoqi@0 2431 _g1h->set_par_threads(_active_workers);
aoqi@0 2432 _workers->run_task(&proc_task_proxy);
aoqi@0 2433 _g1h->set_par_threads(0);
aoqi@0 2434 }
aoqi@0 2435
aoqi@0 2436 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
aoqi@0 2437 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
aoqi@0 2438 EnqueueTask& _enq_task;
aoqi@0 2439
aoqi@0 2440 public:
aoqi@0 2441 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
aoqi@0 2442 AbstractGangTask("Enqueue reference objects in parallel"),
aoqi@0 2443 _enq_task(enq_task) { }
aoqi@0 2444
aoqi@0 2445 virtual void work(uint worker_id) {
aoqi@0 2446 _enq_task.work(worker_id);
aoqi@0 2447 }
aoqi@0 2448 };
aoqi@0 2449
aoqi@0 2450 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
aoqi@0 2451 assert(_workers != NULL, "Need parallel worker threads.");
aoqi@0 2452 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
aoqi@0 2453
aoqi@0 2454 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
aoqi@0 2455
aoqi@0 2456 // Not strictly necessary but...
aoqi@0 2457 //
aoqi@0 2458 // We need to reset the concurrency level before each
aoqi@0 2459 // proxy task execution, so that the termination protocol
aoqi@0 2460 // and overflow handling in CMTask::do_marking_step() knows
aoqi@0 2461 // how many workers to wait for.
aoqi@0 2462 _cm->set_concurrency(_active_workers);
aoqi@0 2463 _g1h->set_par_threads(_active_workers);
aoqi@0 2464 _workers->run_task(&enq_task_proxy);
aoqi@0 2465 _g1h->set_par_threads(0);
aoqi@0 2466 }
aoqi@0 2467
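// Parallel part of the unloading work done after reference processing:
// cleans the string and symbol tables, the code cache and the class loader
// data graph, using the marking is-alive information.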
stefank@6992 2468 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
stefank@6992 2469 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
stefank@6992 2470 }
stefank@6992 2471
stefank@6992 2472 // Helper class to get rid of some boilerplate code.
stefank@6992 2473 class G1RemarkGCTraceTime : public GCTraceTime {
stefank@6992 2474 static bool doit_and_prepend(bool doit) {
stefank@6992 2475 if (doit) {
stefank@6992 2476 gclog_or_tty->put(' ');
stefank@6992 2477 }
stefank@6992 2478 return doit;
stefank@6992 2479 }
stefank@6992 2480
stefank@6992 2481 public:
stefank@6992 2482 G1RemarkGCTraceTime(const char* title, bool doit)
stefank@6992 2483 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
stefank@6992 2484 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
stefank@6992 2485 }
stefank@6992 2486 };
stefank@6992 2487
aoqi@0 2488 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
aoqi@0 2489 if (has_overflown()) {
aoqi@0 2490 // Skip processing the discovered references if we have
aoqi@0 2491 // overflown the global marking stack. Reference objects
aoqi@0 2492 // only get discovered once so it is OK to not
aoqi@0 2493 // de-populate the discovered reference lists. We could have,
aoqi@0 2494 // but the only benefit would be that, when marking restarts,
aoqi@0 2495 // fewer reference objects are discovered.
aoqi@0 2496 return;
aoqi@0 2497 }
aoqi@0 2498
aoqi@0 2499 ResourceMark rm;
aoqi@0 2500 HandleMark hm;
aoqi@0 2501
aoqi@0 2502 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 2503
aoqi@0 2504 // Is alive closure.
aoqi@0 2505 G1CMIsAliveClosure g1_is_alive(g1h);
aoqi@0 2506
aoqi@0 2507 // Inner scope to exclude the cleaning of the string and symbol
aoqi@0 2508 // tables from the displayed time.
aoqi@0 2509 {
aoqi@0 2510 if (G1Log::finer()) {
aoqi@0 2511 gclog_or_tty->put(' ');
aoqi@0 2512 }
brutisso@6904 2513 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
aoqi@0 2514
aoqi@0 2515 ReferenceProcessor* rp = g1h->ref_processor_cm();
aoqi@0 2516
aoqi@0 2517 // See the comment in G1CollectedHeap::ref_processing_init()
aoqi@0 2518 // about how reference processing currently works in G1.
aoqi@0 2519
aoqi@0 2520 // Set the soft reference policy
aoqi@0 2521 rp->setup_policy(clear_all_soft_refs);
aoqi@0 2522 assert(_markStack.isEmpty(), "mark stack should be empty");
aoqi@0 2523
aoqi@0 2524 // Instances of the 'Keep Alive' and 'Complete GC' closures used
aoqi@0 2525 // in serial reference processing. Note these closures are also
aoqi@0 2526 // used for serially processing (by the current thread) the
aoqi@0 2527 // JNI references during parallel reference processing.
aoqi@0 2528 //
aoqi@0 2529 // These closures do not need to synchronize with the worker
aoqi@0 2530 // threads involved in parallel reference processing as these
aoqi@0 2531 // instances are executed serially by the current thread (e.g.
aoqi@0 2532 // reference processing is not multi-threaded and is thus
aoqi@0 2533 // performed by the current thread instead of a gang worker).
aoqi@0 2534 //
aoqi@0 2535 // The gang tasks involved in parallel reference processing create
aoqi@0 2536 // their own instances of these closures, which do their own
aoqi@0 2537 // synchronization among themselves.
aoqi@0 2538 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
aoqi@0 2539 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
aoqi@0 2540
aoqi@0 2541 // We need at least one active thread. If reference processing
aoqi@0 2542 // is not multi-threaded we use the current (VMThread) thread,
aoqi@0 2543 // otherwise we use the work gang from the G1CollectedHeap and
aoqi@0 2544 // we utilize all the worker threads we can.
aoqi@0 2545 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
aoqi@0 2546 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
aoqi@0 2547 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
aoqi@0 2548
aoqi@0 2549 // Parallel processing task executor.
aoqi@0 2550 G1CMRefProcTaskExecutor par_task_executor(g1h, this,
aoqi@0 2551 g1h->workers(), active_workers);
aoqi@0 2552 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
aoqi@0 2553
aoqi@0 2554 // Set the concurrency level. The phase was already set prior to
aoqi@0 2555 // executing the remark task.
aoqi@0 2556 set_concurrency(active_workers);
aoqi@0 2557
aoqi@0 2558 // Set the degree of MT processing here. If the discovery was done MT,
aoqi@0 2559 // the number of threads involved during discovery could differ from
aoqi@0 2560 // the number of active workers. This is OK as long as the discovered
aoqi@0 2561 // Reference lists are balanced (see balance_all_queues() and balance_queues()).
aoqi@0 2562 rp->set_active_mt_degree(active_workers);
aoqi@0 2563
aoqi@0 2564 // Process the weak references.
aoqi@0 2565 const ReferenceProcessorStats& stats =
aoqi@0 2566 rp->process_discovered_references(&g1_is_alive,
aoqi@0 2567 &g1_keep_alive,
aoqi@0 2568 &g1_drain_mark_stack,
aoqi@0 2569 executor,
brutisso@6904 2570 g1h->gc_timer_cm(),
brutisso@6904 2571 concurrent_gc_id());
aoqi@0 2572 g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
aoqi@0 2573
aoqi@0 2574 // The do_oop work routines of the keep_alive and drain_marking_stack
aoqi@0 2575 // oop closures will set the has_overflown flag if we overflow the
aoqi@0 2576 // global marking stack.
aoqi@0 2577
aoqi@0 2578 assert(_markStack.overflow() || _markStack.isEmpty(),
aoqi@0 2579 "mark stack should be empty (unless it overflowed)");
aoqi@0 2580
aoqi@0 2581 if (_markStack.overflow()) {
aoqi@0 2582 // This should have been done already when we tried to push an
aoqi@0 2583 // entry on to the global mark stack. But let's do it again.
aoqi@0 2584 set_has_overflown();
aoqi@0 2585 }
aoqi@0 2586
aoqi@0 2587 assert(rp->num_q() == active_workers, "why not");
aoqi@0 2588
aoqi@0 2589 rp->enqueue_discovered_references(executor);
aoqi@0 2590
aoqi@0 2591 rp->verify_no_references_recorded();
aoqi@0 2592 assert(!rp->discovery_enabled(), "Post condition");
aoqi@0 2593 }
aoqi@0 2594
aoqi@0 2595 if (has_overflown()) {
aoqi@0 2596 // We cannot trust g1_is_alive if the marking stack overflowed
aoqi@0 2597 return;
aoqi@0 2598 }
aoqi@0 2599
stefank@6992 2600 assert(_markStack.isEmpty(), "Marking should have completed");
stefank@6992 2601
stefank@6992 2602 // Unload Klasses, String, Symbols, Code Cache, etc.
stefank@6992 2603 {
stefank@6996 2604 G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
stefank@6996 2605
stefank@6996 2606 if (ClassUnloadingWithConcurrentMark) {
stefank@7333 2607 // Cleaning of klasses depends on correct information from MetadataMarkOnStack. The CodeCache::mark_on_stack
stefank@7333 2608 // part is too slow to be done serially, so it is handled during the weakRefsWorkParallelPart phase.
stefank@7333 2609 // Defer the cleaning until we have complete on_stack data.
stefank@7333 2610 MetadataOnStackMark md_on_stack(false /* Don't visit the code cache at this point */);
stefank@7333 2611
stefank@6996 2612 bool purged_classes;
stefank@6996 2613
stefank@6996 2614 {
stefank@6996 2615 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
stefank@7333 2616 purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
stefank@6996 2617 }
stefank@6996 2618
stefank@6996 2619 {
stefank@6996 2620 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
stefank@6996 2621 weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
stefank@6996 2622 }
stefank@7333 2623
stefank@7333 2624 {
stefank@7333 2625 G1RemarkGCTraceTime trace("Deallocate Metadata", G1Log::finest());
stefank@7333 2626 ClassLoaderDataGraph::free_deallocate_lists();
stefank@7333 2627 }
stefank@6996 2628 }
stefank@6996 2629
stefank@6996 2630 if (G1StringDedup::is_enabled()) {
stefank@6996 2631 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
stefank@6996 2632 G1StringDedup::unlink(&g1_is_alive);
stefank@6996 2633 }
stefank@6992 2634 }
aoqi@0 2635 }
aoqi@0 2636
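// Swap the roles of the two marking bitmaps: the bitmap just populated by marking becomes the prev (read-only) bitmap, and the old prev bitmap will be reused as the next bitmap.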
aoqi@0 2637 void ConcurrentMark::swapMarkBitMaps() {
aoqi@0 2638 CMBitMapRO* temp = _prevMarkBitMap;
aoqi@0 2639 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
aoqi@0 2640 _nextMarkBitMap = (CMBitMap*) temp;
aoqi@0 2641 }
aoqi@0 2642
stefank@6992 2643 class CMObjectClosure;
stefank@6992 2644
stefank@6992 2645 // Closure for iterating over objects, currently only used for
stefank@6992 2646 // processing SATB buffers.
stefank@6992 2647 class CMObjectClosure : public ObjectClosure {
stefank@6992 2648 private:
stefank@6992 2649 CMTask* _task;
stefank@6992 2650
stefank@6992 2651 public:
stefank@6992 2652 void do_object(oop obj) {
stefank@6992 2653 _task->deal_with_reference(obj);
stefank@6992 2654 }
stefank@6992 2655
stefank@6992 2656 CMObjectClosure(CMTask* task) : _task(task) { }
stefank@6992 2657 };
stefank@6992 2658
stefank@6992 2659 class G1RemarkThreadsClosure : public ThreadClosure {
stefank@6992 2660 CMObjectClosure _cm_obj;
stefank@6992 2661 G1CMOopClosure _cm_cl;
stefank@6992 2662 MarkingCodeBlobClosure _code_cl;
stefank@6992 2663 int _thread_parity;
stefank@6992 2664 bool _is_par;
stefank@6992 2665
stefank@6992 2666 public:
stefank@6992 2667 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
stefank@6992 2668 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
stefank@6992 2669 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
stefank@6992 2670
stefank@6992 2671 void do_thread(Thread* thread) {
stefank@6992 2672 if (thread->is_Java_thread()) {
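// claim_oops_do() uses the strong roots parity captured in the constructor to ensure each thread is visited by exactly one worker, whether the remark runs serially or in parallel.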
stefank@6992 2673 if (thread->claim_oops_do(_is_par, _thread_parity)) {
stefank@6992 2674 JavaThread* jt = (JavaThread*)thread;
stefank@6992 2675
stefank@6992 2676 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
stefank@6992 2677 // however, the oops reachable from nmethods have very complex lifecycles:
stefank@6992 2678 // * Alive if on the stack of an executing method
stefank@6992 2679 // * Weakly reachable otherwise
stefank@6992 2680 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
stefank@6992 2681 // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
stefank@6992 2682 jt->nmethods_do(&_code_cl);
stefank@6992 2683
stefank@6992 2684 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
stefank@6992 2685 }
stefank@6992 2686 } else if (thread->is_VM_thread()) {
stefank@6992 2687 if (thread->claim_oops_do(_is_par, _thread_parity)) {
stefank@6992 2688 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
stefank@6992 2689 }
stefank@6992 2690 }
stefank@6992 2691 }
stefank@6992 2692 };
stefank@6992 2693
aoqi@0 2694 class CMRemarkTask: public AbstractGangTask {
aoqi@0 2695 private:
aoqi@0 2696 ConcurrentMark* _cm;
aoqi@0 2697 bool _is_serial;
aoqi@0 2698 public:
aoqi@0 2699 void work(uint worker_id) {
aoqi@0 2700 // Since all available tasks are actually started, we should
aoqi@0 2701 // only proceed if we're supposed to be active.
aoqi@0 2702 if (worker_id < _cm->active_tasks()) {
aoqi@0 2703 CMTask* task = _cm->task(worker_id);
aoqi@0 2704 task->record_start_time();
stefank@6992 2705 {
stefank@6992 2706 ResourceMark rm;
stefank@6992 2707 HandleMark hm;
stefank@6992 2708
stefank@6992 2709 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
stefank@6992 2710 Threads::threads_do(&threads_f);
stefank@6992 2711 }
stefank@6992 2712
aoqi@0 2713 do {
aoqi@0 2714 task->do_marking_step(1000000000.0 /* something very large */,
aoqi@0 2715 true /* do_termination */,
aoqi@0 2716 _is_serial);
aoqi@0 2717 } while (task->has_aborted() && !_cm->has_overflown());
aoqi@0 2718 // If we overflow, then we do not want to restart. We instead
aoqi@0 2719 // want to abort remark and do concurrent marking again.
aoqi@0 2720 task->record_end_time();
aoqi@0 2721 }
aoqi@0 2722 }
aoqi@0 2723
aoqi@0 2724 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
aoqi@0 2725 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
aoqi@0 2726 _cm->terminator()->reset_for_reuse(active_workers);
aoqi@0 2727 }
aoqi@0 2728 };
aoqi@0 2729
aoqi@0 2730 void ConcurrentMark::checkpointRootsFinalWork() {
aoqi@0 2731 ResourceMark rm;
aoqi@0 2732 HandleMark hm;
aoqi@0 2733 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 2734
stefank@6992 2735 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
stefank@6992 2736
aoqi@0 2737 g1h->ensure_parsability(false);
aoqi@0 2738
aoqi@0 2739 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 2740 G1CollectedHeap::StrongRootsScope srs(g1h);
aoqi@0 2741 // this is remark, so we'll use up all active threads
aoqi@0 2742 uint active_workers = g1h->workers()->active_workers();
aoqi@0 2743 if (active_workers == 0) {
aoqi@0 2744 assert(active_workers > 0, "Should have been set earlier");
aoqi@0 2745 active_workers = (uint) ParallelGCThreads;
aoqi@0 2746 g1h->workers()->set_active_workers(active_workers);
aoqi@0 2747 }
aoqi@0 2748 set_concurrency_and_phase(active_workers, false /* concurrent */);
aoqi@0 2749 // Leave _parallel_marking_threads at its
aoqi@0 2750 // value originally calculated in the ConcurrentMark
aoqi@0 2751 // constructor and pass values of the active workers
aoqi@0 2752 // through the gang in the task.
aoqi@0 2753
aoqi@0 2754 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
aoqi@0 2755 // We will start all available threads, even if we decide that the
aoqi@0 2756 // active_workers will be fewer. The extra ones will just bail out
aoqi@0 2757 // immediately.
aoqi@0 2758 g1h->set_par_threads(active_workers);
aoqi@0 2759 g1h->workers()->run_task(&remarkTask);
aoqi@0 2760 g1h->set_par_threads(0);
aoqi@0 2761 } else {
aoqi@0 2762 G1CollectedHeap::StrongRootsScope srs(g1h);
aoqi@0 2763 uint active_workers = 1;
aoqi@0 2764 set_concurrency_and_phase(active_workers, false /* concurrent */);
aoqi@0 2765
aoqi@0 2766 // Note - if there's no work gang then the VMThread will be
aoqi@0 2767 // the thread to execute the remark - serially. We have
aoqi@0 2768 // to pass true for the is_serial parameter so that
aoqi@0 2769 // CMTask::do_marking_step() doesn't enter the sync
aoqi@0 2770 // barriers in the event of an overflow. Doing so will
aoqi@0 2771 // cause an assert that the current thread is not a
aoqi@0 2772 // concurrent GC thread.
aoqi@0 2773 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
aoqi@0 2774 remarkTask.work(0);
aoqi@0 2775 }
aoqi@0 2776 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
aoqi@0 2777 guarantee(has_overflown() ||
aoqi@0 2778 satb_mq_set.completed_buffers_num() == 0,
aoqi@0 2779 err_msg("Invariant: has_overflown = %s, num buffers = %d",
aoqi@0 2780 BOOL_TO_STR(has_overflown()),
aoqi@0 2781 satb_mq_set.completed_buffers_num()));
aoqi@0 2782
aoqi@0 2783 print_stats();
aoqi@0 2784 }
aoqi@0 2785
aoqi@0 2786 #ifndef PRODUCT
aoqi@0 2787
aoqi@0 2788 class PrintReachableOopClosure: public OopClosure {
aoqi@0 2789 private:
aoqi@0 2790 G1CollectedHeap* _g1h;
aoqi@0 2791 outputStream* _out;
aoqi@0 2792 VerifyOption _vo;
aoqi@0 2793 bool _all;
aoqi@0 2794
aoqi@0 2795 public:
aoqi@0 2796 PrintReachableOopClosure(outputStream* out,
aoqi@0 2797 VerifyOption vo,
aoqi@0 2798 bool all) :
aoqi@0 2799 _g1h(G1CollectedHeap::heap()),
aoqi@0 2800 _out(out), _vo(vo), _all(all) { }
aoqi@0 2801
aoqi@0 2802 void do_oop(narrowOop* p) { do_oop_work(p); }
aoqi@0 2803 void do_oop( oop* p) { do_oop_work(p); }
aoqi@0 2804
aoqi@0 2805 template <class T> void do_oop_work(T* p) {
aoqi@0 2806 oop obj = oopDesc::load_decode_heap_oop(p);
aoqi@0 2807 const char* str = NULL;
aoqi@0 2808 const char* str2 = "";
aoqi@0 2809
aoqi@0 2810 if (obj == NULL) {
aoqi@0 2811 str = "";
aoqi@0 2812 } else if (!_g1h->is_in_g1_reserved(obj)) {
aoqi@0 2813 str = " O";
aoqi@0 2814 } else {
aoqi@0 2815 HeapRegion* hr = _g1h->heap_region_containing(obj);
aoqi@0 2816 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
aoqi@0 2817 bool marked = _g1h->is_marked(obj, _vo);
aoqi@0 2818
aoqi@0 2819 if (over_tams) {
aoqi@0 2820 str = " >";
aoqi@0 2821 if (marked) {
aoqi@0 2822 str2 = " AND MARKED";
aoqi@0 2823 }
aoqi@0 2824 } else if (marked) {
aoqi@0 2825 str = " M";
aoqi@0 2826 } else {
aoqi@0 2827 str = " NOT";
aoqi@0 2828 }
aoqi@0 2829 }
aoqi@0 2830
aoqi@0 2831 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s",
aoqi@0 2832 p2i(p), p2i((void*) obj), str, str2);
aoqi@0 2833 }
aoqi@0 2834 };
aoqi@0 2835
aoqi@0 2836 class PrintReachableObjectClosure : public ObjectClosure {
aoqi@0 2837 private:
aoqi@0 2838 G1CollectedHeap* _g1h;
aoqi@0 2839 outputStream* _out;
aoqi@0 2840 VerifyOption _vo;
aoqi@0 2841 bool _all;
aoqi@0 2842 HeapRegion* _hr;
aoqi@0 2843
aoqi@0 2844 public:
aoqi@0 2845 PrintReachableObjectClosure(outputStream* out,
aoqi@0 2846 VerifyOption vo,
aoqi@0 2847 bool all,
aoqi@0 2848 HeapRegion* hr) :
aoqi@0 2849 _g1h(G1CollectedHeap::heap()),
aoqi@0 2850 _out(out), _vo(vo), _all(all), _hr(hr) { }
aoqi@0 2851
aoqi@0 2852 void do_object(oop o) {
aoqi@0 2853 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
aoqi@0 2854 bool marked = _g1h->is_marked(o, _vo);
aoqi@0 2855 bool print_it = _all || over_tams || marked;
aoqi@0 2856
aoqi@0 2857 if (print_it) {
aoqi@0 2858 _out->print_cr(" "PTR_FORMAT"%s",
aoqi@0 2859 p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
aoqi@0 2860 PrintReachableOopClosure oopCl(_out, _vo, _all);
aoqi@0 2861 o->oop_iterate_no_header(&oopCl);
aoqi@0 2862 }
aoqi@0 2863 }
aoqi@0 2864 };
aoqi@0 2865
aoqi@0 2866 class PrintReachableRegionClosure : public HeapRegionClosure {
aoqi@0 2867 private:
aoqi@0 2868 G1CollectedHeap* _g1h;
aoqi@0 2869 outputStream* _out;
aoqi@0 2870 VerifyOption _vo;
aoqi@0 2871 bool _all;
aoqi@0 2872
aoqi@0 2873 public:
aoqi@0 2874 bool doHeapRegion(HeapRegion* hr) {
aoqi@0 2875 HeapWord* b = hr->bottom();
aoqi@0 2876 HeapWord* e = hr->end();
aoqi@0 2877 HeapWord* t = hr->top();
aoqi@0 2878 HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
aoqi@0 2879 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
aoqi@0 2880 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
aoqi@0 2881 _out->cr();
aoqi@0 2882
aoqi@0 2883 HeapWord* from = b;
aoqi@0 2884 HeapWord* to = t;
aoqi@0 2885
aoqi@0 2886 if (to > from) {
aoqi@0 2887 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
aoqi@0 2888 _out->cr();
aoqi@0 2889 PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
aoqi@0 2890 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
aoqi@0 2891 _out->cr();
aoqi@0 2892 }
aoqi@0 2893
aoqi@0 2894 return false;
aoqi@0 2895 }
aoqi@0 2896
aoqi@0 2897 PrintReachableRegionClosure(outputStream* out,
aoqi@0 2898 VerifyOption vo,
aoqi@0 2899 bool all) :
aoqi@0 2900 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
aoqi@0 2901 };
aoqi@0 2902
aoqi@0 2903 void ConcurrentMark::print_reachable(const char* str,
aoqi@0 2904 VerifyOption vo,
aoqi@0 2905 bool all) {
aoqi@0 2906 gclog_or_tty->cr();
aoqi@0 2907 gclog_or_tty->print_cr("== Doing heap dump... ");
aoqi@0 2908
aoqi@0 2909 if (G1PrintReachableBaseFile == NULL) {
aoqi@0 2910 gclog_or_tty->print_cr(" #### error: no base file defined");
aoqi@0 2911 return;
aoqi@0 2912 }
aoqi@0 2913
aoqi@0 2914 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
aoqi@0 2915 (JVM_MAXPATHLEN - 1)) {
aoqi@0 2916 gclog_or_tty->print_cr(" #### error: file name too long");
aoqi@0 2917 return;
aoqi@0 2918 }
aoqi@0 2919
aoqi@0 2920 char file_name[JVM_MAXPATHLEN];
aoqi@0 2921 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
aoqi@0 2922 gclog_or_tty->print_cr(" dumping to file %s", file_name);
aoqi@0 2923
aoqi@0 2924 fileStream fout(file_name);
aoqi@0 2925 if (!fout.is_open()) {
aoqi@0 2926 gclog_or_tty->print_cr(" #### error: could not open file");
aoqi@0 2927 return;
aoqi@0 2928 }
aoqi@0 2929
aoqi@0 2930 outputStream* out = &fout;
aoqi@0 2931 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
aoqi@0 2932 out->cr();
aoqi@0 2933
aoqi@0 2934 out->print_cr("--- ITERATING OVER REGIONS");
aoqi@0 2935 out->cr();
aoqi@0 2936 PrintReachableRegionClosure rcl(out, vo, all);
aoqi@0 2937 _g1h->heap_region_iterate(&rcl);
aoqi@0 2938 out->cr();
aoqi@0 2939
aoqi@0 2940 gclog_or_tty->print_cr(" done");
aoqi@0 2941 gclog_or_tty->flush();
aoqi@0 2942 }
aoqi@0 2943
aoqi@0 2944 #endif // PRODUCT
aoqi@0 2945
aoqi@0 2946 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
aoqi@0 2947 // Note we are overriding the read-only view of the prev map here, via
aoqi@0 2948 // the cast.
aoqi@0 2949 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
aoqi@0 2950 }
aoqi@0 2951
aoqi@0 2952 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
aoqi@0 2953 _nextMarkBitMap->clearRange(mr);
aoqi@0 2954 }
aoqi@0 2955
aoqi@0 2956 HeapRegion*
aoqi@0 2957 ConcurrentMark::claim_region(uint worker_id) {
aoqi@0 2958 // "checkpoint" the finger
aoqi@0 2959 HeapWord* finger = _finger;
aoqi@0 2960
aoqi@0 2961 // _heap_end will not change underneath our feet; it only changes at
aoqi@0 2962 // yield points.
aoqi@0 2963 while (finger < _heap_end) {
aoqi@0 2964 assert(_g1h->is_in_g1_reserved(finger), "invariant");
aoqi@0 2965
aoqi@0 2966 // Note on how this code handles humongous regions. In the
aoqi@0 2967 // normal case the finger will reach the start of a "starts
aoqi@0 2968 // humongous" (SH) region. Its end will either be the end of the
aoqi@0 2969 // last "continues humongous" (CH) region in the sequence, or the
aoqi@0 2970 // standard end of the SH region (if the SH is the only region in
aoqi@0 2971 // the sequence). That way claim_region() will skip over the CH
aoqi@0 2972 // regions. However, there is a subtle race between a CM thread
aoqi@0 2973 // executing this method and a mutator thread doing a humongous
aoqi@0 2974 // object allocation. The two are not mutually exclusive as the CM
aoqi@0 2975 // thread does not need to hold the Heap_lock when it gets
aoqi@0 2976 // here. So there is a chance that claim_region() will come across
aoqi@0 2977 // a free region that's in the process of becoming a SH or a CH
aoqi@0 2978 // region. In the former case, it will either
aoqi@0 2979 // a) Miss the update to the region's end, in which case it will
aoqi@0 2980 // visit every subsequent CH region, will find their bitmaps
aoqi@0 2981 // empty, and do nothing, or
aoqi@0 2982 // b) Will observe the update of the region's end (in which case
aoqi@0 2983 // it will skip the subsequent CH regions).
aoqi@0 2984 // If it comes across a region that suddenly becomes CH, the
aoqi@0 2985 // scenario will be similar to b). So, the race between
aoqi@0 2986 // claim_region() and a humongous object allocation might force us
aoqi@0 2987 // to do a bit of unnecessary work (due to some unnecessary bitmap
aoqi@0 2988 // iterations) but it should not introduce any correctness issues.
tschatzl@7051 2989 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
tschatzl@7051 2990
tschatzl@7051 2991 // Above heap_region_containing_raw may return NULL as we always scan claim
tschatzl@7051 2992 // until the end of the heap. In this case, just jump to the next region.
tschatzl@7051 2993 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
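// HeapRegion::GrainWords is the size of one region in words, so for an uncommitted region the finger is advanced to the start of the next region.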
aoqi@0 2994
aoqi@0 2995 // Is the gap between reading the finger and doing the CAS too long?
aoqi@0 2996 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
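// cmpxchg_ptr returns the previous value of _finger: if it equals the value we read, we installed 'end' and have claimed [finger, end); otherwise another worker advanced the finger first.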
tschatzl@7051 2997 if (res == finger && curr_region != NULL) {
aoqi@0 2998 // we succeeded
tschatzl@7051 2999 HeapWord* bottom = curr_region->bottom();
tschatzl@7051 3000 HeapWord* limit = curr_region->next_top_at_mark_start();
tschatzl@7051 3001
tschatzl@7051 3002 if (verbose_low()) {
tschatzl@7051 3003 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
tschatzl@7051 3004 "["PTR_FORMAT", "PTR_FORMAT"), "
tschatzl@7051 3005 "limit = "PTR_FORMAT,
tschatzl@7051 3006 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
tschatzl@7051 3007 }
aoqi@0 3008
aoqi@0 3009 // notice that _finger == end cannot be guaranteed here since
aoqi@0 3010 // someone else might have moved the finger even further
aoqi@0 3011 assert(_finger >= end, "the finger should have moved forward");
aoqi@0 3012
aoqi@0 3013 if (verbose_low()) {
aoqi@0 3014 gclog_or_tty->print_cr("[%u] we were successful with region = "
aoqi@0 3015 PTR_FORMAT, worker_id, p2i(curr_region));
aoqi@0 3016 }
aoqi@0 3017
aoqi@0 3018 if (limit > bottom) {
aoqi@0 3019 if (verbose_low()) {
aoqi@0 3020 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
aoqi@0 3021 "returning it ", worker_id, p2i(curr_region));
aoqi@0 3022 }
aoqi@0 3023 return curr_region;
aoqi@0 3024 } else {
aoqi@0 3025 assert(limit == bottom,
aoqi@0 3026 "the region limit should be at bottom");
aoqi@0 3027 if (verbose_low()) {
aoqi@0 3028 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
aoqi@0 3029 "returning NULL", worker_id, p2i(curr_region));
aoqi@0 3030 }
aoqi@0 3031 // we return NULL and the caller should try calling
aoqi@0 3032 // claim_region() again.
aoqi@0 3033 return NULL;
aoqi@0 3034 }
aoqi@0 3035 } else {
aoqi@0 3036 assert(_finger > finger, "the finger should have moved forward");
aoqi@0 3037 if (verbose_low()) {
tschatzl@7051 3038 if (curr_region == NULL) {
tschatzl@7051 3039 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
tschatzl@7051 3040 "global finger = "PTR_FORMAT", "
tschatzl@7051 3041 "our finger = "PTR_FORMAT,
tschatzl@7051 3042 worker_id, p2i(_finger), p2i(finger));
tschatzl@7051 3043 } else {
tschatzl@7051 3044 gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
tschatzl@7051 3045 "global finger = "PTR_FORMAT", "
tschatzl@7051 3046 "our finger = "PTR_FORMAT,
tschatzl@7051 3047 worker_id, p2i(_finger), p2i(finger));
tschatzl@7051 3048 }
aoqi@0 3049 }
aoqi@0 3050
aoqi@0 3051 // read it again
aoqi@0 3052 finger = _finger;
aoqi@0 3053 }
aoqi@0 3054 }
aoqi@0 3055
aoqi@0 3056 return NULL;
aoqi@0 3057 }
aoqi@0 3058
aoqi@0 3059 #ifndef PRODUCT
aoqi@0 3060 enum VerifyNoCSetOopsPhase {
aoqi@0 3061 VerifyNoCSetOopsStack,
aoqi@0 3062 VerifyNoCSetOopsQueues,
aoqi@0 3063 VerifyNoCSetOopsSATBCompleted,
aoqi@0 3064 VerifyNoCSetOopsSATBThread
aoqi@0 3065 };
aoqi@0 3066
aoqi@0 3067 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure {
aoqi@0 3068 private:
aoqi@0 3069 G1CollectedHeap* _g1h;
aoqi@0 3070 VerifyNoCSetOopsPhase _phase;
aoqi@0 3071 int _info;
aoqi@0 3072
aoqi@0 3073 const char* phase_str() {
aoqi@0 3074 switch (_phase) {
aoqi@0 3075 case VerifyNoCSetOopsStack: return "Stack";
aoqi@0 3076 case VerifyNoCSetOopsQueues: return "Queue";
aoqi@0 3077 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
aoqi@0 3078 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers";
aoqi@0 3079 default: ShouldNotReachHere();
aoqi@0 3080 }
aoqi@0 3081 return NULL;
aoqi@0 3082 }
aoqi@0 3083
aoqi@0 3084 void do_object_work(oop obj) {
aoqi@0 3085 guarantee(!_g1h->obj_in_cs(obj),
aoqi@0 3086 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
aoqi@0 3087 p2i((void*) obj), phase_str(), _info));
aoqi@0 3088 }
aoqi@0 3089
aoqi@0 3090 public:
aoqi@0 3091 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
aoqi@0 3092
aoqi@0 3093 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
aoqi@0 3094 _phase = phase;
aoqi@0 3095 _info = info;
aoqi@0 3096 }
aoqi@0 3097
aoqi@0 3098 virtual void do_oop(oop* p) {
aoqi@0 3099 oop obj = oopDesc::load_decode_heap_oop(p);
aoqi@0 3100 do_object_work(obj);
aoqi@0 3101 }
aoqi@0 3102
aoqi@0 3103 virtual void do_oop(narrowOop* p) {
aoqi@0 3104 // We should not come across narrow oops while scanning marking
aoqi@0 3105 // stacks and SATB buffers.
aoqi@0 3106 ShouldNotReachHere();
aoqi@0 3107 }
aoqi@0 3108
aoqi@0 3109 virtual void do_object(oop obj) {
aoqi@0 3110 do_object_work(obj);
aoqi@0 3111 }
aoqi@0 3112 };
aoqi@0 3113
aoqi@0 3114 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
aoqi@0 3115 bool verify_enqueued_buffers,
aoqi@0 3116 bool verify_thread_buffers,
aoqi@0 3117 bool verify_fingers) {
aoqi@0 3118 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
aoqi@0 3119 if (!G1CollectedHeap::heap()->mark_in_progress()) {
aoqi@0 3120 return;
aoqi@0 3121 }
aoqi@0 3122
aoqi@0 3123 VerifyNoCSetOopsClosure cl;
aoqi@0 3124
aoqi@0 3125 if (verify_stacks) {
aoqi@0 3126 // Verify entries on the global mark stack
aoqi@0 3127 cl.set_phase(VerifyNoCSetOopsStack);
aoqi@0 3128 _markStack.oops_do(&cl);
aoqi@0 3129
aoqi@0 3130 // Verify entries on the task queues
aoqi@0 3131 for (uint i = 0; i < _max_worker_id; i += 1) {
aoqi@0 3132 cl.set_phase(VerifyNoCSetOopsQueues, i);
aoqi@0 3133 CMTaskQueue* queue = _task_queues->queue(i);
aoqi@0 3134 queue->oops_do(&cl);
aoqi@0 3135 }
aoqi@0 3136 }
aoqi@0 3137
aoqi@0 3138 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
aoqi@0 3139
aoqi@0 3140 // Verify entries on the enqueued SATB buffers
aoqi@0 3141 if (verify_enqueued_buffers) {
aoqi@0 3142 cl.set_phase(VerifyNoCSetOopsSATBCompleted);
aoqi@0 3143 satb_qs.iterate_completed_buffers_read_only(&cl);
aoqi@0 3144 }
aoqi@0 3145
aoqi@0 3146 // Verify entries on the per-thread SATB buffers
aoqi@0 3147 if (verify_thread_buffers) {
aoqi@0 3148 cl.set_phase(VerifyNoCSetOopsSATBThread);
aoqi@0 3149 satb_qs.iterate_thread_buffers_read_only(&cl);
aoqi@0 3150 }
aoqi@0 3151
aoqi@0 3152 if (verify_fingers) {
aoqi@0 3153 // Verify the global finger
aoqi@0 3154 HeapWord* global_finger = finger();
aoqi@0 3155 if (global_finger != NULL && global_finger < _heap_end) {
aoqi@0 3156 // The global finger always points to a heap region boundary. We
aoqi@0 3157 // use heap_region_containing_raw() to get the containing region
aoqi@0 3158 // given that the global finger could be pointing to a free region
aoqi@0 3159 // which subsequently becomes continues humongous. If that
aoqi@0 3160 // happens, heap_region_containing() will return the bottom of the
aoqi@0 3161 // corresponding starts humongous region and the check below will
aoqi@0 3162 // not hold any more.
tschatzl@7051 3163 // Since we always iterate over all regions, we might get a NULL HeapRegion
tschatzl@7051 3164 // here.
aoqi@0 3165 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
tschatzl@7051 3166 guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
aoqi@0 3167 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
aoqi@0 3168 p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
aoqi@0 3169 }
aoqi@0 3170
aoqi@0 3171 // Verify the task fingers
aoqi@0 3172 assert(parallel_marking_threads() <= _max_worker_id, "sanity");
aoqi@0 3173 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
aoqi@0 3174 CMTask* task = _tasks[i];
aoqi@0 3175 HeapWord* task_finger = task->finger();
aoqi@0 3176 if (task_finger != NULL && task_finger < _heap_end) {
aoqi@0 3177 // See above note on the global finger verification.
aoqi@0 3178 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
tschatzl@7051 3179 guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
aoqi@0 3180 !task_hr->in_collection_set(),
aoqi@0 3181 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
aoqi@0 3182 p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
aoqi@0 3183 }
aoqi@0 3184 }
aoqi@0 3185 }
aoqi@0 3186 }
aoqi@0 3187 #endif // PRODUCT
aoqi@0 3188
aoqi@0 3189 // Aggregate the counting data that was constructed concurrently
aoqi@0 3190 // with marking.
aoqi@0 3191 class AggregateCountDataHRClosure: public HeapRegionClosure {
aoqi@0 3192 G1CollectedHeap* _g1h;
aoqi@0 3193 ConcurrentMark* _cm;
aoqi@0 3194 CardTableModRefBS* _ct_bs;
aoqi@0 3195 BitMap* _cm_card_bm;
aoqi@0 3196 uint _max_worker_id;
aoqi@0 3197
aoqi@0 3198 public:
aoqi@0 3199 AggregateCountDataHRClosure(G1CollectedHeap* g1h,
aoqi@0 3200 BitMap* cm_card_bm,
aoqi@0 3201 uint max_worker_id) :
aoqi@0 3202 _g1h(g1h), _cm(g1h->concurrent_mark()),
aoqi@0 3203 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
aoqi@0 3204 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
aoqi@0 3205
aoqi@0 3206 bool doHeapRegion(HeapRegion* hr) {
aoqi@0 3207 if (hr->continuesHumongous()) {
aoqi@0 3208 // We will ignore these here and process them when their
aoqi@0 3209 // associated "starts humongous" region is processed.
aoqi@0 3210 // Note that we cannot rely on their associated
aoqi@0 3211 // "starts humongous" region to have their bit set to 1
aoqi@0 3212 // since, due to the region chunking in the parallel region
aoqi@0 3213 // iteration, a "continues humongous" region might be visited
aoqi@0 3214 // before its associated "starts humongous".
aoqi@0 3215 return false;
aoqi@0 3216 }
aoqi@0 3217
aoqi@0 3218 HeapWord* start = hr->bottom();
aoqi@0 3219 HeapWord* limit = hr->next_top_at_mark_start();
aoqi@0 3220 HeapWord* end = hr->end();
aoqi@0 3221
aoqi@0 3222 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
aoqi@0 3223 err_msg("Preconditions not met - "
aoqi@0 3224 "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
aoqi@0 3225 "top: "PTR_FORMAT", end: "PTR_FORMAT,
aoqi@0 3226 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
aoqi@0 3227
aoqi@0 3228 assert(hr->next_marked_bytes() == 0, "Precondition");
aoqi@0 3229
aoqi@0 3230 if (start == limit) {
aoqi@0 3231 // NTAMS of this region has not been set so nothing to do.
aoqi@0 3232 return false;
aoqi@0 3233 }
aoqi@0 3234
aoqi@0 3235 // 'start' should be in the heap.
aoqi@0 3236 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
aoqi@0 3237 // 'end' *may* be just beyond the end of the heap (if hr is the last region)
aoqi@0 3238 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
aoqi@0 3239
aoqi@0 3240 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
aoqi@0 3241 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
aoqi@0 3242 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
aoqi@0 3243
aoqi@0 3244 // If ntams is not card aligned then we bump card bitmap index
aoqi@0 3245 // for limit so that we get all the cards spanned by
aoqi@0 3246 // the object ending at ntams.
aoqi@0 3247 // Note: if this is the last region in the heap then ntams
aoqi@0 3248 // could actually be just beyond the end of the heap;
aoqi@0 3249 // limit_idx will then correspond to a (non-existent) card
aoqi@0 3250 // that is also outside the heap.
aoqi@0 3251 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
aoqi@0 3252 limit_idx += 1;
aoqi@0 3253 }
aoqi@0 3254
aoqi@0 3255 assert(limit_idx <= end_idx, "or else use atomics");
aoqi@0 3256
aoqi@0 3257 // Aggregate the "stripe" in the count data associated with hr.
tschatzl@7091 3258 uint hrm_index = hr->hrm_index();
aoqi@0 3259 size_t marked_bytes = 0;
aoqi@0 3260
aoqi@0 3261 for (uint i = 0; i < _max_worker_id; i += 1) {
aoqi@0 3262 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
aoqi@0 3263 BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
aoqi@0 3264
aoqi@0 3265 // Fetch the marked_bytes in this region for task i and
aoqi@0 3266 // add it to the running total for this region.
tschatzl@7091 3267 marked_bytes += marked_bytes_array[hrm_index];
aoqi@0 3268
aoqi@0 3269 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
aoqi@0 3270 // into the global card bitmap.
aoqi@0 3271 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
aoqi@0 3272
aoqi@0 3273 while (scan_idx < limit_idx) {
aoqi@0 3274 assert(task_card_bm->at(scan_idx) == true, "should be");
aoqi@0 3275 _cm_card_bm->set_bit(scan_idx);
aoqi@0 3276 assert(_cm_card_bm->at(scan_idx) == true, "should be");
aoqi@0 3277
aoqi@0 3278 // BitMap::get_next_one_offset() can handle the case when
aoqi@0 3279 // its left_offset parameter is greater than its right_offset
aoqi@0 3280 // parameter. It does, however, have an early exit if
aoqi@0 3281 // left_offset == right_offset. So let's limit the value
aoqi@0 3282 // passed in for left offset here.
aoqi@0 3283 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
aoqi@0 3284 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
aoqi@0 3285 }
aoqi@0 3286 }
aoqi@0 3287
aoqi@0 3288 // Update the marked bytes for this region.
aoqi@0 3289 hr->add_to_marked_bytes(marked_bytes);
aoqi@0 3290
aoqi@0 3291 // Next heap region
aoqi@0 3292 return false;
aoqi@0 3293 }
aoqi@0 3294 };
aoqi@0 3295
aoqi@0 3296 class G1AggregateCountDataTask: public AbstractGangTask {
aoqi@0 3297 protected:
aoqi@0 3298 G1CollectedHeap* _g1h;
aoqi@0 3299 ConcurrentMark* _cm;
aoqi@0 3300 BitMap* _cm_card_bm;
aoqi@0 3301 uint _max_worker_id;
aoqi@0 3302 int _active_workers;
aoqi@0 3303
aoqi@0 3304 public:
aoqi@0 3305 G1AggregateCountDataTask(G1CollectedHeap* g1h,
aoqi@0 3306 ConcurrentMark* cm,
aoqi@0 3307 BitMap* cm_card_bm,
aoqi@0 3308 uint max_worker_id,
aoqi@0 3309 int n_workers) :
aoqi@0 3310 AbstractGangTask("Count Aggregation"),
aoqi@0 3311 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
aoqi@0 3312 _max_worker_id(max_worker_id),
aoqi@0 3313 _active_workers(n_workers) { }
aoqi@0 3314
aoqi@0 3315 void work(uint worker_id) {
aoqi@0 3316 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
aoqi@0 3317
aoqi@0 3318 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 3319 _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
aoqi@0 3320 _active_workers,
aoqi@0 3321 HeapRegion::AggregateCountClaimValue);
aoqi@0 3322 } else {
aoqi@0 3323 _g1h->heap_region_iterate(&cl);
aoqi@0 3324 }
aoqi@0 3325 }
aoqi@0 3326 };
aoqi@0 3327
aoqi@0 3328
aoqi@0 3329 void ConcurrentMark::aggregate_count_data() {
aoqi@0 3330 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
aoqi@0 3331 _g1h->workers()->active_workers() :
aoqi@0 3332 1);
aoqi@0 3333
aoqi@0 3334 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
aoqi@0 3335 _max_worker_id, n_workers);
aoqi@0 3336
aoqi@0 3337 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 3338 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
aoqi@0 3339 "sanity check");
aoqi@0 3340 _g1h->set_par_threads(n_workers);
aoqi@0 3341 _g1h->workers()->run_task(&g1_par_agg_task);
aoqi@0 3342 _g1h->set_par_threads(0);
aoqi@0 3343
aoqi@0 3344 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
aoqi@0 3345 "sanity check");
aoqi@0 3346 _g1h->reset_heap_region_claim_values();
aoqi@0 3347 } else {
aoqi@0 3348 g1_par_agg_task.work(0);
aoqi@0 3349 }
aoqi@0 3350 }
aoqi@0 3351
aoqi@0 3352 // Clear the per-worker arrays used to store the per-region counting data
aoqi@0 3353 void ConcurrentMark::clear_all_count_data() {
aoqi@0 3354 // Clear the global card bitmap - it will be filled during
aoqi@0 3355 // liveness count aggregation (during remark) and the
aoqi@0 3356 // final counting task.
aoqi@0 3357 _card_bm.clear();
aoqi@0 3358
aoqi@0 3359 // Clear the global region bitmap - it will be filled as part
aoqi@0 3360 // of the final counting task.
aoqi@0 3361 _region_bm.clear();
aoqi@0 3362
aoqi@0 3363 uint max_regions = _g1h->max_regions();
aoqi@0 3364 assert(_max_worker_id > 0, "uninitialized");
aoqi@0 3365
aoqi@0 3366 for (uint i = 0; i < _max_worker_id; i += 1) {
aoqi@0 3367 BitMap* task_card_bm = count_card_bitmap_for(i);
aoqi@0 3368 size_t* marked_bytes_array = count_marked_bytes_array_for(i);
aoqi@0 3369
aoqi@0 3370 assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
aoqi@0 3371 assert(marked_bytes_array != NULL, "uninitialized");
aoqi@0 3372
aoqi@0 3373 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
aoqi@0 3374 task_card_bm->clear();
aoqi@0 3375 }
aoqi@0 3376 }
aoqi@0 3377
aoqi@0 3378 void ConcurrentMark::print_stats() {
aoqi@0 3379 if (verbose_stats()) {
aoqi@0 3380 gclog_or_tty->print_cr("---------------------------------------------------------------------");
aoqi@0 3381 for (size_t i = 0; i < _active_tasks; ++i) {
aoqi@0 3382 _tasks[i]->print_stats();
aoqi@0 3383 gclog_or_tty->print_cr("---------------------------------------------------------------------");
aoqi@0 3384 }
aoqi@0 3385 }
aoqi@0 3386 }
aoqi@0 3387
aoqi@0 3388 // abandon current marking iteration due to a Full GC
aoqi@0 3389 void ConcurrentMark::abort() {
tschatzl@7016 3390 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
tschatzl@7016 3391 // concurrent bitmap clearing.
aoqi@0 3392 _nextMarkBitMap->clearAll();
brutisso@7005 3393
brutisso@7005 3394 // Note we cannot clear the previous marking bitmap here
brutisso@7005 3395 // since VerifyDuringGC verifies the objects marked during
brutisso@7005 3396 // a full GC against the previous bitmap.
brutisso@7005 3397
aoqi@0 3398 // Clear the liveness counting data
aoqi@0 3399 clear_all_count_data();
aoqi@0 3400 // Empty mark stack
aoqi@0 3401 reset_marking_state();
aoqi@0 3402 for (uint i = 0; i < _max_worker_id; ++i) {
aoqi@0 3403 _tasks[i]->clear_region_fields();
aoqi@0 3404 }
aoqi@0 3405 _first_overflow_barrier_sync.abort();
aoqi@0 3406 _second_overflow_barrier_sync.abort();
brutisso@6904 3407 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
brutisso@6904 3408 if (!gc_id.is_undefined()) {
brutisso@6904 3409 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
brutisso@6904 3410 // to detect that it was aborted. Only keep track of the first GC id that we aborted.
brutisso@6904 3411 _aborted_gc_id = gc_id;
brutisso@6904 3412 }
aoqi@0 3413 _has_aborted = true;
aoqi@0 3414
aoqi@0 3415 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
aoqi@0 3416 satb_mq_set.abandon_partial_marking();
aoqi@0 3417 // This can be called either during or outside marking; we'll read
aoqi@0 3418 // the expected_active value from the SATB queue set.
aoqi@0 3419 satb_mq_set.set_active_all_threads(
aoqi@0 3420 false, /* new active value */
aoqi@0 3421 satb_mq_set.is_active() /* expected_active */);
aoqi@0 3422
aoqi@0 3423 _g1h->trace_heap_after_concurrent_cycle();
aoqi@0 3424 _g1h->register_concurrent_cycle_end();
aoqi@0 3425 }
aoqi@0 3426
brutisso@6904 3427 const GCId& ConcurrentMark::concurrent_gc_id() {
brutisso@6904 3428 if (has_aborted()) {
brutisso@6904 3429 return _aborted_gc_id;
brutisso@6904 3430 }
brutisso@6904 3431 return _g1h->gc_tracer_cm()->gc_id();
brutisso@6904 3432 }
brutisso@6904 3433
aoqi@0 3434 static void print_ms_time_info(const char* prefix, const char* name,
aoqi@0 3435 NumberSeq& ns) {
aoqi@0 3436 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
aoqi@0 3437 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
aoqi@0 3438 if (ns.num() > 0) {
aoqi@0 3439 gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
aoqi@0 3440 prefix, ns.sd(), ns.maximum());
aoqi@0 3441 }
aoqi@0 3442 }
aoqi@0 3443
aoqi@0 3444 void ConcurrentMark::print_summary_info() {
aoqi@0 3445 gclog_or_tty->print_cr(" Concurrent marking:");
aoqi@0 3446 print_ms_time_info(" ", "init marks", _init_times);
aoqi@0 3447 print_ms_time_info(" ", "remarks", _remark_times);
aoqi@0 3448 {
aoqi@0 3449 print_ms_time_info(" ", "final marks", _remark_mark_times);
aoqi@0 3450 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
aoqi@0 3451
aoqi@0 3452 }
aoqi@0 3453 print_ms_time_info(" ", "cleanups", _cleanup_times);
aoqi@0 3454 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
aoqi@0 3455 _total_counting_time,
aoqi@0 3456 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
aoqi@0 3457 (double)_cleanup_times.num()
aoqi@0 3458 : 0.0));
aoqi@0 3459 if (G1ScrubRemSets) {
aoqi@0 3460 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
aoqi@0 3461 _total_rs_scrub_time,
aoqi@0 3462 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
aoqi@0 3463 (double)_cleanup_times.num()
aoqi@0 3464 : 0.0));
aoqi@0 3465 }
aoqi@0 3466 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
aoqi@0 3467 (_init_times.sum() + _remark_times.sum() +
aoqi@0 3468 _cleanup_times.sum())/1000.0);
aoqi@0 3469 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
aoqi@0 3470 "(%8.2f s marking).",
aoqi@0 3471 cmThread()->vtime_accum(),
aoqi@0 3472 cmThread()->vtime_mark_accum());
aoqi@0 3473 }
aoqi@0 3474
aoqi@0 3475 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
aoqi@0 3476 if (use_parallel_marking_threads()) {
aoqi@0 3477 _parallel_workers->print_worker_threads_on(st);
aoqi@0 3478 }
aoqi@0 3479 }
aoqi@0 3480
aoqi@0 3481 void ConcurrentMark::print_on_error(outputStream* st) const {
aoqi@0 3482 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
aoqi@0 3483 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
aoqi@0 3484 _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
aoqi@0 3485 _nextMarkBitMap->print_on_error(st, " Next Bits: ");
aoqi@0 3486 }
aoqi@0 3487
aoqi@0 3488 // We take a break if someone is trying to stop the world.
aoqi@0 3489 bool ConcurrentMark::do_yield_check(uint worker_id) {
pliden@6906 3490 if (SuspendibleThreadSet::should_yield()) {
aoqi@0 3491 if (worker_id == 0) {
aoqi@0 3492 _g1h->g1_policy()->record_concurrent_pause();
aoqi@0 3493 }
pliden@6906 3494 SuspendibleThreadSet::yield();
aoqi@0 3495 return true;
aoqi@0 3496 } else {
aoqi@0 3497 return false;
aoqi@0 3498 }
aoqi@0 3499 }
aoqi@0 3500
aoqi@0 3501 #ifndef PRODUCT
aoqi@0 3502 // for debugging purposes
aoqi@0 3503 void ConcurrentMark::print_finger() {
aoqi@0 3504 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
aoqi@0 3505 p2i(_heap_start), p2i(_heap_end), p2i(_finger));
aoqi@0 3506 for (uint i = 0; i < _max_worker_id; ++i) {
aoqi@0 3507 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
aoqi@0 3508 }
aoqi@0 3509 gclog_or_tty->cr();
aoqi@0 3510 }
aoqi@0 3511 #endif
aoqi@0 3512
aoqi@0 3513 void CMTask::scan_object(oop obj) {
aoqi@0 3514 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
aoqi@0 3515
aoqi@0 3516 if (_cm->verbose_high()) {
aoqi@0 3517 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
aoqi@0 3518 _worker_id, p2i((void*) obj));
aoqi@0 3519 }
aoqi@0 3520
aoqi@0 3521 size_t obj_size = obj->size();
aoqi@0 3522 _words_scanned += obj_size;
aoqi@0 3523
aoqi@0 3524 obj->oop_iterate(_cm_oop_closure);
aoqi@0 3525 statsOnly( ++_objs_scanned );
aoqi@0 3526 check_limits();
aoqi@0 3527 }
aoqi@0 3528
aoqi@0 3529 // Closure for iteration over bitmaps
aoqi@0 3530 class CMBitMapClosure : public BitMapClosure {
aoqi@0 3531 private:
aoqi@0 3532 // the bitmap that is being iterated over
aoqi@0 3533 CMBitMap* _nextMarkBitMap;
aoqi@0 3534 ConcurrentMark* _cm;
aoqi@0 3535 CMTask* _task;
aoqi@0 3536
aoqi@0 3537 public:
aoqi@0 3538 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
aoqi@0 3539 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
aoqi@0 3540
aoqi@0 3541 bool do_bit(size_t offset) {
aoqi@0 3542 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
aoqi@0 3543 assert(_nextMarkBitMap->isMarked(addr), "invariant");
aoqi@0 3544 assert( addr < _cm->finger(), "invariant");
aoqi@0 3545
aoqi@0 3546 statsOnly( _task->increase_objs_found_on_bitmap() );
aoqi@0 3547 assert(addr >= _task->finger(), "invariant");
aoqi@0 3548
aoqi@0 3549 // We move that task's local finger along.
aoqi@0 3550 _task->move_finger_to(addr);
aoqi@0 3551
aoqi@0 3552 _task->scan_object(oop(addr));
aoqi@0 3553 // we only partially drain the local queue and global stack
aoqi@0 3554 _task->drain_local_queue(true);
aoqi@0 3555 _task->drain_global_stack(true);
aoqi@0 3556
aoqi@0 3557 // if the has_aborted flag has been raised, we need to bail out of
aoqi@0 3558 // the iteration
aoqi@0 3559 return !_task->has_aborted();
aoqi@0 3560 }
aoqi@0 3561 };
aoqi@0 3562
aoqi@0 3563 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
aoqi@0 3564 ConcurrentMark* cm,
aoqi@0 3565 CMTask* task)
aoqi@0 3566 : _g1h(g1h), _cm(cm), _task(task) {
aoqi@0 3567 assert(_ref_processor == NULL, "should be initialized to NULL");
aoqi@0 3568
aoqi@0 3569 if (G1UseConcMarkReferenceProcessing) {
aoqi@0 3570 _ref_processor = g1h->ref_processor_cm();
aoqi@0 3571 assert(_ref_processor != NULL, "should not be NULL");
aoqi@0 3572 }
aoqi@0 3573 }
aoqi@0 3574
aoqi@0 3575 void CMTask::setup_for_region(HeapRegion* hr) {
aoqi@0 3576 assert(hr != NULL,
brutisso@7049 3577 "claim_region() should have filtered out NULL regions");
aoqi@0 3578 assert(!hr->continuesHumongous(),
aoqi@0 3579 "claim_region() should have filtered out continues humongous regions");
aoqi@0 3580
aoqi@0 3581 if (_cm->verbose_low()) {
aoqi@0 3582 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
aoqi@0 3583 _worker_id, p2i(hr));
aoqi@0 3584 }
aoqi@0 3585
aoqi@0 3586 _curr_region = hr;
aoqi@0 3587 _finger = hr->bottom();
aoqi@0 3588 update_region_limit();
aoqi@0 3589 }
aoqi@0 3590
aoqi@0 3591 void CMTask::update_region_limit() {
aoqi@0 3592 HeapRegion* hr = _curr_region;
aoqi@0 3593 HeapWord* bottom = hr->bottom();
aoqi@0 3594 HeapWord* limit = hr->next_top_at_mark_start();
aoqi@0 3595
aoqi@0 3596 if (limit == bottom) {
aoqi@0 3597 if (_cm->verbose_low()) {
aoqi@0 3598 gclog_or_tty->print_cr("[%u] found an empty region "
aoqi@0 3599 "["PTR_FORMAT", "PTR_FORMAT")",
aoqi@0 3600 _worker_id, p2i(bottom), p2i(limit));
aoqi@0 3601 }
aoqi@0 3602 // The region was collected underneath our feet.
aoqi@0 3603 // We set the finger to bottom to ensure that the bitmap
aoqi@0 3604 // iteration that will follow this will not do anything.
aoqi@0 3605 // (this is not a condition that holds when we set the region up,
aoqi@0 3606 // as the region is not supposed to be empty in the first place)
aoqi@0 3607 _finger = bottom;
aoqi@0 3608 } else if (limit >= _region_limit) {
aoqi@0 3609 assert(limit >= _finger, "peace of mind");
aoqi@0 3610 } else {
aoqi@0 3611 assert(limit < _region_limit, "only way to get here");
aoqi@0 3612 // This can happen under some pretty unusual circumstances. An
aoqi@0 3613 // evacuation pause empties the region underneath our feet (NTAMS
aoqi@0 3614 // at bottom). We then do some allocation in the region (NTAMS
aoqi@0 3615 // stays at bottom), followed by the region being used as a GC
aoqi@0 3616 // alloc region (NTAMS will move to top() and the objects
aoqi@0 3617 // originally below it will be grayed). All objects now marked in
aoqi@0 3618 // the region are explicitly grayed, if below the global finger,
aoqi@0 3619 // and we do not need in fact to scan anything else. So, we simply
aoqi@0 3620 // set _finger to be limit to ensure that the bitmap iteration
aoqi@0 3621 // doesn't do anything.
aoqi@0 3622 _finger = limit;
aoqi@0 3623 }
aoqi@0 3624
aoqi@0 3625 _region_limit = limit;
aoqi@0 3626 }
aoqi@0 3627
aoqi@0 3628 void CMTask::giveup_current_region() {
aoqi@0 3629 assert(_curr_region != NULL, "invariant");
aoqi@0 3630 if (_cm->verbose_low()) {
aoqi@0 3631 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
aoqi@0 3632 _worker_id, p2i(_curr_region));
aoqi@0 3633 }
aoqi@0 3634 clear_region_fields();
aoqi@0 3635 }
aoqi@0 3636
aoqi@0 3637 void CMTask::clear_region_fields() {
aoqi@0 3638 // Values for these three fields that indicate that we're not
aoqi@0 3639 // holding on to a region.
aoqi@0 3640 _curr_region = NULL;
aoqi@0 3641 _finger = NULL;
aoqi@0 3642 _region_limit = NULL;
aoqi@0 3643 }
aoqi@0 3644
aoqi@0 3645 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
aoqi@0 3646 if (cm_oop_closure == NULL) {
aoqi@0 3647 assert(_cm_oop_closure != NULL, "invariant");
aoqi@0 3648 } else {
aoqi@0 3649 assert(_cm_oop_closure == NULL, "invariant");
aoqi@0 3650 }
aoqi@0 3651 _cm_oop_closure = cm_oop_closure;
aoqi@0 3652 }
aoqi@0 3653
aoqi@0 3654 void CMTask::reset(CMBitMap* nextMarkBitMap) {
aoqi@0 3655 guarantee(nextMarkBitMap != NULL, "invariant");
aoqi@0 3656
aoqi@0 3657 if (_cm->verbose_low()) {
aoqi@0 3658 gclog_or_tty->print_cr("[%u] resetting", _worker_id);
aoqi@0 3659 }
aoqi@0 3660
aoqi@0 3661 _nextMarkBitMap = nextMarkBitMap;
aoqi@0 3662 clear_region_fields();
aoqi@0 3663
aoqi@0 3664 _calls = 0;
aoqi@0 3665 _elapsed_time_ms = 0.0;
aoqi@0 3666 _termination_time_ms = 0.0;
aoqi@0 3667 _termination_start_time_ms = 0.0;
aoqi@0 3668
aoqi@0 3669 #if _MARKING_STATS_
aoqi@0 3670 _local_pushes = 0;
aoqi@0 3671 _local_pops = 0;
aoqi@0 3672 _local_max_size = 0;
aoqi@0 3673 _objs_scanned = 0;
aoqi@0 3674 _global_pushes = 0;
aoqi@0 3675 _global_pops = 0;
aoqi@0 3676 _global_max_size = 0;
aoqi@0 3677 _global_transfers_to = 0;
aoqi@0 3678 _global_transfers_from = 0;
aoqi@0 3679 _regions_claimed = 0;
aoqi@0 3680 _objs_found_on_bitmap = 0;
aoqi@0 3681 _satb_buffers_processed = 0;
aoqi@0 3682 _steal_attempts = 0;
aoqi@0 3683 _steals = 0;
aoqi@0 3684 _aborted = 0;
aoqi@0 3685 _aborted_overflow = 0;
aoqi@0 3686 _aborted_cm_aborted = 0;
aoqi@0 3687 _aborted_yield = 0;
aoqi@0 3688 _aborted_timed_out = 0;
aoqi@0 3689 _aborted_satb = 0;
aoqi@0 3690 _aborted_termination = 0;
aoqi@0 3691 #endif // _MARKING_STATS_
aoqi@0 3692 }
aoqi@0 3693
aoqi@0 3694 bool CMTask::should_exit_termination() {
aoqi@0 3695 regular_clock_call();
aoqi@0 3696 // This is called when we are in the termination protocol. We should
aoqi@0 3697 // quit if, for some reason, this task wants to abort or the global
aoqi@0 3698 // stack is not empty (this means that we can get work from it).
aoqi@0 3699 return !_cm->mark_stack_empty() || has_aborted();
aoqi@0 3700 }
aoqi@0 3701
aoqi@0 3702 void CMTask::reached_limit() {
aoqi@0 3703 assert(_words_scanned >= _words_scanned_limit ||
aoqi@0 3704 _refs_reached >= _refs_reached_limit ,
aoqi@0 3705 "shouldn't have been called otherwise");
aoqi@0 3706 regular_clock_call();
aoqi@0 3707 }
aoqi@0 3708
aoqi@0 3709 void CMTask::regular_clock_call() {
aoqi@0 3710 if (has_aborted()) return;
aoqi@0 3711
aoqi@0 3712 // First, we need to recalculate the words scanned and refs reached
aoqi@0 3713 // limits for the next clock call.
aoqi@0 3714 recalculate_limits();
aoqi@0 3715
aoqi@0 3716 // During the regular clock call we do the following
aoqi@0 3717
aoqi@0 3718 // (1) If an overflow has been flagged, then we abort.
aoqi@0 3719 if (_cm->has_overflown()) {
aoqi@0 3720 set_has_aborted();
aoqi@0 3721 return;
aoqi@0 3722 }
aoqi@0 3723
aoqi@0 3724 // If we are not concurrent (i.e. we're doing remark) we don't need
aoqi@0 3725 // to check anything else. The other steps are only needed during
aoqi@0 3726 // the concurrent marking phase.
aoqi@0 3727 if (!concurrent()) return;
aoqi@0 3728
aoqi@0 3729 // (2) If marking has been aborted for Full GC, then we also abort.
aoqi@0 3730 if (_cm->has_aborted()) {
aoqi@0 3731 set_has_aborted();
aoqi@0 3732 statsOnly( ++_aborted_cm_aborted );
aoqi@0 3733 return;
aoqi@0 3734 }
aoqi@0 3735
aoqi@0 3736 double curr_time_ms = os::elapsedVTime() * 1000.0;
aoqi@0 3737
aoqi@0 3738 // (3) If marking stats are enabled, then we update the step history.
aoqi@0 3739 #if _MARKING_STATS_
aoqi@0 3740 if (_words_scanned >= _words_scanned_limit) {
aoqi@0 3741 ++_clock_due_to_scanning;
aoqi@0 3742 }
aoqi@0 3743 if (_refs_reached >= _refs_reached_limit) {
aoqi@0 3744 ++_clock_due_to_marking;
aoqi@0 3745 }
aoqi@0 3746
aoqi@0 3747 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
aoqi@0 3748 _interval_start_time_ms = curr_time_ms;
aoqi@0 3749 _all_clock_intervals_ms.add(last_interval_ms);
aoqi@0 3750
aoqi@0 3751 if (_cm->verbose_medium()) {
aoqi@0 3752 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
tschatzl@7094 3753 "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
aoqi@0 3754 _worker_id, last_interval_ms,
aoqi@0 3755 _words_scanned,
aoqi@0 3756 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
aoqi@0 3757 _refs_reached,
aoqi@0 3758 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
aoqi@0 3759 }
aoqi@0 3760 #endif // _MARKING_STATS_
aoqi@0 3761
aoqi@0 3762 // (4) We check whether we should yield. If we have to, then we abort.
pliden@6906 3763 if (SuspendibleThreadSet::should_yield()) {
aoqi@0 3764 // We should yield. To do this we abort the task. The caller is
aoqi@0 3765 // responsible for yielding.
aoqi@0 3766 set_has_aborted();
aoqi@0 3767 statsOnly( ++_aborted_yield );
aoqi@0 3768 return;
aoqi@0 3769 }
aoqi@0 3770
aoqi@0 3771 // (5) We check whether we've reached our time quota. If we have,
aoqi@0 3772 // then we abort.
aoqi@0 3773 double elapsed_time_ms = curr_time_ms - _start_time_ms;
aoqi@0 3774 if (elapsed_time_ms > _time_target_ms) {
aoqi@0 3775 set_has_aborted();
aoqi@0 3776 _has_timed_out = true;
aoqi@0 3777 statsOnly( ++_aborted_timed_out );
aoqi@0 3778 return;
aoqi@0 3779 }
aoqi@0 3780
aoqi@0 3781 // (6) Finally, we check whether there are enough completed SATB
aoqi@0 3782 // buffers available for processing. If there are, we abort.
aoqi@0 3783 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
aoqi@0 3784 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
aoqi@0 3785 if (_cm->verbose_low()) {
aoqi@0 3786 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
aoqi@0 3787 _worker_id);
aoqi@0 3788 }
aoqi@0 3789 // we do need to process SATB buffers, we'll abort and restart
aoqi@0 3790 // the marking task to do so
aoqi@0 3791 set_has_aborted();
aoqi@0 3792 statsOnly( ++_aborted_satb );
aoqi@0 3793 return;
aoqi@0 3794 }
aoqi@0 3795 }
aoqi@0 3796
aoqi@0 3797 void CMTask::recalculate_limits() {
aoqi@0 3798 _real_words_scanned_limit = _words_scanned + words_scanned_period;
aoqi@0 3799 _words_scanned_limit = _real_words_scanned_limit;
aoqi@0 3800
aoqi@0 3801 _real_refs_reached_limit = _refs_reached + refs_reached_period;
aoqi@0 3802 _refs_reached_limit = _real_refs_reached_limit;
aoqi@0 3803 }
aoqi@0 3804
aoqi@0 3805 void CMTask::decrease_limits() {
aoqi@0 3806 // This is called when we believe that we're going to do an infrequent
aoqi@0 3807 // operation which will increase the per byte scanned cost (i.e. move
aoqi@0 3808 // entries to/from the global stack). It basically tries to decrease the
aoqi@0 3809 // scanning limit so that the clock is called earlier.
aoqi@0 3810
aoqi@0 3811 if (_cm->verbose_medium()) {
aoqi@0 3812 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
aoqi@0 3813 }
aoqi@0 3814
aoqi@0 3815 _words_scanned_limit = _real_words_scanned_limit -
aoqi@0 3816 3 * words_scanned_period / 4;
aoqi@0 3817 _refs_reached_limit = _real_refs_reached_limit -
aoqi@0 3818 3 * refs_reached_period / 4;
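// Pulling both limits back by three quarters of a period means the next clock call is triggered after at most a quarter of the normal period's worth of work, counted from the last recalculate_limits().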
aoqi@0 3819 }
aoqi@0 3820
aoqi@0 3821 void CMTask::move_entries_to_global_stack() {
aoqi@0 3822 // local array where we'll store the entries that will be popped
aoqi@0 3823 // from the local queue
aoqi@0 3824 oop buffer[global_stack_transfer_size];
aoqi@0 3825
aoqi@0 3826 int n = 0;
aoqi@0 3827 oop obj;
aoqi@0 3828 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
aoqi@0 3829 buffer[n] = obj;
aoqi@0 3830 ++n;
aoqi@0 3831 }
aoqi@0 3832
aoqi@0 3833 if (n > 0) {
aoqi@0 3834 // we popped at least one entry from the local queue
aoqi@0 3835
aoqi@0 3836 statsOnly( ++_global_transfers_to; _local_pops += n );
aoqi@0 3837
aoqi@0 3838 if (!_cm->mark_stack_push(buffer, n)) {
aoqi@0 3839 if (_cm->verbose_low()) {
aoqi@0 3840 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
aoqi@0 3841 _worker_id);
aoqi@0 3842 }
aoqi@0 3843 set_has_aborted();
aoqi@0 3844 } else {
aoqi@0 3845 // the transfer was successful
aoqi@0 3846
aoqi@0 3847 if (_cm->verbose_medium()) {
aoqi@0 3848 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
aoqi@0 3849 _worker_id, n);
aoqi@0 3850 }
aoqi@0 3851 statsOnly( int tmp_size = _cm->mark_stack_size();
aoqi@0 3852 if (tmp_size > _global_max_size) {
aoqi@0 3853 _global_max_size = tmp_size;
aoqi@0 3854 }
aoqi@0 3855 _global_pushes += n );
aoqi@0 3856 }
aoqi@0 3857 }
aoqi@0 3858
aoqi@0 3859 // this operation was quite expensive, so decrease the limits
aoqi@0 3860 decrease_limits();
aoqi@0 3861 }
aoqi@0 3862
aoqi@0 3863 void CMTask::get_entries_from_global_stack() {
aoqi@0 3864 // local array where we'll store the entries that will be popped
aoqi@0 3865 // from the global stack.
aoqi@0 3866 oop buffer[global_stack_transfer_size];
aoqi@0 3867 int n;
aoqi@0 3868 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
aoqi@0 3869 assert(n <= global_stack_transfer_size,
aoqi@0 3870 "we should not pop more than the given limit");
aoqi@0 3871 if (n > 0) {
aoqi@0 3872 // yes, we did actually pop at least one entry
aoqi@0 3873
aoqi@0 3874 statsOnly( ++_global_transfers_from; _global_pops += n );
aoqi@0 3875 if (_cm->verbose_medium()) {
aoqi@0 3876 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
aoqi@0 3877 _worker_id, n);
aoqi@0 3878 }
aoqi@0 3879 for (int i = 0; i < n; ++i) {
aoqi@0 3880 bool success = _task_queue->push(buffer[i]);
aoqi@0 3881 // We only call this when the local queue is empty or under a
aoqi@0 3882 // given target limit. So, we do not expect this push to fail.
aoqi@0 3883 assert(success, "invariant");
aoqi@0 3884 }
aoqi@0 3885
aoqi@0 3886 statsOnly( int tmp_size = _task_queue->size();
aoqi@0 3887 if (tmp_size > _local_max_size) {
aoqi@0 3888 _local_max_size = tmp_size;
aoqi@0 3889 }
aoqi@0 3890 _local_pushes += n );
aoqi@0 3891 }
aoqi@0 3892
aoqi@0 3893 // this operation was quite expensive, so decrease the limits
aoqi@0 3894 decrease_limits();
aoqi@0 3895 }
aoqi@0 3896
aoqi@0 3897 void CMTask::drain_local_queue(bool partially) {
aoqi@0 3898 if (has_aborted()) return;
aoqi@0 3899
aoqi@0 3900 // Decide what the target size is, depending on whether we're going to
aoqi@0 3901 // drain it partially (so that other tasks can steal if they run out
aoqi@0 3902 // of things to do) or totally (at the very end).
aoqi@0 3903 size_t target_size;
aoqi@0 3904 if (partially) {
aoqi@0 3905 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
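// For a partial drain we stop once the queue is down to the smaller of a third of its capacity and GCDrainStackTargetSize entries.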
aoqi@0 3906 } else {
aoqi@0 3907 target_size = 0;
aoqi@0 3908 }
aoqi@0 3909
aoqi@0 3910 if (_task_queue->size() > target_size) {
aoqi@0 3911 if (_cm->verbose_high()) {
aoqi@0 3912 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
aoqi@0 3913 _worker_id, target_size);
aoqi@0 3914 }
aoqi@0 3915
aoqi@0 3916 oop obj;
aoqi@0 3917 bool ret = _task_queue->pop_local(obj);
aoqi@0 3918 while (ret) {
aoqi@0 3919 statsOnly( ++_local_pops );
aoqi@0 3920
aoqi@0 3921 if (_cm->verbose_high()) {
aoqi@0 3922 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
aoqi@0 3923 p2i((void*) obj));
aoqi@0 3924 }
aoqi@0 3925
aoqi@0 3926 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
aoqi@0 3927 assert(!_g1h->is_on_master_free_list(
aoqi@0 3928 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
aoqi@0 3929
aoqi@0 3930 scan_object(obj);
aoqi@0 3931
aoqi@0 3932 if (_task_queue->size() <= target_size || has_aborted()) {
aoqi@0 3933 ret = false;
aoqi@0 3934 } else {
aoqi@0 3935 ret = _task_queue->pop_local(obj);
aoqi@0 3936 }
aoqi@0 3937 }
aoqi@0 3938
aoqi@0 3939 if (_cm->verbose_high()) {
aoqi@0 3940 gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
aoqi@0 3941 _worker_id, _task_queue->size());
aoqi@0 3942 }
aoqi@0 3943 }
aoqi@0 3944 }
aoqi@0 3945
aoqi@0 3946 void CMTask::drain_global_stack(bool partially) {
aoqi@0 3947 if (has_aborted()) return;
aoqi@0 3948
aoqi@0 3949 // We have a policy to drain the local queue before we attempt to
aoqi@0 3950 // drain the global stack.
aoqi@0 3951 assert(partially || _task_queue->size() == 0, "invariant");
aoqi@0 3952
aoqi@0 3953 // Decide what the target size is, depending on whether we're going to
aoqi@0 3954 // drain it partially (so that other tasks can steal if they run out
aoqi@0 3955 // of things to do) or totally (at the very end). Notice that,
aoqi@0 3956 // because we move entries from the global stack in chunks or
aoqi@0 3957 // because another task might be doing the same, we might in fact
aoqi@0 3958 // drop below the target. But, this is not a problem.
aoqi@0 3959 size_t target_size;
aoqi@0 3960 if (partially) {
aoqi@0 3961 target_size = _cm->partial_mark_stack_size_target();
aoqi@0 3962 } else {
aoqi@0 3963 target_size = 0;
aoqi@0 3964 }
aoqi@0 3965
aoqi@0 3966 if (_cm->mark_stack_size() > target_size) {
aoqi@0 3967 if (_cm->verbose_low()) {
aoqi@0 3968 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
aoqi@0 3969 _worker_id, target_size);
aoqi@0 3970 }
aoqi@0 3971
aoqi@0 3972 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
aoqi@0 3973 get_entries_from_global_stack();
aoqi@0 3974 drain_local_queue(partially);
aoqi@0 3975 }
aoqi@0 3976
aoqi@0 3977 if (_cm->verbose_low()) {
aoqi@0 3978 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
aoqi@0 3979 _worker_id, _cm->mark_stack_size());
aoqi@0 3980 }
aoqi@0 3981 }
aoqi@0 3982 }
aoqi@0 3983
aoqi@0 3984 // The SATB queue code makes several assumptions about whether to call
aoqi@0 3985 // the par or non-par versions of the methods, which is why some of the
aoqi@0 3986 // code is replicated. We should really get rid of the single-threaded
aoqi@0 3987 // version of the code to simplify things.
aoqi@0 3988 void CMTask::drain_satb_buffers() {
aoqi@0 3989 if (has_aborted()) return;
aoqi@0 3990
aoqi@0 3991 // We set this so that the regular clock knows that we're in the
aoqi@0 3992 // middle of draining buffers and doesn't set the abort flag when it
aoqi@0 3993 // notices that SATB buffers are available for draining. It'd be
aoqi@0 3994 // very counterproductive if it did that. :-)
aoqi@0 3995 _draining_satb_buffers = true;
aoqi@0 3996
aoqi@0 3997 CMObjectClosure oc(this);
aoqi@0 3998 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
aoqi@0 3999 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 4000 satb_mq_set.set_par_closure(_worker_id, &oc);
aoqi@0 4001 } else {
aoqi@0 4002 satb_mq_set.set_closure(&oc);
aoqi@0 4003 }
aoqi@0 4004
aoqi@0 4005 // This keeps claiming and applying the closure to completed buffers
aoqi@0 4006 // until we run out of buffers or we need to abort.
aoqi@0 4007 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 4008 while (!has_aborted() &&
aoqi@0 4009 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
aoqi@0 4010 if (_cm->verbose_medium()) {
aoqi@0 4011 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
aoqi@0 4012 }
aoqi@0 4013 statsOnly( ++_satb_buffers_processed );
aoqi@0 4014 regular_clock_call();
aoqi@0 4015 }
aoqi@0 4016 } else {
aoqi@0 4017 while (!has_aborted() &&
aoqi@0 4018 satb_mq_set.apply_closure_to_completed_buffer()) {
aoqi@0 4019 if (_cm->verbose_medium()) {
aoqi@0 4020 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
aoqi@0 4021 }
aoqi@0 4022 statsOnly( ++_satb_buffers_processed );
aoqi@0 4023 regular_clock_call();
aoqi@0 4024 }
aoqi@0 4025 }
aoqi@0 4026
aoqi@0 4027 _draining_satb_buffers = false;
aoqi@0 4028
aoqi@0 4029 assert(has_aborted() ||
aoqi@0 4030 concurrent() ||
aoqi@0 4031 satb_mq_set.completed_buffers_num() == 0, "invariant");
aoqi@0 4032
aoqi@0 4033 if (G1CollectedHeap::use_parallel_gc_threads()) {
aoqi@0 4034 satb_mq_set.set_par_closure(_worker_id, NULL);
aoqi@0 4035 } else {
aoqi@0 4036 satb_mq_set.set_closure(NULL);
aoqi@0 4037 }
aoqi@0 4038
aoqi@0 4039 // Again, this was a potentially expensive operation, so decrease the
aoqi@0 4040 // limits to get the regular clock call early.
aoqi@0 4041 decrease_limits();
aoqi@0 4042 }
aoqi@0 4043
aoqi@0 4044 void CMTask::print_stats() {
aoqi@0 4045 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
aoqi@0 4046 _worker_id, _calls);
aoqi@0 4047 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
aoqi@0 4048 _elapsed_time_ms, _termination_time_ms);
aoqi@0 4049 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
aoqi@0 4050 _step_times_ms.num(), _step_times_ms.avg(),
aoqi@0 4051 _step_times_ms.sd());
aoqi@0 4052 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
aoqi@0 4053 _step_times_ms.maximum(), _step_times_ms.sum());
aoqi@0 4054
aoqi@0 4055 #if _MARKING_STATS_
aoqi@0 4056 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
aoqi@0 4057 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
aoqi@0 4058 _all_clock_intervals_ms.sd());
aoqi@0 4059 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms",
aoqi@0 4060 _all_clock_intervals_ms.maximum(),
aoqi@0 4061 _all_clock_intervals_ms.sum());
aoqi@0 4062 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d",
aoqi@0 4063 _clock_due_to_scanning, _clock_due_to_marking);
aoqi@0 4064 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d",
aoqi@0 4065 _objs_scanned, _objs_found_on_bitmap);
aoqi@0 4066 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d",
aoqi@0 4067 _local_pushes, _local_pops, _local_max_size);
aoqi@0 4068 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d",
aoqi@0 4069 _global_pushes, _global_pops, _global_max_size);
aoqi@0 4070 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d",
aoqi@0 4071 _global_transfers_to, _global_transfers_from);
aoqi@0 4072 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed);
aoqi@0 4073 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed);
aoqi@0 4074 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d",
aoqi@0 4075 _steal_attempts, _steals);
aoqi@0 4076 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted);
aoqi@0 4077 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d",
aoqi@0 4078 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
aoqi@0 4079 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d",
aoqi@0 4080 _aborted_timed_out, _aborted_satb, _aborted_termination);
aoqi@0 4081 #endif // _MARKING_STATS_
aoqi@0 4082 }
aoqi@0 4083
aoqi@0 4084 /*****************************************************************************
aoqi@0 4085
aoqi@0 4086 The do_marking_step(time_target_ms, ...) method is the building
aoqi@0 4087 block of the parallel marking framework. It can be called in parallel
aoqi@0 4088 with other invocations of do_marking_step() on different tasks
aoqi@0 4089 (but only one per task, obviously) and concurrently with the
aoqi@0 4090 mutator threads, or during remark, hence it eliminates the need
aoqi@0 4091 for two versions of the code. When called during remark, it will
aoqi@0 4092 pick up from where the task left off during the concurrent marking
aoqi@0 4093 phase. Interestingly, tasks are also claimable during evacuation
aoqi@0 4094 pauses, since do_marking_step() ensures that it aborts before
aoqi@0 4095 it needs to yield.
aoqi@0 4096
aoqi@0 4097 The data structures that it uses to do marking work are the
aoqi@0 4098 following:
aoqi@0 4099
aoqi@0 4100 (1) Marking Bitmap. If there are gray objects that appear only
aoqi@0 4101 on the bitmap (this happens either when dealing with an overflow
aoqi@0 4102 or when the initial marking phase has simply marked the roots
aoqi@0 4103 and didn't push them on the stack), then tasks claim heap
aoqi@0 4104 regions whose bitmap they then scan to find gray objects. A
aoqi@0 4105 global finger indicates where the end of the last claimed region
aoqi@0 4106 is. A local finger indicates how far into the region a task has
aoqi@0 4107 scanned. The two fingers are used to determine how to gray an
aoqi@0 4108 object (i.e. whether simply marking it is OK, as it will be
aoqi@0 4109 visited by a task in the future, or whether it also needs to be
aoqi@0 4110 pushed on a stack).
aoqi@0 4111
aoqi@0 4112 (2) Local Queue. The local queue of the task which is accessed
aoqi@0 4113 reasonably efficiently by the task. Other tasks can steal from
aoqi@0 4114 it when they run out of work. Throughout the marking phase, a
aoqi@0 4115 task attempts to keep its local queue short but not totally
aoqi@0 4116 empty, so that entries are available for stealing by other
aoqi@0 4117 tasks. Only when there is no more work will a task totally
aoqi@0 4118 drain its local queue.
aoqi@0 4119
aoqi@0 4120 (3) Global Mark Stack. This handles local queue overflow. During
aoqi@0 4121 marking only sets of entries are moved between it and the local
aoqi@0 4122 queues, as access to it requires a mutex, and more fine-grained
aoqi@0 4123 interaction with it might cause contention. If it
aoqi@0 4124 overflows, then the marking phase should restart and iterate
aoqi@0 4125 over the bitmap to identify gray objects. Throughout the marking
aoqi@0 4126 phase, tasks attempt to keep the global mark stack at a small
aoqi@0 4127 length but not totally empty, so that entries are available for
aoqi@0 4128 popping by other tasks. Only when there is no more work will tasks
aoqi@0 4129 totally drain the global mark stack.
aoqi@0 4130
aoqi@0 4131 (4) SATB Buffer Queue. This is where completed SATB buffers are
aoqi@0 4132 made available. Buffers are regularly removed from this queue
aoqi@0 4133 and scanned for roots, so that the queue doesn't get too
aoqi@0 4134 long. During remark, all completed buffers are processed, as
aoqi@0 4135 well as the filled-in parts of any uncompleted buffers.
aoqi@0 4136
aoqi@0 4137 The do_marking_step() method tries to abort when the time target
aoqi@0 4138 has been reached. There are a few other cases when the
aoqi@0 4139 do_marking_step() method also aborts:
aoqi@0 4140
aoqi@0 4141 (1) When the marking phase has been aborted (after a Full GC).
aoqi@0 4142
aoqi@0 4143 (2) When a global overflow (on the global stack) has been
aoqi@0 4144 triggered. Before the task aborts, it will actually sync up with
aoqi@0 4145 the other tasks to ensure that all the marking data structures
aoqi@0 4146 (local queues, stacks, fingers etc.) are re-initialized so that
aoqi@0 4147 when do_marking_step() completes, the marking phase can
aoqi@0 4148 immediately restart.
aoqi@0 4149
aoqi@0 4150 (3) When enough completed SATB buffers are available. The
aoqi@0 4151 do_marking_step() method only tries to drain SATB buffers right
aoqi@0 4152 at the beginning. So, if enough buffers are available, the
aoqi@0 4153 marking step aborts and the SATB buffers are processed at
aoqi@0 4154 the beginning of the next invocation.
aoqi@0 4155
aoqi@0 4156 (4) To yield. When we have to yield, we abort and yield
aoqi@0 4157 right at the end of do_marking_step(). This saves us from a lot
aoqi@0 4158 of hassle as, by yielding, we might allow a Full GC. If this
aoqi@0 4159 happens then objects will be compacted underneath our feet, the
aoqi@0 4160 heap might shrink, etc. We save checking for this by just
aoqi@0 4161 aborting and doing the yield right at the end.
aoqi@0 4162
aoqi@0 4163 From the above it follows that the do_marking_step() method should
aoqi@0 4164 be called in a loop (or, otherwise, regularly) until it completes.
aoqi@0 4165
aoqi@0 4166 If a marking step completes without its has_aborted() flag being
aoqi@0 4167 true, it means it has completed the current marking phase (and
aoqi@0 4168 also all other marking tasks have done so and have all synced up).
aoqi@0 4169
aoqi@0 4170 A method called regular_clock_call() is invoked "regularly" (in
aoqi@0 4171 sub ms intervals) throughout marking. It is this clock method that
aoqi@0 4172 checks all the abort conditions which were mentioned above and
aoqi@0 4173 decides when the task should abort. A work-based scheme is used to
aoqi@0 4174 trigger this clock method: when the number of object words the
aoqi@0 4175 marking phase has scanned or the number of references the marking
aoqi@0 4176 phase has visited reaches a given limit. Additional invocations of
aoqi@0 4177 the clock method have been planted in a few other strategic places
aoqi@0 4178 too. The initial reason for the clock method was to avoid calling
aoqi@0 4179 vtime too regularly, as it is quite expensive. So, once it was in
aoqi@0 4180 place, it was natural to piggy-back all the other conditions on it
aoqi@0 4181 too and not constantly check them throughout the code.
aoqi@0 4182
aoqi@0 4183 If do_termination is true then do_marking_step will enter its
aoqi@0 4184 termination protocol.
aoqi@0 4185
aoqi@0 4186 The value of is_serial must be true when do_marking_step is being
aoqi@0 4187 called serially (i.e. by the VMThread) and do_marking_step should
aoqi@0 4188 skip any synchronization in the termination and overflow code.
aoqi@0 4189 Examples include the serial remark code and the serial reference
aoqi@0 4190 processing closures.
aoqi@0 4191
aoqi@0 4192 The value of is_serial must be false when do_marking_step is
aoqi@0 4193 being called by any of the worker threads in a work gang.
aoqi@0 4194 Examples include the concurrent marking code (CMMarkingTask),
aoqi@0 4195 the MT remark code, and the MT reference processing closures.
aoqi@0 4196
aoqi@0 4197 *****************************************************************************/
aoqi@0 4198
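// Editor's sketch (not part of the original source): a minimal, hypothetical
// driver loop for the calling pattern described in the comment block above.
// The names "task", "cm" and "target_ms" are placeholders; only
// do_marking_step(), CMTask::has_aborted() and ConcurrentMark::has_aborted(),
// all visible in the surrounding code, are assumed.
//
//   do {
//     task->do_marking_step(target_ms,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // If the step returns with has_aborted() set, it hit one of the abort
//     // conditions listed above (time target, global overflow, SATB buffers,
//     // yield) and must be invoked again; if it returns without it, the
//     // current marking phase is complete for this task.
//   } while (task->has_aborted() && !cm->has_aborted());
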
aoqi@0 4199 void CMTask::do_marking_step(double time_target_ms,
aoqi@0 4200 bool do_termination,
aoqi@0 4201 bool is_serial) {
aoqi@0 4202 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
aoqi@0 4203 assert(concurrent() == _cm->concurrent(), "they should be the same");
aoqi@0 4204
aoqi@0 4205 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
aoqi@0 4206 assert(_task_queues != NULL, "invariant");
aoqi@0 4207 assert(_task_queue != NULL, "invariant");
aoqi@0 4208 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
aoqi@0 4209
aoqi@0 4210 assert(!_claimed,
aoqi@0 4211 "only one thread should claim this task at any one time");
aoqi@0 4212
aoqi@0 4213 // OK, this doesn't safeguard against all possible scenarios, as it is
aoqi@0 4214 // possible for two threads to set the _claimed flag at the same
aoqi@0 4215 // time. But it is only for debugging purposes anyway and it will
aoqi@0 4216 // catch most problems.
aoqi@0 4217 _claimed = true;
aoqi@0 4218
aoqi@0 4219 _start_time_ms = os::elapsedVTime() * 1000.0;
aoqi@0 4220 statsOnly( _interval_start_time_ms = _start_time_ms );
aoqi@0 4221
aoqi@0 4222 // If do_stealing is true then do_marking_step will attempt to
aoqi@0 4223 // steal work from the other CMTasks. It only makes sense to
aoqi@0 4224 // enable stealing when the termination protocol is enabled
aoqi@0 4225 // and do_marking_step() is not being called serially.
aoqi@0 4226 bool do_stealing = do_termination && !is_serial;
aoqi@0 4227
aoqi@0 4228 double diff_prediction_ms =
aoqi@0 4229 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
aoqi@0 4230 _time_target_ms = time_target_ms - diff_prediction_ms;
aoqi@0 4231
aoqi@0 4232 // set up the variables that are used in the work-based scheme to
aoqi@0 4233 // call the regular clock method
aoqi@0 4234 _words_scanned = 0;
aoqi@0 4235 _refs_reached = 0;
aoqi@0 4236 recalculate_limits();
aoqi@0 4237
aoqi@0 4238 // clear all flags
aoqi@0 4239 clear_has_aborted();
aoqi@0 4240 _has_timed_out = false;
aoqi@0 4241 _draining_satb_buffers = false;
aoqi@0 4242
aoqi@0 4243 ++_calls;
aoqi@0 4244
aoqi@0 4245 if (_cm->verbose_low()) {
aoqi@0 4246 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
aoqi@0 4247 "target = %1.2lfms >>>>>>>>>>",
aoqi@0 4248 _worker_id, _calls, _time_target_ms);
aoqi@0 4249 }
aoqi@0 4250
aoqi@0 4251 // Set up the bitmap and oop closures. Anything that uses them is
aoqi@0 4252 // eventually called from this method, so it is OK to allocate these
aoqi@0 4253 // statically.
aoqi@0 4254 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
aoqi@0 4255 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
aoqi@0 4256 set_cm_oop_closure(&cm_oop_closure);
aoqi@0 4257
aoqi@0 4258 if (_cm->has_overflown()) {
aoqi@0 4259 // This can happen if the mark stack overflows during a GC pause
aoqi@0 4260 // and this task, after a yield point, restarts. We have to abort
aoqi@0 4261 // as we need to get into the overflow protocol which happens
aoqi@0 4262 // right at the end of this task.
aoqi@0 4263 set_has_aborted();
aoqi@0 4264 }
aoqi@0 4265
aoqi@0 4266 // First drain any available SATB buffers. After this, we will not
aoqi@0 4267 // look at SATB buffers before the next invocation of this method.
aoqi@0 4268 // If enough completed SATB buffers are queued up, the regular clock
aoqi@0 4269 // will abort this task so that it restarts.
aoqi@0 4270 drain_satb_buffers();
aoqi@0 4271 // ...then partially drain the local queue and the global stack
aoqi@0 4272 drain_local_queue(true);
aoqi@0 4273 drain_global_stack(true);
aoqi@0 4274
aoqi@0 4275 do {
aoqi@0 4276 if (!has_aborted() && _curr_region != NULL) {
aoqi@0 4277 // This means that we're already holding on to a region.
aoqi@0 4278 assert(_finger != NULL, "if region is not NULL, then the finger "
aoqi@0 4279 "should not be NULL either");
aoqi@0 4280
aoqi@0 4281 // We might have restarted this task after an evacuation pause
aoqi@0 4282 // which might have evacuated the region we're holding on to
aoqi@0 4283 // underneath our feet. Let's read its limit again to make sure
aoqi@0 4284 // that we do not iterate over a region of the heap that
aoqi@0 4285 // contains garbage (update_region_limit() will also move
aoqi@0 4286 // _finger to the start of the region if it is found empty).
aoqi@0 4287 update_region_limit();
aoqi@0 4288 // We will start from _finger not from the start of the region,
aoqi@0 4289 // as we might be restarting this task after aborting half-way
aoqi@0 4290 // through scanning this region. In this case, _finger points to
aoqi@0 4291 // the address where we last found a marked object. If this is a
aoqi@0 4292 // fresh region, _finger points to start().
aoqi@0 4293 MemRegion mr = MemRegion(_finger, _region_limit);
aoqi@0 4294
aoqi@0 4295 if (_cm->verbose_low()) {
aoqi@0 4296 gclog_or_tty->print_cr("[%u] we're scanning part "
aoqi@0 4297 "["PTR_FORMAT", "PTR_FORMAT") "
aoqi@0 4298 "of region "HR_FORMAT,
aoqi@0 4299 _worker_id, p2i(_finger), p2i(_region_limit),
aoqi@0 4300 HR_FORMAT_PARAMS(_curr_region));
aoqi@0 4301 }
aoqi@0 4302
aoqi@0 4303 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
aoqi@0 4304 "humongous regions should go around loop once only");
aoqi@0 4305
aoqi@0 4306 // Some special cases:
aoqi@0 4307 // If the memory region is empty, we can just give up the region.
aoqi@0 4308 // If the current region is humongous then we only need to check
aoqi@0 4309 // the bitmap for the bit associated with the start of the object,
aoqi@0 4310 // scan the object if it's live, and give up the region.
aoqi@0 4311 // Otherwise, let's iterate over the bitmap of the part of the region
aoqi@0 4312 // that is left.
aoqi@0 4313 // If the iteration is successful, give up the region.
aoqi@0 4314 if (mr.is_empty()) {
aoqi@0 4315 giveup_current_region();
aoqi@0 4316 regular_clock_call();
aoqi@0 4317 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
aoqi@0 4318 if (_nextMarkBitMap->isMarked(mr.start())) {
aoqi@0 4319 // The object is marked - apply the closure
aoqi@0 4320 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
aoqi@0 4321 bitmap_closure.do_bit(offset);
aoqi@0 4322 }
aoqi@0 4323 // Even if this task aborted while scanning the humongous object
aoqi@0 4324 // we can (and should) give up the current region.
aoqi@0 4325 giveup_current_region();
aoqi@0 4326 regular_clock_call();
aoqi@0 4327 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
aoqi@0 4328 giveup_current_region();
aoqi@0 4329 regular_clock_call();
aoqi@0 4330 } else {
aoqi@0 4331 assert(has_aborted(), "currently the only way to do so");
aoqi@0 4332 // The only way to abort the bitmap iteration is to return
aoqi@0 4333 // false from the do_bit() method. However, inside the
aoqi@0 4334 // do_bit() method we move the _finger to point to the
aoqi@0 4335 // object currently being looked at. So, if we bail out, we
aoqi@0 4336 // have definitely set _finger to something non-null.
aoqi@0 4337 assert(_finger != NULL, "invariant");
aoqi@0 4338
aoqi@0 4339 // Region iteration was actually aborted. So now _finger
aoqi@0 4340 // points to the address of the object we last scanned. If we
aoqi@0 4341 // leave it there, when we restart this task, we will rescan
aoqi@0 4342 // the object. It is easy to avoid this. We move the finger by
aoqi@0 4343 // enough to point to the next possible object header (the
aoqi@0 4344 // bitmap knows by how much we need to move it as it knows its
aoqi@0 4345 // granularity).
aoqi@0 4346 assert(_finger < _region_limit, "invariant");
aoqi@0 4347 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
aoqi@0 4348 // Check if bitmap iteration was aborted while scanning the last object
aoqi@0 4349 if (new_finger >= _region_limit) {
aoqi@0 4350 giveup_current_region();
aoqi@0 4351 } else {
aoqi@0 4352 move_finger_to(new_finger);
aoqi@0 4353 }
aoqi@0 4354 }
aoqi@0 4355 }
aoqi@0 4356 // At this point we have either completed iterating over the
aoqi@0 4357 // region we were holding on to, or we have aborted.
aoqi@0 4358
aoqi@0 4359 // We then partially drain the local queue and the global stack.
aoqi@0 4360 // (Do we really need this?)
aoqi@0 4361 drain_local_queue(true);
aoqi@0 4362 drain_global_stack(true);
aoqi@0 4363
aoqi@0 4364 // Read the note on the claim_region() method on why it might
aoqi@0 4365 // return NULL with potentially more regions available for
aoqi@0 4366 // claiming and why we have to check out_of_regions() to determine
aoqi@0 4367 // whether we're done or not.
aoqi@0 4368 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
aoqi@0 4369 // We are going to try to claim a new region. We should have
aoqi@0 4370 // given up on the previous one.
aoqi@0 4371 // Separated the asserts so that we know which one fires.
aoqi@0 4372 assert(_curr_region == NULL, "invariant");
aoqi@0 4373 assert(_finger == NULL, "invariant");
aoqi@0 4374 assert(_region_limit == NULL, "invariant");
aoqi@0 4375 if (_cm->verbose_low()) {
aoqi@0 4376 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
aoqi@0 4377 }
aoqi@0 4378 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
aoqi@0 4379 if (claimed_region != NULL) {
aoqi@0 4380 // Yes, we managed to claim one
aoqi@0 4381 statsOnly( ++_regions_claimed );
aoqi@0 4382
aoqi@0 4383 if (_cm->verbose_low()) {
aoqi@0 4384 gclog_or_tty->print_cr("[%u] we successfully claimed "
aoqi@0 4385 "region "PTR_FORMAT,
aoqi@0 4386 _worker_id, p2i(claimed_region));
aoqi@0 4387 }
aoqi@0 4388
aoqi@0 4389 setup_for_region(claimed_region);
aoqi@0 4390 assert(_curr_region == claimed_region, "invariant");
aoqi@0 4391 }
aoqi@0 4392 // It is important to call the regular clock here. It might take
aoqi@0 4393 // a while to claim a region if, for example, we hit a large
aoqi@0 4394 // block of empty regions. So we need to call the regular clock
aoqi@0 4395 // method once round the loop to make sure it's called
aoqi@0 4396 // frequently enough.
aoqi@0 4397 regular_clock_call();
aoqi@0 4398 }
aoqi@0 4399
aoqi@0 4400 if (!has_aborted() && _curr_region == NULL) {
aoqi@0 4401 assert(_cm->out_of_regions(),
aoqi@0 4402 "at this point we should be out of regions");
aoqi@0 4403 }
aoqi@0 4404 } while ( _curr_region != NULL && !has_aborted());
aoqi@0 4405
aoqi@0 4406 if (!has_aborted()) {
aoqi@0 4407 // We cannot check whether the global stack is empty, since other
aoqi@0 4408 // tasks might be pushing objects to it concurrently.
aoqi@0 4409 assert(_cm->out_of_regions(),
aoqi@0 4410 "at this point we should be out of regions");
aoqi@0 4411
aoqi@0 4412 if (_cm->verbose_low()) {
aoqi@0 4413 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
aoqi@0 4414 }
aoqi@0 4415
aoqi@0 4416 // Try to reduce the number of available SATB buffers so that
aoqi@0 4417 // remark has less work to do.
aoqi@0 4418 drain_satb_buffers();
aoqi@0 4419 }
aoqi@0 4420
aoqi@0 4421 // Since we've done everything else, we can now totally drain the
aoqi@0 4422 // local queue and global stack.
aoqi@0 4423 drain_local_queue(false);
aoqi@0 4424 drain_global_stack(false);
aoqi@0 4425
aoqi@0 4426 // Attempt at work stealing from other tasks' queues.
aoqi@0 4427 if (do_stealing && !has_aborted()) {
aoqi@0 4428 // We have not aborted. This means that we have finished all that
aoqi@0 4429 // we could. Let's try to do some stealing...
aoqi@0 4430
aoqi@0 4431 // We cannot check whether the global stack is empty, since other
aoqi@0 4432 // tasks might be pushing objects to it concurrently.
aoqi@0 4433 assert(_cm->out_of_regions() && _task_queue->size() == 0,
aoqi@0 4434 "only way to reach here");
aoqi@0 4435
aoqi@0 4436 if (_cm->verbose_low()) {
aoqi@0 4437 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
aoqi@0 4438 }
aoqi@0 4439
aoqi@0 4440 while (!has_aborted()) {
aoqi@0 4441 oop obj;
aoqi@0 4442 statsOnly( ++_steal_attempts );
aoqi@0 4443
aoqi@0 4444 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
aoqi@0 4445 if (_cm->verbose_medium()) {
aoqi@0 4446 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
aoqi@0 4447 _worker_id, p2i((void*) obj));
aoqi@0 4448 }
aoqi@0 4449
aoqi@0 4450 statsOnly( ++_steals );
aoqi@0 4451
aoqi@0 4452 assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
aoqi@0 4453 "any stolen object should be marked");
aoqi@0 4454 scan_object(obj);
aoqi@0 4455
aoqi@0 4456 // And since we're towards the end, let's totally drain the
aoqi@0 4457 // local queue and global stack.
aoqi@0 4458 drain_local_queue(false);
aoqi@0 4459 drain_global_stack(false);
aoqi@0 4460 } else {
aoqi@0 4461 break;
aoqi@0 4462 }
aoqi@0 4463 }
aoqi@0 4464 }
aoqi@0 4465
aoqi@0 4466 // If we are about to wrap up and go into termination, check if we
aoqi@0 4467 // should raise the overflow flag.
aoqi@0 4468 if (do_termination && !has_aborted()) {
aoqi@0 4469 if (_cm->force_overflow()->should_force()) {
aoqi@0 4470 _cm->set_has_overflown();
aoqi@0 4471 regular_clock_call();
aoqi@0 4472 }
aoqi@0 4473 }
aoqi@0 4474
aoqi@0 4475 // We still haven't aborted. Now, let's try to get into the
aoqi@0 4476 // termination protocol.
aoqi@0 4477 if (do_termination && !has_aborted()) {
aoqi@0 4478 // We cannot check whether the global stack is empty, since other
aoqi@0 4479 // tasks might be concurrently pushing objects on it.
aoqi@0 4480 // Separated the asserts so that we know which one fires.
aoqi@0 4481 assert(_cm->out_of_regions(), "only way to reach here");
aoqi@0 4482 assert(_task_queue->size() == 0, "only way to reach here");
aoqi@0 4483
aoqi@0 4484 if (_cm->verbose_low()) {
aoqi@0 4485 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
aoqi@0 4486 }
aoqi@0 4487
aoqi@0 4488 _termination_start_time_ms = os::elapsedVTime() * 1000.0;
aoqi@0 4489
aoqi@0 4490 // The CMTask class also extends the TerminatorTerminator class,
aoqi@0 4491 // hence its should_exit_termination() method will also decide
aoqi@0 4492 // whether to exit the termination protocol or not.
aoqi@0 4493 bool finished = (is_serial ||
aoqi@0 4494 _cm->terminator()->offer_termination(this));
aoqi@0 4495 double termination_end_time_ms = os::elapsedVTime() * 1000.0;
aoqi@0 4496 _termination_time_ms +=
aoqi@0 4497 termination_end_time_ms - _termination_start_time_ms;
aoqi@0 4498
aoqi@0 4499 if (finished) {
aoqi@0 4500 // We're all done.
aoqi@0 4501
aoqi@0 4502 if (_worker_id == 0) {
aoqi@0 4503 // let's allow task 0 to do this
aoqi@0 4504 if (concurrent()) {
aoqi@0 4505 assert(_cm->concurrent_marking_in_progress(), "invariant");
aoqi@0 4506 // we need to set this to false before the next
aoqi@0 4507 // safepoint. This way we ensure that the marking phase
aoqi@0 4508 // doesn't observe any more heap expansions.
aoqi@0 4509 _cm->clear_concurrent_marking_in_progress();
aoqi@0 4510 }
aoqi@0 4511 }
aoqi@0 4512
aoqi@0 4513 // We can now guarantee that the global stack is empty, since
aoqi@0 4514 // all other tasks have finished. We separated the guarantees so
aoqi@0 4515 // that, if a condition is false, we can immediately find out
aoqi@0 4516 // which one.
aoqi@0 4517 guarantee(_cm->out_of_regions(), "only way to reach here");
aoqi@0 4518 guarantee(_cm->mark_stack_empty(), "only way to reach here");
aoqi@0 4519 guarantee(_task_queue->size() == 0, "only way to reach here");
aoqi@0 4520 guarantee(!_cm->has_overflown(), "only way to reach here");
aoqi@0 4521 guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
aoqi@0 4522
aoqi@0 4523 if (_cm->verbose_low()) {
aoqi@0 4524 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
aoqi@0 4525 }
aoqi@0 4526 } else {
aoqi@0 4527 // Apparently there's more work to do. Let's abort this task; the
aoqi@0 4528 // caller will restart it and we can hopefully find more things to do.
aoqi@0 4529
aoqi@0 4530 if (_cm->verbose_low()) {
aoqi@0 4531 gclog_or_tty->print_cr("[%u] apparently there is more work to do",
aoqi@0 4532 _worker_id);
aoqi@0 4533 }
aoqi@0 4534
aoqi@0 4535 set_has_aborted();
aoqi@0 4536 statsOnly( ++_aborted_termination );
aoqi@0 4537 }
aoqi@0 4538 }
aoqi@0 4539
aoqi@0 4540 // Mainly for debugging purposes to make sure that a pointer to the
aoqi@0 4541 // closure which was statically allocated in this frame doesn't
aoqi@0 4542 // escape it by accident.
aoqi@0 4543 set_cm_oop_closure(NULL);
aoqi@0 4544 double end_time_ms = os::elapsedVTime() * 1000.0;
aoqi@0 4545 double elapsed_time_ms = end_time_ms - _start_time_ms;
aoqi@0 4546 // Update the step history.
aoqi@0 4547 _step_times_ms.add(elapsed_time_ms);
aoqi@0 4548
aoqi@0 4549 if (has_aborted()) {
aoqi@0 4550 // The task was aborted for some reason.
aoqi@0 4551
aoqi@0 4552 statsOnly( ++_aborted );
aoqi@0 4553
aoqi@0 4554 if (_has_timed_out) {
aoqi@0 4555 double diff_ms = elapsed_time_ms - _time_target_ms;
aoqi@0 4556 // Keep statistics of how well we did with respect to hitting
aoqi@0 4557 // our target only if we actually timed out (if we aborted for
aoqi@0 4558 // other reasons, then the results might get skewed).
aoqi@0 4559 _marking_step_diffs_ms.add(diff_ms);
aoqi@0 4560 }
aoqi@0 4561
aoqi@0 4562 if (_cm->has_overflown()) {
aoqi@0 4563 // This is the interesting one. We aborted because a global
aoqi@0 4564 // overflow was raised. This means we have to restart the
aoqi@0 4565 // marking phase and start iterating over regions. However, in
aoqi@0 4566 // order to do this we have to make sure that all tasks stop
aoqi@0 4567 // what they are doing and re-initialise in a safe manner. We
aoqi@0 4568 // will achieve this with the use of two barrier sync points.
aoqi@0 4569
aoqi@0 4570 if (_cm->verbose_low()) {
aoqi@0 4571 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
aoqi@0 4572 }
aoqi@0 4573
aoqi@0 4574 if (!is_serial) {
aoqi@0 4575 // We only need to enter the sync barrier if being called
aoqi@0 4576 // from a parallel context
aoqi@0 4577 _cm->enter_first_sync_barrier(_worker_id);
aoqi@0 4578
aoqi@0 4579 // When we exit this sync barrier we know that all tasks have
aoqi@0 4580 // stopped doing marking work. So, it's now safe to
aoqi@0 4581 // re-initialise our data structures. At the end of this method,
aoqi@0 4582 // task 0 will clear the global data structures.
aoqi@0 4583 }
aoqi@0 4584
aoqi@0 4585 statsOnly( ++_aborted_overflow );
aoqi@0 4586
aoqi@0 4587 // We clear the local state of this task...
aoqi@0 4588 clear_region_fields();
aoqi@0 4589
aoqi@0 4590 if (!is_serial) {
aoqi@0 4591 // ...and enter the second barrier.
aoqi@0 4592 _cm->enter_second_sync_barrier(_worker_id);
aoqi@0 4593 }
aoqi@0 4594 // At this point, if we're during the concurrent phase of
aoqi@0 4595 // marking, everything has been re-initialized and we're
aoqi@0 4596 // ready to restart.
aoqi@0 4597 }
aoqi@0 4598
aoqi@0 4599 if (_cm->verbose_low()) {
aoqi@0 4600 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
aoqi@0 4601 "elapsed = %1.2lfms <<<<<<<<<<",
aoqi@0 4602 _worker_id, _time_target_ms, elapsed_time_ms);
aoqi@0 4603 if (_cm->has_aborted()) {
aoqi@0 4604 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
aoqi@0 4605 _worker_id);
aoqi@0 4606 }
aoqi@0 4607 }
aoqi@0 4608 } else {
aoqi@0 4609 if (_cm->verbose_low()) {
aoqi@0 4610 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
aoqi@0 4611 "elapsed = %1.2lfms <<<<<<<<<<",
aoqi@0 4612 _worker_id, _time_target_ms, elapsed_time_ms);
aoqi@0 4613 }
aoqi@0 4614 }
aoqi@0 4615
aoqi@0 4616 _claimed = false;
aoqi@0 4617 }
aoqi@0 4618
aoqi@0 4619 CMTask::CMTask(uint worker_id,
aoqi@0 4620 ConcurrentMark* cm,
aoqi@0 4621 size_t* marked_bytes,
aoqi@0 4622 BitMap* card_bm,
aoqi@0 4623 CMTaskQueue* task_queue,
aoqi@0 4624 CMTaskQueueSet* task_queues)
aoqi@0 4625 : _g1h(G1CollectedHeap::heap()),
aoqi@0 4626 _worker_id(worker_id), _cm(cm),
aoqi@0 4627 _claimed(false),
aoqi@0 4628 _nextMarkBitMap(NULL), _hash_seed(17),
aoqi@0 4629 _task_queue(task_queue),
aoqi@0 4630 _task_queues(task_queues),
aoqi@0 4631 _cm_oop_closure(NULL),
aoqi@0 4632 _marked_bytes_array(marked_bytes),
aoqi@0 4633 _card_bm(card_bm) {
aoqi@0 4634 guarantee(task_queue != NULL, "invariant");
aoqi@0 4635 guarantee(task_queues != NULL, "invariant");
aoqi@0 4636
aoqi@0 4637 statsOnly( _clock_due_to_scanning = 0;
aoqi@0 4638 _clock_due_to_marking = 0 );
aoqi@0 4639
aoqi@0 4640 _marking_step_diffs_ms.add(0.5);
aoqi@0 4641 }
aoqi@0 4642
aoqi@0 4643 // These are formatting macros that are used below to ensure
aoqi@0 4644 // consistent formatting. The *_H_* versions are used to format the
aoqi@0 4645 // header for a particular value and they should be kept consistent
aoqi@0 4646 // with the corresponding macro. Also note that most of the macros add
aoqi@0 4647 // the necessary white space (as a prefix) which makes them a bit
aoqi@0 4648 // easier to compose.
aoqi@0 4649
aoqi@0 4650 // All the output lines are prefixed with this string to be able to
aoqi@0 4651 // identify them easily in a large log file.
aoqi@0 4652 #define G1PPRL_LINE_PREFIX "###"
aoqi@0 4653
aoqi@0 4654 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT
aoqi@0 4655 #ifdef _LP64
aoqi@0 4656 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s"
aoqi@0 4657 #else // _LP64
aoqi@0 4658 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s"
aoqi@0 4659 #endif // _LP64
aoqi@0 4660
aoqi@0 4661 // For per-region info
aoqi@0 4662 #define G1PPRL_TYPE_FORMAT " %-4s"
aoqi@0 4663 #define G1PPRL_TYPE_H_FORMAT " %4s"
aoqi@0 4664 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9)
aoqi@0 4665 #define G1PPRL_BYTE_H_FORMAT " %9s"
aoqi@0 4666 #define G1PPRL_DOUBLE_FORMAT " %14.1f"
aoqi@0 4667 #define G1PPRL_DOUBLE_H_FORMAT " %14s"
aoqi@0 4668
aoqi@0 4669 // For summary info
aoqi@0 4670 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT
aoqi@0 4671 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT
aoqi@0 4672 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB"
aoqi@0 4673 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
aoqi@0 4674
aoqi@0 4675 G1PrintRegionLivenessInfoClosure::
aoqi@0 4676 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
aoqi@0 4677 : _out(out),
aoqi@0 4678 _total_used_bytes(0), _total_capacity_bytes(0),
aoqi@0 4679 _total_prev_live_bytes(0), _total_next_live_bytes(0),
aoqi@0 4680 _hum_used_bytes(0), _hum_capacity_bytes(0),
aoqi@0 4681 _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
aoqi@0 4682 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
aoqi@0 4683 G1CollectedHeap* g1h = G1CollectedHeap::heap();
aoqi@0 4684 MemRegion g1_reserved = g1h->g1_reserved();
aoqi@0 4685 double now = os::elapsedTime();
aoqi@0 4686
aoqi@0 4687 // Print the header of the output.
aoqi@0 4688 _out->cr();
aoqi@0 4689 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
aoqi@0 4690 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
aoqi@0 4691 G1PPRL_SUM_ADDR_FORMAT("reserved")
aoqi@0 4692 G1PPRL_SUM_BYTE_FORMAT("region-size"),
aoqi@0 4693 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
aoqi@0 4694 HeapRegion::GrainBytes);
aoqi@0 4695 _out->print_cr(G1PPRL_LINE_PREFIX);
aoqi@0 4696 _out->print_cr(G1PPRL_LINE_PREFIX
aoqi@0 4697 G1PPRL_TYPE_H_FORMAT
aoqi@0 4698 G1PPRL_ADDR_BASE_H_FORMAT
aoqi@0 4699 G1PPRL_BYTE_H_FORMAT
aoqi@0 4700 G1PPRL_BYTE_H_FORMAT
aoqi@0 4701 G1PPRL_BYTE_H_FORMAT
aoqi@0 4702 G1PPRL_DOUBLE_H_FORMAT
aoqi@0 4703 G1PPRL_BYTE_H_FORMAT
aoqi@0 4704 G1PPRL_BYTE_H_FORMAT,
aoqi@0 4705 "type", "address-range",
aoqi@0 4706 "used", "prev-live", "next-live", "gc-eff",
aoqi@0 4707 "remset", "code-roots");
aoqi@0 4708 _out->print_cr(G1PPRL_LINE_PREFIX
aoqi@0 4709 G1PPRL_TYPE_H_FORMAT
aoqi@0 4710 G1PPRL_ADDR_BASE_H_FORMAT
aoqi@0 4711 G1PPRL_BYTE_H_FORMAT
aoqi@0 4712 G1PPRL_BYTE_H_FORMAT
aoqi@0 4713 G1PPRL_BYTE_H_FORMAT
aoqi@0 4714 G1PPRL_DOUBLE_H_FORMAT
aoqi@0 4715 G1PPRL_BYTE_H_FORMAT
aoqi@0 4716 G1PPRL_BYTE_H_FORMAT,
aoqi@0 4717 "", "",
aoqi@0 4718 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
aoqi@0 4719 "(bytes)", "(bytes)");
aoqi@0 4720 }
aoqi@0 4721
aoqi@0 4722 // It takes as a parameter a reference to one of the _hum_* fields; it
aoqi@0 4723 // deduces the corresponding value for a region in a humongous region
aoqi@0 4724 // series (either the region size, or what's left if the _hum_* field
aoqi@0 4725 // is < the region size), and updates the _hum_* field accordingly.
aoqi@0 4726 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
aoqi@0 4727 size_t bytes = 0;
aoqi@0 4728 // The > 0 check is to deal with the prev and next live bytes which
aoqi@0 4729 // could be 0.
aoqi@0 4730 if (*hum_bytes > 0) {
aoqi@0 4731 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
aoqi@0 4732 *hum_bytes -= bytes;
aoqi@0 4733 }
aoqi@0 4734 return bytes;
aoqi@0 4735 }
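
// Editor's illustration (hypothetical numbers, not from the original source):
// with HeapRegion::GrainBytes of 1 MB and _hum_used_bytes initially 2.5 MB,
// three successive calls to get_hum_bytes(&_hum_used_bytes) return 1 MB,
// 1 MB and 0.5 MB, leaving the field at zero so it can be re-initialized by
// the next "starts humongous" region.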
aoqi@0 4736
aoqi@0 4737 // It deduces the values for a region in a humongous region series
aoqi@0 4738 // from the _hum_* fields and updates those accordingly. It assumes
aoqi@0 4739 // that that _hum_* fields have already been set up from the "starts
aoqi@0 4740 // humongous" region and we visit the regions in address order.
aoqi@0 4741 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
aoqi@0 4742 size_t* capacity_bytes,
aoqi@0 4743 size_t* prev_live_bytes,
aoqi@0 4744 size_t* next_live_bytes) {
aoqi@0 4745 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
aoqi@0 4746 *used_bytes = get_hum_bytes(&_hum_used_bytes);
aoqi@0 4747 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
aoqi@0 4748 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
aoqi@0 4749 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
aoqi@0 4750 }
aoqi@0 4751
aoqi@0 4752 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
brutisso@7195 4753 const char* type = r->get_type_str();
aoqi@0 4754 HeapWord* bottom = r->bottom();
aoqi@0 4755 HeapWord* end = r->end();
aoqi@0 4756 size_t capacity_bytes = r->capacity();
aoqi@0 4757 size_t used_bytes = r->used();
aoqi@0 4758 size_t prev_live_bytes = r->live_bytes();
aoqi@0 4759 size_t next_live_bytes = r->next_live_bytes();
aoqi@0 4760 double gc_eff = r->gc_efficiency();
aoqi@0 4761 size_t remset_bytes = r->rem_set()->mem_size();
aoqi@0 4762 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
aoqi@0 4763
brutisso@7195 4764 if (r->startsHumongous()) {
aoqi@0 4765 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
aoqi@0 4766 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
aoqi@0 4767 "they should have been zeroed after the last time we used them");
aoqi@0 4768 // Set up the _hum_* fields.
aoqi@0 4769 _hum_capacity_bytes = capacity_bytes;
aoqi@0 4770 _hum_used_bytes = used_bytes;
aoqi@0 4771 _hum_prev_live_bytes = prev_live_bytes;
aoqi@0 4772 _hum_next_live_bytes = next_live_bytes;
aoqi@0 4773 get_hum_bytes(&used_bytes, &capacity_bytes,
aoqi@0 4774 &prev_live_bytes, &next_live_bytes);
aoqi@0 4775 end = bottom + HeapRegion::GrainWords;
aoqi@0 4776 } else if (r->continuesHumongous()) {
aoqi@0 4777 get_hum_bytes(&used_bytes, &capacity_bytes,
aoqi@0 4778 &prev_live_bytes, &next_live_bytes);
aoqi@0 4779 assert(end == bottom + HeapRegion::GrainWords, "invariant");
aoqi@0 4780 }
aoqi@0 4781
aoqi@0 4782 _total_used_bytes += used_bytes;
aoqi@0 4783 _total_capacity_bytes += capacity_bytes;
aoqi@0 4784 _total_prev_live_bytes += prev_live_bytes;
aoqi@0 4785 _total_next_live_bytes += next_live_bytes;
aoqi@0 4786 _total_remset_bytes += remset_bytes;
aoqi@0 4787 _total_strong_code_roots_bytes += strong_code_roots_bytes;
aoqi@0 4788
aoqi@0 4789 // Print a line for this particular region.
aoqi@0 4790 _out->print_cr(G1PPRL_LINE_PREFIX
aoqi@0 4791 G1PPRL_TYPE_FORMAT
aoqi@0 4792 G1PPRL_ADDR_BASE_FORMAT
aoqi@0 4793 G1PPRL_BYTE_FORMAT
aoqi@0 4794 G1PPRL_BYTE_FORMAT
aoqi@0 4795 G1PPRL_BYTE_FORMAT
aoqi@0 4796 G1PPRL_DOUBLE_FORMAT
aoqi@0 4797 G1PPRL_BYTE_FORMAT
aoqi@0 4798 G1PPRL_BYTE_FORMAT,
aoqi@0 4799 type, p2i(bottom), p2i(end),
aoqi@0 4800 used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
aoqi@0 4801 remset_bytes, strong_code_roots_bytes);
aoqi@0 4802
aoqi@0 4803 return false;
aoqi@0 4804 }
aoqi@0 4805
aoqi@0 4806 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
aoqi@0 4807 // Add static memory usage to the remembered set sizes.
aoqi@0 4808 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
aoqi@0 4809 // Print the footer of the output.
aoqi@0 4810 _out->print_cr(G1PPRL_LINE_PREFIX);
aoqi@0 4811 _out->print_cr(G1PPRL_LINE_PREFIX
aoqi@0 4812 " SUMMARY"
aoqi@0 4813 G1PPRL_SUM_MB_FORMAT("capacity")
aoqi@0 4814 G1PPRL_SUM_MB_PERC_FORMAT("used")
aoqi@0 4815 G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
aoqi@0 4816 G1PPRL_SUM_MB_PERC_FORMAT("next-live")
aoqi@0 4817 G1PPRL_SUM_MB_FORMAT("remset")
aoqi@0 4818 G1PPRL_SUM_MB_FORMAT("code-roots"),
aoqi@0 4819 bytes_to_mb(_total_capacity_bytes),
aoqi@0 4820 bytes_to_mb(_total_used_bytes),
aoqi@0 4821 perc(_total_used_bytes, _total_capacity_bytes),
aoqi@0 4822 bytes_to_mb(_total_prev_live_bytes),
aoqi@0 4823 perc(_total_prev_live_bytes, _total_capacity_bytes),
aoqi@0 4824 bytes_to_mb(_total_next_live_bytes),
aoqi@0 4825 perc(_total_next_live_bytes, _total_capacity_bytes),
aoqi@0 4826 bytes_to_mb(_total_remset_bytes),
aoqi@0 4827 bytes_to_mb(_total_strong_code_roots_bytes));
aoqi@0 4828 _out->cr();
aoqi@0 4829 }
