1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp Thu Jun 05 15:57:56 2008 -0700 1.3 @@ -0,0 +1,3957 @@ 1.4 +/* 1.5 + * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved. 1.6 + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 1.7 + * 1.8 + * This code is free software; you can redistribute it and/or modify it 1.9 + * under the terms of the GNU General Public License version 2 only, as 1.10 + * published by the Free Software Foundation. 1.11 + * 1.12 + * This code is distributed in the hope that it will be useful, but WITHOUT 1.13 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 1.14 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 1.15 + * version 2 for more details (a copy is included in the LICENSE file that 1.16 + * accompanied this code). 1.17 + * 1.18 + * You should have received a copy of the GNU General Public License version 1.19 + * 2 along with this work; if not, write to the Free Software Foundation, 1.20 + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 1.21 + * 1.22 + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, 1.23 + * CA 95054 USA or visit www.sun.com if you need additional information or 1.24 + * have any questions. 
1.25 + * 1.26 + */ 1.27 + 1.28 +#include "incls/_precompiled.incl" 1.29 +#include "incls/_concurrentMark.cpp.incl" 1.30 + 1.31 +// 1.32 +// CMS Bit Map Wrapper 1.33 + 1.34 +CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter): 1.35 + _bm((uintptr_t*)NULL,0), 1.36 + _shifter(shifter) { 1.37 + _bmStartWord = (HeapWord*)(rs.base()); 1.38 + _bmWordSize = rs.size()/HeapWordSize; // rs.size() is in bytes 1.39 + ReservedSpace brs(ReservedSpace::allocation_align_size_up( 1.40 + (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1)); 1.41 + 1.42 + guarantee(brs.is_reserved(), "couldn't allocate CMS bit map"); 1.43 + // For now we'll just commit all of the bit map up front. 1.44 + // Later on we'll try to be more parsimonious with swap. 1.45 + guarantee(_virtual_space.initialize(brs, brs.size()), 1.46 + "couldn't reserve backing store for CMS bit map"); 1.47 + assert(_virtual_space.committed_size() == brs.size(), 1.48 + "didn't reserve backing store for all of CMS bit map?"); 1.49 + _bm.set_map((uintptr_t*)_virtual_space.low()); 1.50 + assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >= 1.51 + _bmWordSize, "inconsistency in bit map sizing"); 1.52 + _bm.set_size(_bmWordSize >> _shifter); 1.53 +} 1.54 + 1.55 +HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr, 1.56 + HeapWord* limit) const { 1.57 + // First we must round addr *up* to a possible object boundary.
1.58 + addr = (HeapWord*)align_size_up((intptr_t)addr, 1.59 + HeapWordSize << _shifter); 1.60 + size_t addrOffset = heapWordToOffset(addr); 1.61 + if (limit == NULL) limit = _bmStartWord + _bmWordSize; 1.62 + size_t limitOffset = heapWordToOffset(limit); 1.63 + size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); 1.64 + HeapWord* nextAddr = offsetToHeapWord(nextOffset); 1.65 + assert(nextAddr >= addr, "get_next_one postcondition"); 1.66 + assert(nextAddr == limit || isMarked(nextAddr), 1.67 + "get_next_one postcondition"); 1.68 + return nextAddr; 1.69 +} 1.70 + 1.71 +HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, 1.72 + HeapWord* limit) const { 1.73 + size_t addrOffset = heapWordToOffset(addr); 1.74 + if (limit == NULL) limit = _bmStartWord + _bmWordSize; 1.75 + size_t limitOffset = heapWordToOffset(limit); 1.76 + size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); 1.77 + HeapWord* nextAddr = offsetToHeapWord(nextOffset); 1.78 + assert(nextAddr >= addr, "get_next_one postcondition"); 1.79 + assert(nextAddr == limit || !isMarked(nextAddr), 1.80 + "get_next_one postcondition"); 1.81 + return nextAddr; 1.82 +} 1.83 + 1.84 +int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { 1.85 + assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); 1.86 + return (int) (diff >> _shifter); 1.87 +} 1.88 + 1.89 +bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { 1.90 + HeapWord* left = MAX2(_bmStartWord, mr.start()); 1.91 + HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end()); 1.92 + if (right > left) { 1.93 + // Right-open interval [leftOffset, rightOffset). 
1.94 + return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right)); 1.95 + } else { 1.96 + return true; 1.97 + } 1.98 +} 1.99 + 1.100 +void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap, 1.101 + size_t from_start_index, 1.102 + HeapWord* to_start_word, 1.103 + size_t word_num) { 1.104 + _bm.mostly_disjoint_range_union(from_bitmap, 1.105 + from_start_index, 1.106 + heapWordToOffset(to_start_word), 1.107 + word_num); 1.108 +} 1.109 + 1.110 +#ifndef PRODUCT 1.111 +bool CMBitMapRO::covers(ReservedSpace rs) const { 1.112 + // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); 1.113 + assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize, 1.114 + "size inconsistency"); 1.115 + return _bmStartWord == (HeapWord*)(rs.base()) && 1.116 + _bmWordSize == rs.size()>>LogHeapWordSize; 1.117 +} 1.118 +#endif 1.119 + 1.120 +void CMBitMap::clearAll() { 1.121 + _bm.clear(); 1.122 + return; 1.123 +} 1.124 + 1.125 +void CMBitMap::markRange(MemRegion mr) { 1.126 + // intersection() is const and returns the clamped region; assign it back. 1.127 + mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 1.127 + assert(!mr.is_empty(), "unexpected empty region"); 1.128 + assert((offsetToHeapWord(heapWordToOffset(mr.end())) == 1.129 + ((HeapWord *) mr.end())), 1.130 + "markRange memory region end is not card aligned"); 1.131 + // convert address range into offset range 1.132 + _bm.at_put_range(heapWordToOffset(mr.start()), 1.133 + heapWordToOffset(mr.end()), true); 1.134 +} 1.135 + 1.136 +void CMBitMap::clearRange(MemRegion mr) { 1.137 + // intersection() is const and returns the clamped region; assign it back. 1.137 + mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); 1.138 + assert(!mr.is_empty(), "unexpected empty region"); 1.139 + // convert address range into offset range 1.140 + _bm.at_put_range(heapWordToOffset(mr.start()), 1.141 + heapWordToOffset(mr.end()), false); 1.142 +} 1.143 + 1.144 +MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, 1.145 + HeapWord* end_addr) { 1.146 + HeapWord* start = getNextMarkedWordAddress(addr); 1.147 + start = MIN2(start, end_addr); 1.148 + HeapWord* end =
getNextUnmarkedWordAddress(start); 1.149 + end = MIN2(end, end_addr); 1.150 + assert(start <= end, "Consistency check"); 1.151 + MemRegion mr(start, end); 1.152 + if (!mr.is_empty()) { 1.153 + clearRange(mr); 1.154 + } 1.155 + return mr; 1.156 +} 1.157 + 1.158 +CMMarkStack::CMMarkStack(ConcurrentMark* cm) : 1.159 + _base(NULL), _cm(cm) 1.160 +#ifdef ASSERT 1.161 + , _drain_in_progress(false) 1.162 + , _drain_in_progress_yields(false) 1.163 +#endif 1.164 +{} 1.165 + 1.166 +void CMMarkStack::allocate(size_t size) { 1.167 + _base = NEW_C_HEAP_ARRAY(oop, size); 1.168 + if (_base == NULL) 1.169 + vm_exit_during_initialization("Failed to allocate " 1.170 + "CM region mark stack"); 1.171 + _index = 0; 1.172 + // QQQQ cast ... 1.173 + _capacity = (jint) size; 1.174 + _oops_do_bound = -1; 1.175 + NOT_PRODUCT(_max_depth = 0); 1.176 +} 1.177 + 1.178 +CMMarkStack::~CMMarkStack() { 1.179 + if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); 1.180 +} 1.181 + 1.182 +void CMMarkStack::par_push(oop ptr) { 1.183 + while (true) { 1.184 + if (isFull()) { 1.185 + _overflow = true; 1.186 + return; 1.187 + } 1.188 + // Otherwise... 1.189 + jint index = _index; 1.190 + jint next_index = index+1; 1.191 + jint res = Atomic::cmpxchg(next_index, &_index, index); 1.192 + if (res == index) { 1.193 + _base[index] = ptr; 1.194 + // Note that we don't maintain this atomically. We could, but it 1.195 + // doesn't seem necessary. 1.196 + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 1.197 + return; 1.198 + } 1.199 + // Otherwise, we need to try again. 1.200 + } 1.201 +} 1.202 + 1.203 +void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { 1.204 + while (true) { 1.205 + if (isFull()) { 1.206 + _overflow = true; 1.207 + return; 1.208 + } 1.209 + // Otherwise... 
1.210 + jint index = _index; 1.211 + jint next_index = index + n; 1.212 + if (next_index > _capacity) { 1.213 + _overflow = true; 1.214 + return; 1.215 + } 1.216 + jint res = Atomic::cmpxchg(next_index, &_index, index); 1.217 + if (res == index) { 1.218 + for (int i = 0; i < n; i++) { 1.219 + int ind = index + i; 1.220 + assert(ind < _capacity, "By overflow test above."); 1.221 + _base[ind] = ptr_arr[i]; 1.222 + } 1.223 + NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); 1.224 + return; 1.225 + } 1.226 + // Otherwise, we need to try again. 1.227 + } 1.228 +} 1.229 + 1.230 + 1.231 +void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { 1.232 + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1.233 + jint start = _index; 1.234 + jint next_index = start + n; 1.235 + if (next_index > _capacity) { 1.236 + _overflow = true; 1.237 + return; 1.238 + } 1.239 + // Otherwise. 1.240 + _index = next_index; 1.241 + for (int i = 0; i < n; i++) { 1.242 + int ind = start + i; 1.243 + guarantee(ind < _capacity, "By overflow test above."); 1.244 + _base[ind] = ptr_arr[i]; 1.245 + } 1.246 +} 1.247 + 1.248 + 1.249 +bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { 1.250 + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1.251 + jint index = _index; 1.252 + if (index == 0) { 1.253 + *n = 0; 1.254 + return false; 1.255 + } else { 1.256 + int k = MIN2(max, index); 1.257 + jint new_ind = index - k; 1.258 + for (int j = 0; j < k; j++) { 1.259 + ptr_arr[j] = _base[new_ind + j]; 1.260 + } 1.261 + _index = new_ind; 1.262 + *n = k; 1.263 + return true; 1.264 + } 1.265 +} 1.266 + 1.267 + 1.268 +CMRegionStack::CMRegionStack() : _base(NULL) {} 1.269 + 1.270 +void CMRegionStack::allocate(size_t size) { 1.271 + _base = NEW_C_HEAP_ARRAY(MemRegion, size); 1.272 + if (_base == NULL) 1.273 + vm_exit_during_initialization("Failed to allocate " 1.274 + "CM region mark stack"); 1.275 + _index = 0; 1.276 + // QQQQ cast ... 
1.277 + _capacity = (jint) size; 1.278 +} 1.279 + 1.280 +CMRegionStack::~CMRegionStack() { 1.281 + if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base); 1.282 +} 1.283 + 1.284 +void CMRegionStack::push(MemRegion mr) { 1.285 + assert(mr.word_size() > 0, "Precondition"); 1.286 + while (true) { 1.287 + if (isFull()) { 1.288 + _overflow = true; 1.289 + return; 1.290 + } 1.291 + // Otherwise... 1.292 + jint index = _index; 1.293 + jint next_index = index+1; 1.294 + jint res = Atomic::cmpxchg(next_index, &_index, index); 1.295 + if (res == index) { 1.296 + _base[index] = mr; 1.297 + return; 1.298 + } 1.299 + // Otherwise, we need to try again. 1.300 + } 1.301 +} 1.302 + 1.303 +MemRegion CMRegionStack::pop() { 1.304 + while (true) { 1.305 + // Otherwise... 1.306 + jint index = _index; 1.307 + 1.308 + if (index == 0) { 1.309 + return MemRegion(); 1.310 + } 1.311 + jint next_index = index-1; 1.312 + jint res = Atomic::cmpxchg(next_index, &_index, index); 1.313 + if (res == index) { 1.314 + MemRegion mr = _base[next_index]; 1.315 + if (mr.start() != NULL) { 1.316 + tmp_guarantee_CM( mr.end() != NULL, "invariant" ); 1.317 + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); 1.318 + return mr; 1.319 + } else { 1.320 + // that entry was invalidated... let's skip it 1.321 + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); 1.322 + } 1.323 + } 1.324 + // Otherwise, we need to try again.
1.325 + } 1.326 +} 1.327 + 1.328 +bool CMRegionStack::invalidate_entries_into_cset() { 1.329 + bool result = false; 1.330 + G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1.331 + for (int i = 0; i < _oops_do_bound; ++i) { 1.332 + MemRegion mr = _base[i]; 1.333 + if (mr.start() != NULL) { 1.334 + tmp_guarantee_CM( mr.end() != NULL, "invariant"); 1.335 + tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); 1.336 + HeapRegion* hr = g1h->heap_region_containing(mr.start()); 1.337 + tmp_guarantee_CM( hr != NULL, "invariant" ); 1.338 + if (hr->in_collection_set()) { 1.339 + // The region points into the collection set 1.340 + _base[i] = MemRegion(); 1.341 + result = true; 1.342 + } 1.343 + } else { 1.344 + // that entry was invalidated... let's skip it 1.345 + tmp_guarantee_CM( mr.end() == NULL, "invariant" ); 1.346 + } 1.347 + } 1.348 + return result; 1.349 +} 1.350 + 1.351 +template<class OopClosureClass> 1.352 +bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) { 1.353 + assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after 1.354 + || SafepointSynchronize::is_at_safepoint(), 1.355 + "Drain recursion must be yield-safe."); 1.356 + bool res = true; 1.357 + debug_only(_drain_in_progress = true); 1.358 + debug_only(_drain_in_progress_yields = yield_after); 1.359 + while (!isEmpty()) { 1.360 + oop newOop = pop(); 1.361 + assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop"); 1.362 + assert(newOop->is_oop(), "Expected an oop"); 1.363 + assert(bm == NULL || bm->isMarked((HeapWord*)newOop), 1.364 + "only grey objects on this stack"); 1.365 + // iterate over the oops in this oop, marking and pushing 1.366 + // the ones in CMS generation. 
1.367 + newOop->oop_iterate(cl); 1.368 + if (yield_after && _cm->do_yield_check()) { 1.369 + res = false; break; 1.370 + } 1.371 + } 1.372 + debug_only(_drain_in_progress = false); 1.373 + return res; 1.374 +} 1.375 + 1.376 +void CMMarkStack::oops_do(OopClosure* f) { 1.377 + if (_index == 0) return; 1.378 + assert(_oops_do_bound != -1 && _oops_do_bound <= _index, 1.379 + "Bound must be set."); 1.380 + for (int i = 0; i < _oops_do_bound; i++) { 1.381 + f->do_oop(&_base[i]); 1.382 + } 1.383 + _oops_do_bound = -1; 1.384 +} 1.385 + 1.386 +bool ConcurrentMark::not_yet_marked(oop obj) const { 1.387 + return (_g1h->is_obj_ill(obj) 1.388 + || (_g1h->is_in_permanent(obj) 1.389 + && !nextMarkBitMap()->isMarked((HeapWord*)obj))); 1.390 +} 1.391 + 1.392 +#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away 1.393 +#pragma warning( disable:4355 ) // 'this' : used in base member initializer list 1.394 +#endif // _MSC_VER 1.395 + 1.396 +ConcurrentMark::ConcurrentMark(ReservedSpace rs, 1.397 + int max_regions) : 1.398 + _markBitMap1(rs, MinObjAlignment - 1), 1.399 + _markBitMap2(rs, MinObjAlignment - 1), 1.400 + 1.401 + _parallel_marking_threads(0), 1.402 + _sleep_factor(0.0), 1.403 + _marking_task_overhead(1.0), 1.404 + _cleanup_sleep_factor(0.0), 1.405 + _cleanup_task_overhead(1.0), 1.406 + _region_bm(max_regions, false /* in_resource_area*/), 1.407 + _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >> 1.408 + CardTableModRefBS::card_shift, 1.409 + false /* in_resource_area*/), 1.410 + _prevMarkBitMap(&_markBitMap1), 1.411 + _nextMarkBitMap(&_markBitMap2), 1.412 + _at_least_one_mark_complete(false), 1.413 + 1.414 + _markStack(this), 1.415 + _regionStack(), 1.416 + // _finger set in set_non_marking_state 1.417 + 1.418 + _max_task_num(MAX2(ParallelGCThreads, (size_t)1)), 1.419 + // _active_tasks set in set_non_marking_state 1.420 + // _tasks set inside the constructor 1.421 + _task_queues(new CMTaskQueueSet((int) _max_task_num)), 1.422 + 
_terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)), 1.423 + 1.424 + _has_overflown(false), 1.425 + _concurrent(false), 1.426 + 1.427 + // _verbose_level set below 1.428 + 1.429 + _init_times(), 1.430 + _remark_times(), _remark_mark_times(), _remark_weak_ref_times(), 1.431 + _cleanup_times(), 1.432 + _total_counting_time(0.0), 1.433 + _total_rs_scrub_time(0.0), 1.434 + 1.435 + _parallel_workers(NULL), 1.436 + _cleanup_co_tracker(G1CLGroup) 1.437 +{ 1.438 + CMVerboseLevel verbose_level = 1.439 + (CMVerboseLevel) G1MarkingVerboseLevel; 1.440 + if (verbose_level < no_verbose) 1.441 + verbose_level = no_verbose; 1.442 + if (verbose_level > high_verbose) 1.443 + verbose_level = high_verbose; 1.444 + _verbose_level = verbose_level; 1.445 + 1.446 + if (verbose_low()) 1.447 + gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " 1.448 + "heap end = "PTR_FORMAT, _heap_start, _heap_end); 1.449 + 1.450 + _markStack.allocate(G1CMStackSize); 1.451 + _regionStack.allocate(G1CMRegionStackSize); 1.452 + 1.453 + // Create & start a ConcurrentMark thread. 
1.454 + if (G1ConcMark) { 1.455 + _cmThread = new ConcurrentMarkThread(this); 1.456 + assert(cmThread() != NULL, "CM Thread should have been created"); 1.457 + assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); 1.458 + } else { 1.459 + _cmThread = NULL; 1.460 + } 1.461 + _g1h = G1CollectedHeap::heap(); 1.462 + assert(CGC_lock != NULL, "Where's the CGC_lock?"); 1.463 + assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); 1.464 + assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); 1.465 + 1.466 + SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 1.467 + satb_qs.set_buffer_size(G1SATBLogBufferSize); 1.468 + 1.469 + int size = (int) MAX2(ParallelGCThreads, (size_t)1); 1.470 + _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size); 1.471 + for (int i = 0 ; i < size; i++) { 1.472 + _par_cleanup_thread_state[i] = new ParCleanupThreadState; 1.473 + } 1.474 + 1.475 + _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); 1.476 + _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); 1.477 + 1.478 + // so that the assertion in MarkingTaskQueue::task_queue doesn't fail 1.479 + _active_tasks = _max_task_num; 1.480 + for (int i = 0; i < (int) _max_task_num; ++i) { 1.481 + CMTaskQueue* task_queue = new CMTaskQueue(); 1.482 + task_queue->initialize(); 1.483 + _task_queues->register_queue(i, task_queue); 1.484 + 1.485 + _tasks[i] = new CMTask(i, this, task_queue, _task_queues); 1.486 + _accum_task_vtime[i] = 0.0; 1.487 + } 1.488 + 1.489 + if (ParallelMarkingThreads > ParallelGCThreads) { 1.490 + vm_exit_during_initialization("Can't have more ParallelMarkingThreads " 1.491 + "than ParallelGCThreads."); 1.492 + } 1.493 + if (ParallelGCThreads == 0) { 1.494 + // if we are not running with any parallel GC threads we will not 1.495 + // spawn any marking threads either 1.496 + _parallel_marking_threads = 0; 1.497 + _sleep_factor = 0.0; 1.498 + _marking_task_overhead = 1.0; 1.499 + } else { 1.500 + if 
(ParallelMarkingThreads > 0) { 1.501 + // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc 1.502 + // if both are set 1.503 + 1.504 + _parallel_marking_threads = ParallelMarkingThreads; 1.505 + _sleep_factor = 0.0; 1.506 + _marking_task_overhead = 1.0; 1.507 + } else if (G1MarkingOverheadPerc > 0) { 1.508 + // we will calculate the number of parallel marking threads 1.509 + // based on a target overhead with respect to the soft real-time 1.510 + // goal 1.511 + 1.512 + double marking_overhead = (double) G1MarkingOverheadPerc / 100.0; 1.513 + double overall_cm_overhead = 1.514 + (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS; 1.515 + double cpu_ratio = 1.0 / (double) os::processor_count(); 1.516 + double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); 1.517 + double marking_task_overhead = 1.518 + overall_cm_overhead / marking_thread_num * 1.519 + (double) os::processor_count(); 1.520 + double sleep_factor = 1.521 + (1.0 - marking_task_overhead) / marking_task_overhead; 1.522 + 1.523 + _parallel_marking_threads = (size_t) marking_thread_num; 1.524 + _sleep_factor = sleep_factor; 1.525 + _marking_task_overhead = marking_task_overhead; 1.526 + } else { 1.527 + _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1); 1.528 + _sleep_factor = 0.0; 1.529 + _marking_task_overhead = 1.0; 1.530 + } 1.531 + 1.532 + if (parallel_marking_threads() > 1) 1.533 + _cleanup_task_overhead = 1.0; 1.534 + else 1.535 + _cleanup_task_overhead = marking_task_overhead(); 1.536 + _cleanup_sleep_factor = 1.537 + (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); 1.538 + 1.539 +#if 0 1.540 + gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); 1.541 + gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); 1.542 + gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); 1.543 + gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", 
cleanup_task_overhead()); 1.544 + gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); 1.545 +#endif 1.546 + 1.547 + guarantee( parallel_marking_threads() > 0, "peace of mind" ); 1.548 + _parallel_workers = new WorkGang("Parallel Marking Threads", 1.549 + (int) parallel_marking_threads(), false, true); 1.550 + if (_parallel_workers == NULL) 1.551 + vm_exit_during_initialization("Failed necessary allocation."); 1.552 + } 1.553 + 1.554 + // so that the call below can read a sensible value 1.555 + _heap_start = (HeapWord*) rs.base(); 1.556 + set_non_marking_state(); 1.557 +} 1.558 + 1.559 +void ConcurrentMark::update_g1_committed(bool force) { 1.560 + // If concurrent marking is not in progress, then we do not need to 1.561 + // update _heap_end. This has a subtle and important 1.562 + // side-effect. Imagine that two evacuation pauses happen between 1.563 + // marking completion and remark. The first one can grow the 1.564 + // heap (hence now the finger is below the heap end). Then, the 1.565 + // second one could unnecessarily push regions on the region 1.566 + // stack. This causes the invariant that the region stack is empty 1.567 + // at the beginning of remark to be false. By ensuring that we do 1.568 + // not observe heap expansions after marking is complete, then we do 1.569 + // not have this problem. 1.570 + if (!concurrent_marking_in_progress() && !force) 1.571 + return; 1.572 + 1.573 + MemRegion committed = _g1h->g1_committed(); 1.574 + tmp_guarantee_CM( committed.start() == _heap_start, 1.575 + "start shouldn't change" ); 1.576 + HeapWord* new_end = committed.end(); 1.577 + if (new_end > _heap_end) { 1.578 + // The heap has been expanded. 1.579 + 1.580 + _heap_end = new_end; 1.581 + } 1.582 + // Notice that the heap can also shrink. However, this only happens 1.583 + // during a Full GC (at least currently) and the entire marking 1.584 + // phase will bail out and the task will not be restarted. So, let's 1.585 + // do nothing. 
1.586 +} 1.587 + 1.588 +void ConcurrentMark::reset() { 1.589 + // Starting values for these two. This should be called in a STW 1.590 + // phase. CM will be notified of any future g1_committed expansions 1.591 + // will be at the end of evacuation pauses, when tasks are 1.592 + // inactive. 1.593 + MemRegion committed = _g1h->g1_committed(); 1.594 + _heap_start = committed.start(); 1.595 + _heap_end = committed.end(); 1.596 + 1.597 + guarantee( _heap_start != NULL && 1.598 + _heap_end != NULL && 1.599 + _heap_start < _heap_end, "heap bounds should look ok" ); 1.600 + 1.601 + // reset all the marking data structures and any necessary flags 1.602 + clear_marking_state(); 1.603 + 1.604 + if (verbose_low()) 1.605 + gclog_or_tty->print_cr("[global] resetting"); 1.606 + 1.607 + // We do reset all of them, since different phases will use 1.608 + // different number of active threads. So, it's easiest to have all 1.609 + // of them ready. 1.610 + for (int i = 0; i < (int) _max_task_num; ++i) 1.611 + _tasks[i]->reset(_nextMarkBitMap); 1.612 + 1.613 + // we need this to make sure that the flag is on during the evac 1.614 + // pause with initial mark piggy-backed 1.615 + set_concurrent_marking_in_progress(); 1.616 +} 1.617 + 1.618 +void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { 1.619 + guarantee( active_tasks <= _max_task_num, "we should not have more" ); 1.620 + 1.621 + _active_tasks = active_tasks; 1.622 + // Need to update the three data structures below according to the 1.623 + // number of active threads for this phase. 1.624 + _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); 1.625 + _first_overflow_barrier_sync.set_n_workers((int) active_tasks); 1.626 + _second_overflow_barrier_sync.set_n_workers((int) active_tasks); 1.627 + 1.628 + _concurrent = concurrent; 1.629 + // We propagate this to all tasks, not just the active ones. 
1.630 + for (int i = 0; i < (int) _max_task_num; ++i) 1.631 + _tasks[i]->set_concurrent(concurrent); 1.632 + 1.633 + if (concurrent) { 1.634 + set_concurrent_marking_in_progress(); 1.635 + } else { 1.636 + // We currently assume that the concurrent flag has been set to 1.637 + // false before we start remark. At this point we should also be 1.638 + // in a STW phase. 1.639 + guarantee( !concurrent_marking_in_progress(), "invariant" ); 1.640 + guarantee( _finger == _heap_end, "only way to get here" ); 1.641 + update_g1_committed(true); 1.642 + } 1.643 +} 1.644 + 1.645 +void ConcurrentMark::set_non_marking_state() { 1.646 + // We set the global marking state to some default values when we're 1.647 + // not doing marking. 1.648 + clear_marking_state(); 1.649 + _active_tasks = 0; 1.650 + clear_concurrent_marking_in_progress(); 1.651 +} 1.652 + 1.653 +ConcurrentMark::~ConcurrentMark() { 1.654 + int size = (int) MAX2(ParallelGCThreads, (size_t)1); 1.655 + for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i]; 1.656 + FREE_C_HEAP_ARRAY(ParCleanupThreadState*, 1.657 + _par_cleanup_thread_state); 1.658 + 1.659 + for (int i = 0; i < (int) _max_task_num; ++i) { 1.660 + delete _task_queues->queue(i); 1.661 + delete _tasks[i]; 1.662 + } 1.663 + delete _task_queues; 1.664 + FREE_C_HEAP_ARRAY(CMTask*, _max_task_num); 1.665 +} 1.666 + 1.667 +// This closure is used to mark refs into the g1 generation 1.668 +// from external roots in the CMS bit map. 1.669 +// Called at the first checkpoint. 
1.670 +// 1.671 + 1.672 +#define PRINT_REACHABLE_AT_INITIAL_MARK 0 1.673 +#if PRINT_REACHABLE_AT_INITIAL_MARK 1.674 +static FILE* reachable_file = NULL; 1.675 + 1.676 +class PrintReachableClosure: public OopsInGenClosure { 1.677 + CMBitMap* _bm; 1.678 + int _level; 1.679 +public: 1.680 + PrintReachableClosure(CMBitMap* bm) : 1.681 + _bm(bm), _level(0) { 1.682 + guarantee(reachable_file != NULL, "pre-condition"); 1.683 + } 1.684 + void do_oop(oop* p) { 1.685 + oop obj = *p; 1.686 + HeapWord* obj_addr = (HeapWord*)obj; 1.687 + if (obj == NULL) return; 1.688 + fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n", 1.689 + _level, p, (void*) obj, _bm->isMarked(obj_addr)); 1.690 + if (!_bm->isMarked(obj_addr)) { 1.691 + _bm->mark(obj_addr); 1.692 + _level++; 1.693 + obj->oop_iterate(this); 1.694 + _level--; 1.695 + } 1.696 + } 1.697 +}; 1.698 +#endif // PRINT_REACHABLE_AT_INITIAL_MARK 1.699 + 1.700 +#define SEND_HEAP_DUMP_TO_FILE 0 1.701 +#if SEND_HEAP_DUMP_TO_FILE 1.702 +static FILE* heap_dump_file = NULL; 1.703 +#endif // SEND_HEAP_DUMP_TO_FILE 1.704 + 1.705 +void ConcurrentMark::clearNextBitmap() { 1.706 + guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition."); 1.707 + 1.708 + // clear the mark bitmap (no grey objects to start with). 1.709 + // We need to do this in chunks and offer to yield in between 1.710 + // each chunk. 
1.711 + HeapWord* start = _nextMarkBitMap->startWord(); 1.712 + HeapWord* end = _nextMarkBitMap->endWord(); 1.713 + HeapWord* cur = start; 1.714 + size_t chunkSize = M; 1.715 + while (cur < end) { 1.716 + HeapWord* next = cur + chunkSize; 1.717 + if (next > end) 1.718 + next = end; 1.719 + MemRegion mr(cur,next); 1.720 + _nextMarkBitMap->clearRange(mr); 1.721 + cur = next; 1.722 + do_yield_check(); 1.723 + } 1.724 +} 1.725 + 1.726 +class NoteStartOfMarkHRClosure: public HeapRegionClosure { 1.727 +public: 1.728 + bool doHeapRegion(HeapRegion* r) { 1.729 + if (!r->continuesHumongous()) { 1.730 + r->note_start_of_marking(true); 1.731 + } 1.732 + return false; 1.733 + } 1.734 +}; 1.735 + 1.736 +void ConcurrentMark::checkpointRootsInitialPre() { 1.737 + G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1.738 + G1CollectorPolicy* g1p = g1h->g1_policy(); 1.739 + 1.740 + _has_aborted = false; 1.741 + 1.742 + // Find all the reachable objects... 1.743 +#if PRINT_REACHABLE_AT_INITIAL_MARK 1.744 + guarantee(reachable_file == NULL, "Protocol"); 1.745 + char fn_buf[100]; 1.746 + sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id()); 1.747 + reachable_file = fopen(fn_buf, "w"); 1.748 + // clear the mark bitmap (no grey objects to start with) 1.749 + _nextMarkBitMap->clearAll(); 1.750 + PrintReachableClosure prcl(_nextMarkBitMap); 1.751 + g1h->process_strong_roots( 1.752 + false, // fake perm gen collection 1.753 + SharedHeap::SO_AllClasses, 1.754 + &prcl, // Regular roots 1.755 + &prcl // Perm Gen Roots 1.756 + ); 1.757 + // The root iteration above "consumed" dirty cards in the perm gen. 1.758 + // Therefore, as a shortcut, we dirty all such cards. 1.759 + g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false); 1.760 + fclose(reachable_file); 1.761 + reachable_file = NULL; 1.762 + // clear the mark bitmap again. 
1.763 + _nextMarkBitMap->clearAll(); 1.764 + COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); 1.765 + COMPILER2_PRESENT(DerivedPointerTable::clear()); 1.766 +#endif // PRINT_REACHABLE_AT_INITIAL_MARK 1.767 + 1.768 + // Initialise marking structures. This has to be done in a STW phase. 1.769 + reset(); 1.770 +} 1.771 + 1.772 +class CMMarkRootsClosure: public OopsInGenClosure { 1.773 +private: 1.774 + ConcurrentMark* _cm; 1.775 + G1CollectedHeap* _g1h; 1.776 + bool _do_barrier; 1.777 + 1.778 +public: 1.779 + CMMarkRootsClosure(ConcurrentMark* cm, 1.780 + G1CollectedHeap* g1h, 1.781 + bool do_barrier) : _cm(cm), _g1h(g1h), 1.782 + _do_barrier(do_barrier) { } 1.783 + 1.784 + virtual void do_oop(narrowOop* p) { 1.785 + guarantee(false, "NYI"); 1.786 + } 1.787 + 1.788 + virtual void do_oop(oop* p) { 1.789 + oop thisOop = *p; 1.790 + if (thisOop != NULL) { 1.791 + assert(thisOop->is_oop() || thisOop->mark() == NULL, 1.792 + "expected an oop, possibly with mark word displaced"); 1.793 + HeapWord* addr = (HeapWord*)thisOop; 1.794 + if (_g1h->is_in_g1_reserved(addr)) { 1.795 + _cm->grayRoot(thisOop); 1.796 + } 1.797 + } 1.798 + if (_do_barrier) { 1.799 + assert(!_g1h->is_in_g1_reserved(p), 1.800 + "Should be called on external roots"); 1.801 + do_barrier(p); 1.802 + } 1.803 + } 1.804 +}; 1.805 + 1.806 +void ConcurrentMark::checkpointRootsInitialPost() { 1.807 + G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1.808 + 1.809 + // For each region note start of marking. 1.810 + NoteStartOfMarkHRClosure startcl; 1.811 + g1h->heap_region_iterate(&startcl); 1.812 + 1.813 + // Start weak-reference discovery. 
1.814 + ReferenceProcessor* rp = g1h->ref_processor(); 1.815 + rp->verify_no_references_recorded(); 1.816 + rp->enable_discovery(); // enable ("weak") refs discovery 1.817 + 1.818 + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1.819 + satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); 1.820 + satb_mq_set.set_active_all_threads(true); 1.821 + 1.822 + // update_g1_committed() will be called at the end of an evac pause 1.823 + // when marking is on. So, it's also called at the end of the 1.824 + // initial-mark pause to update the heap end, if the heap expands 1.825 + // during it. No need to call it here. 1.826 + 1.827 + guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); 1.828 + 1.829 + size_t max_marking_threads = 1.830 + MAX2((size_t) 1, parallel_marking_threads()); 1.831 + for (int i = 0; i < (int)_max_task_num; ++i) { 1.832 + _tasks[i]->enable_co_tracker(); 1.833 + if (i < (int) max_marking_threads) 1.834 + _tasks[i]->reset_co_tracker(marking_task_overhead()); 1.835 + else 1.836 + _tasks[i]->reset_co_tracker(0.0); 1.837 + } 1.838 +} 1.839 + 1.840 +// Checkpoint the roots into this generation from outside 1.841 +// this generation. [Note this initial checkpoint need only 1.842 +// be approximate -- we'll do a catch up phase subsequently.] 1.843 +void ConcurrentMark::checkpointRootsInitial() { 1.844 + assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); 1.845 + G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1.846 + 1.847 + double start = os::elapsedTime(); 1.848 + GCOverheadReporter::recordSTWStart(start); 1.849 + 1.850 + // If there has not been a GC[n-1] since last GC[n] cycle completed, 1.851 + // precede our marking with a collection of all 1.852 + // younger generations to keep floating garbage to a minimum. 1.853 + // YSR: we won't do this for now -- it's an optimization to be 1.854 + // done post-beta. 
1.855 + 1.856 + // YSR: ignoring weak refs for now; will do at bug fixing stage 1.857 + // EVM: assert(discoveredRefsAreClear()); 1.858 + 1.859 + 1.860 + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1.861 + g1p->record_concurrent_mark_init_start(); 1.862 + checkpointRootsInitialPre(); 1.863 + 1.864 + // YSR: when concurrent precleaning is in place, we'll 1.865 + // need to clear the cached card table here 1.866 + 1.867 + ResourceMark rm; 1.868 + HandleMark hm; 1.869 + 1.870 + g1h->ensure_parsability(false); 1.871 + g1h->perm_gen()->save_marks(); 1.872 + 1.873 + CMMarkRootsClosure notOlder(this, g1h, false); 1.874 + CMMarkRootsClosure older(this, g1h, true); 1.875 + 1.876 + g1h->set_marking_started(); 1.877 + g1h->rem_set()->prepare_for_younger_refs_iterate(false); 1.878 +
// Scan the strong roots: regular roots use the non-barrier closure,
// perm-gen roots use the barrier variant.
// NOTE(review): the token "¬Older" below is a mis-encoding of
// "&notOlder" (the "&not" prefix was decoded as the HTML entity for the
// NOT sign); verify against the original repository source.
1.879 + g1h->process_strong_roots(false, // fake perm gen collection 1.880 + SharedHeap::SO_AllClasses, 1.881 + ¬Older, // Regular roots 1.882 + &older // Perm Gen Roots 1.883 + ); 1.884 + checkpointRootsInitialPost(); 1.885 + 1.886 + // Statistics. 1.887 + double end = os::elapsedTime(); 1.888 + _init_times.add((end - start) * 1000.0); 1.889 + GCOverheadReporter::recordSTWEnd(end); 1.890 + 1.891 + g1p->record_concurrent_mark_init_end(); 1.892 +} 1.893 + 1.894 +/* 1.895 + Notice that in the next two methods, we actually leave the STS 1.896 + during the barrier sync and join it immediately afterwards. If we 1.897 + do not do this, this then the following deadlock can occur: one 1.898 + thread could be in the barrier sync code, waiting for the other 1.899 + thread to also sync up, whereas another one could be trying to 1.900 + yield, while also waiting for the other threads to sync up too. 1.901 + 1.902 + Because the thread that does the sync barrier has left the STS, it 1.903 + is possible to be suspended for a Full GC or an evacuation pause 1.904 + could occur.
This is actually safe, since the entering the sync 1.905 + barrier is one of the last things do_marking_step() does, and it 1.906 + doesn't manipulate any data structures afterwards. 1.907 +*/ 1.908 +
// Rendezvous used when the global marking state overflows: all active
// tasks sync here, task 0 clears the global data structures, then each
// task resets its own state and meets again in the second barrier.
1.909 +void ConcurrentMark::enter_first_sync_barrier(int task_num) { 1.910 + if (verbose_low()) 1.911 + gclog_or_tty->print_cr("[%d] entering first barrier", task_num); 1.912 + 1.913 + ConcurrentGCThread::stsLeave(); 1.914 + _first_overflow_barrier_sync.enter(); 1.915 + ConcurrentGCThread::stsJoin(); 1.916 + // at this point everyone should have synced up and not be doing any 1.917 + // more work 1.918 + 1.919 + if (verbose_low()) 1.920 + gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); 1.921 + 1.922 + // let task 0 do this 1.923 + if (task_num == 0) { 1.924 + // task 0 is responsible for clearing the global data structures 1.925 + clear_marking_state(); 1.926 + 1.927 + if (PrintGC) { 1.928 + gclog_or_tty->date_stamp(PrintGCDateStamps); 1.929 + gclog_or_tty->stamp(PrintGCTimeStamps); 1.930 + gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); 1.931 + } 1.932 + } 1.933 + 1.934 + // after this, each task should reset its own data structures then 1.935 + // then go into the second barrier 1.936 +} 1.937 + 1.938 +void ConcurrentMark::enter_second_sync_barrier(int task_num) { 1.939 + if (verbose_low()) 1.940 + gclog_or_tty->print_cr("[%d] entering second barrier", task_num); 1.941 + 1.942 + ConcurrentGCThread::stsLeave(); 1.943 + _second_overflow_barrier_sync.enter(); 1.944 + ConcurrentGCThread::stsJoin(); 1.945 + // at this point everything should be re-initialised and ready to go 1.946 + 1.947 + if (verbose_low()) 1.948 + gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); 1.949 +} 1.950 +
// Gray the given root: mark its address on the next (in-progress) mark
// bitmap if not already marked (body continues on the next line).
1.951 +void ConcurrentMark::grayRoot(oop p) { 1.952 + HeapWord* addr = (HeapWord*) p; 1.953 + // We can't really check against _heap_start and _heap_end, since it 1.954 + // is possible during an evacuation pause with piggy-backed 1.955 + //
initial-mark that the committed space is expanded during the 1.956 + // pause without CM observing this change. So the assertions below 1.957 + // is a bit conservative; but better than nothing. 1.958 + tmp_guarantee_CM( _g1h->g1_committed().contains(addr), 1.959 + "address should be within the heap bounds" ); 1.960 + 1.961 + if (!_nextMarkBitMap->isMarked(addr)) 1.962 + _nextMarkBitMap->parMark(addr); 1.963 +} 1.964 +
// Decide whether a region whose objects the caller already marked "in
// bulk" must also be pushed on the region stack so marking rescans it.
// Only regions entirely below the global finger need pushing.
1.965 +void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { 1.966 + // The objects on the region have already been marked "in bulk" by 1.967 + // the caller. We only need to decide whether to push the region on 1.968 + // the region stack or not. 1.969 + 1.970 + if (!concurrent_marking_in_progress() || !_should_gray_objects) 1.971 + // We're done with marking and waiting for remark. We do not need to 1.972 + // push anything else on the region stack. 1.973 + return; 1.974 + 1.975 + HeapWord* finger = _finger; 1.976 + 1.977 + if (verbose_low()) 1.978 + gclog_or_tty->print_cr("[global] attempting to push " 1.979 + "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at " 1.980 + PTR_FORMAT, mr.start(), mr.end(), finger); 1.981 + 1.982 + if (mr.start() < finger) { 1.983 + // The finger is always heap region aligned and it is not possible 1.984 + // for mr to span heap regions.
1.985 + tmp_guarantee_CM( mr.end() <= finger, "invariant" ); 1.986 + 1.987 + tmp_guarantee_CM( mr.start() <= mr.end() && 1.988 + _heap_start <= mr.start() && 1.989 + mr.end() <= _heap_end, 1.990 + "region boundaries should fall within the committed space" ); 1.991 + if (verbose_low()) 1.992 + gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") " 1.993 + "below the finger, pushing it", 1.994 + mr.start(), mr.end()); 1.995 +
// A failed push means the region stack overflowed; this is only logged
// here — overflow recovery is handled elsewhere.
1.996 + if (!region_stack_push(mr)) { 1.997 + if (verbose_low()) 1.998 + gclog_or_tty->print_cr("[global] region stack has overflown."); 1.999 + } 1.1000 + } 1.1001 +} 1.1002 +
// Mark the object on the next bitmap (always), and additionally push it
// on the global mark stack if marking is still in progress and the
// object lies below the global finger (body continues on the next line).
1.1003 +void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { 1.1004 + // The object is not marked by the caller. We need to at least mark 1.1005 + // it and maybe push in on the stack. 1.1006 + 1.1007 + HeapWord* addr = (HeapWord*)p; 1.1008 + if (!_nextMarkBitMap->isMarked(addr)) { 1.1009 + // We definitely need to mark it, irrespective whether we bail out 1.1010 + // because we're done with marking. 1.1011 + if (_nextMarkBitMap->parMark(addr)) { 1.1012 + if (!concurrent_marking_in_progress() || !_should_gray_objects) 1.1013 + // If we're done with concurrent marking and we're waiting for 1.1014 + // remark, then we're not pushing anything on the stack. 1.1015 + return; 1.1016 + 1.1017 + // No OrderAccess:store_load() is needed.
It is implicit in the 1.1018 + // CAS done in parMark(addr) above 1.1019 + HeapWord* finger = _finger; 1.1020 + 1.1021 + if (addr < finger) { 1.1022 + if (!mark_stack_push(oop(addr))) { 1.1023 + if (verbose_low()) 1.1024 + gclog_or_tty->print_cr("[global] global stack overflow " 1.1025 + "during parMark"); 1.1026 + } 1.1027 + } 1.1028 + } 1.1029 + } 1.1030 +} 1.1031 +
// Gang task that drives concurrent marking: each worker repeatedly runs
// its CMTask::do_marking_step() inside the STS, yielding and (optionally)
// sleeping between steps to honour the configured marking overhead.
1.1032 +class CMConcurrentMarkingTask: public AbstractGangTask { 1.1033 +private: 1.1034 + ConcurrentMark* _cm; 1.1035 + ConcurrentMarkThread* _cmt; 1.1036 + 1.1037 +public: 1.1038 + void work(int worker_i) { 1.1039 + guarantee( Thread::current()->is_ConcurrentGC_thread(), 1.1040 + "this should only be done by a conc GC thread" ); 1.1041 + 1.1042 + double start_vtime = os::elapsedVTime(); 1.1043 + 1.1044 + ConcurrentGCThread::stsJoin(); 1.1045 + 1.1046 + guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" ); 1.1047 + CMTask* the_task = _cm->task(worker_i); 1.1048 + the_task->start_co_tracker(); 1.1049 + the_task->record_start_time(); 1.1050 + if (!_cm->has_aborted()) { 1.1051 + do { 1.1052 + double start_vtime_sec = os::elapsedVTime(); 1.1053 + double start_time_sec = os::elapsedTime(); 1.1054 + the_task->do_marking_step(10.0); 1.1055 + double end_time_sec = os::elapsedTime(); 1.1056 + double end_vtime_sec = os::elapsedVTime(); 1.1057 + double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; 1.1058 + double elapsed_time_sec = end_time_sec - start_time_sec; 1.1059 + _cm->clear_has_overflown(); 1.1060 +
// NOTE(review): the result of do_yield_check() is stored but never
// used below — the yield has already happened inside the call.
1.1061 + bool ret = _cm->do_yield_check(worker_i); 1.1062 + 1.1063 + jlong sleep_time_ms; 1.1064 + if (!_cm->has_aborted() && the_task->has_aborted()) { 1.1065 + sleep_time_ms = 1.1066 + (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); 1.1067 + ConcurrentGCThread::stsLeave(); 1.1068 + os::sleep(Thread::current(), sleep_time_ms, false); 1.1069 + ConcurrentGCThread::stsJoin(); 1.1070 + } 1.1071 + double end_time2_sec = os::elapsedTime(); 1.1072 + double elapsed_time2_sec =
end_time2_sec - start_time_sec; 1.1073 + 1.1074 + the_task->update_co_tracker(); 1.1075 + 1.1076 +#if 0 1.1077 + gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, " 1.1078 + "overhead %1.4lf", 1.1079 + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, 1.1080 + the_task->conc_overhead(os::elapsedTime()) * 8.0); 1.1081 + gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", 1.1082 + elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); 1.1083 +#endif 1.1084 + } while (!_cm->has_aborted() && the_task->has_aborted()); 1.1085 + } 1.1086 + the_task->record_end_time(); 1.1087 + guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" ); 1.1088 + 1.1089 + ConcurrentGCThread::stsLeave(); 1.1090 + 1.1091 + double end_vtime = os::elapsedVTime(); 1.1092 + the_task->update_co_tracker(true); 1.1093 + _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); 1.1094 + } 1.1095 + 1.1096 + CMConcurrentMarkingTask(ConcurrentMark* cm, 1.1097 + ConcurrentMarkThread* cmt) : 1.1098 + AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } 1.1099 + 1.1100 + ~CMConcurrentMarkingTask() { } 1.1101 +}; 1.1102 +
// Concurrent phase entry point: run the marking gang (or a single
// in-line task when there are no parallel marking threads).
1.1103 +void ConcurrentMark::markFromRoots() { 1.1104 + // we might be tempted to assert that: 1.1105 + // assert(asynch == !SafepointSynchronize::is_at_safepoint(), 1.1106 + // "inconsistent argument?"); 1.1107 + // However that wouldn't be right, because it's possible that 1.1108 + // a safepoint is indeed in progress as a younger generation 1.1109 + // stop-the-world GC happens even as we mark in this generation.
1.1110 + 1.1111 + _restart_for_overflow = false; 1.1112 + 1.1113 + set_phase(MAX2((size_t) 1, parallel_marking_threads()), true); 1.1114 + 1.1115 + CMConcurrentMarkingTask markingTask(this, cmThread()); 1.1116 + if (parallel_marking_threads() > 0) 1.1117 + _parallel_workers->run_task(&markingTask); 1.1118 + else 1.1119 + markingTask.work(0); 1.1120 + print_stats(); 1.1121 +} 1.1122 +
// The STW remark pause: finish the marking work, process weak
// references, and either complete marking (deactivate SATB queues) or
// schedule a restart if the global mark stack overflowed.
1.1123 +void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { 1.1124 + // world is stopped at this checkpoint 1.1125 + assert(SafepointSynchronize::is_at_safepoint(), 1.1126 + "world should be stopped"); 1.1127 + G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1.1128 + 1.1129 + // If a full collection has happened, we shouldn't do this. 1.1130 + if (has_aborted()) { 1.1131 + g1h->set_marking_complete(); // So bitmap clearing isn't confused 1.1132 + return; 1.1133 + } 1.1134 + 1.1135 + G1CollectorPolicy* g1p = g1h->g1_policy(); 1.1136 + g1p->record_concurrent_mark_remark_start(); 1.1137 + 1.1138 + double start = os::elapsedTime(); 1.1139 + GCOverheadReporter::recordSTWStart(start); 1.1140 + 1.1141 + checkpointRootsFinalWork(); 1.1142 + 1.1143 + double mark_work_end = os::elapsedTime(); 1.1144 + 1.1145 + weakRefsWork(clear_all_soft_refs); 1.1146 + 1.1147 + if (has_overflown()) { 1.1148 + // Oops. We overflowed. Restart concurrent marking. 1.1149 + _restart_for_overflow = true; 1.1150 + // Clear the flag. We do not need it any more. 1.1151 + clear_has_overflown(); 1.1152 + if (G1TraceMarkStackOverflow) 1.1153 + gclog_or_tty->print_cr("\nRemark led to restart for overflow."); 1.1154 + } else { 1.1155 + // We're done with marking.
1.1156 + JavaThread::satb_mark_queue_set().set_active_all_threads(false); 1.1157 + } 1.1158 + 1.1159 +#if VERIFY_OBJS_PROCESSED 1.1160 + _scan_obj_cl.objs_processed = 0; 1.1161 + ThreadLocalObjQueue::objs_enqueued = 0; 1.1162 +#endif 1.1163 + 1.1164 + // Statistics 1.1165 + double now = os::elapsedTime(); 1.1166 + _remark_mark_times.add((mark_work_end - start) * 1000.0); 1.1167 + _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); 1.1168 + _remark_times.add((now - start) * 1000.0); 1.1169 + 1.1170 + GCOverheadReporter::recordSTWEnd(now); 1.1171 + for (int i = 0; i < (int)_max_task_num; ++i) 1.1172 + _tasks[i]->disable_co_tracker(); 1.1173 + _cleanup_co_tracker.enable(); 1.1174 + _cleanup_co_tracker.reset(cleanup_task_overhead()); 1.1175 + g1p->record_concurrent_mark_remark_end(); 1.1176 +} 1.1177 + 1.1178 + 1.1179 +#define CARD_BM_TEST_MODE 0 1.1180 +
// Region closure that computes per-region live bytes from the mark
// bitmap and records liveness in the region and card bitmaps. Used both
// concurrently (_final == false, with yielding and throttling) and in
// the STW final count (_final == true, no yielding).
// NOTE(review): the constructor's initializer list is not in member
// declaration order (e.g. _co_tracker, _final); members are still
// initialized in declaration order per C++ rules, so behavior is
// unaffected, but compilers may warn (-Wreorder).
1.1181 +class CalcLiveObjectsClosure: public HeapRegionClosure { 1.1182 + 1.1183 + CMBitMapRO* _bm; 1.1184 + ConcurrentMark* _cm; 1.1185 + COTracker* _co_tracker; 1.1186 + bool _changed; 1.1187 + bool _yield; 1.1188 + size_t _words_done; 1.1189 + size_t _tot_live; 1.1190 + size_t _tot_used; 1.1191 + size_t _regions_done; 1.1192 + double _start_vtime_sec; 1.1193 + 1.1194 + BitMap* _region_bm; 1.1195 + BitMap* _card_bm; 1.1196 + intptr_t _bottom_card_num; 1.1197 + bool _final; 1.1198 +
// Set the card bitmap bits for the inclusive card-number range; card
// numbers are biased by _bottom_card_num to form bitmap indices.
1.1199 + void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { 1.1200 + for (intptr_t i = start_card_num; i <= last_card_num; i++) { 1.1201 +#if CARD_BM_TEST_MODE 1.1202 + guarantee(_card_bm->at(i - _bottom_card_num), 1.1203 + "Should already be set."); 1.1204 +#else 1.1205 + _card_bm->par_at_put(i - _bottom_card_num, 1); 1.1206 +#endif 1.1207 + } 1.1208 + } 1.1209 + 1.1210 +public: 1.1211 + CalcLiveObjectsClosure(bool final, 1.1212 + CMBitMapRO *bm, ConcurrentMark *cm, 1.1213 + BitMap* region_bm, BitMap* card_bm, 1.1214 + COTracker* co_tracker) : 1.1215 + _bm(bm), _cm(cm), _changed(false), _yield(true),
1.1216 + _words_done(0), _tot_live(0), _tot_used(0), 1.1217 + _region_bm(region_bm), _card_bm(card_bm), 1.1218 + _final(final), _co_tracker(co_tracker), 1.1219 + _regions_done(0), _start_vtime_sec(0.0) 1.1220 + { 1.1221 + _bottom_card_num = 1.1222 + intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> 1.1223 + CardTableModRefBS::card_shift); 1.1224 + } 1.1225 +
// Walk the marked objects of one region between top_at_conc_mark_count
// and next_top_at_mark_start, accumulating marked bytes and recording
// runs of live cards (body continues across the following lines).
1.1226 + bool doHeapRegion(HeapRegion* hr) { 1.1227 + if (_co_tracker != NULL) 1.1228 + _co_tracker->update(); 1.1229 + 1.1230 + if (!_final && _regions_done == 0) 1.1231 + _start_vtime_sec = os::elapsedVTime(); 1.1232 + 1.1233 + if (hr->continuesHumongous()) return false; 1.1234 + 1.1235 + HeapWord* nextTop = hr->next_top_at_mark_start(); 1.1236 + HeapWord* start = hr->top_at_conc_mark_count(); 1.1237 + assert(hr->bottom() <= start && start <= hr->end() && 1.1238 + hr->bottom() <= nextTop && nextTop <= hr->end() && 1.1239 + start <= nextTop, 1.1240 + "Preconditions."); 1.1241 + // Otherwise, record the number of word's we'll examine. 1.1242 + size_t words_done = (nextTop - start); 1.1243 + // Find the first marked object at or after "start". 1.1244 + start = _bm->getNextMarkedWordAddress(start, nextTop); 1.1245 + size_t marked_bytes = 0; 1.1246 + 1.1247 + // Below, the term "card num" means the result of shifting an address 1.1248 + // by the card shift -- address 0 corresponds to card number 0. One 1.1249 + // must subtract the card num of the bottom of the heap to obtain a 1.1250 + // card table index. 1.1251 + // The first card num of the sequence of live cards currently being 1.1252 + // constructed. -1 ==> no sequence. 1.1253 + intptr_t start_card_num = -1; 1.1254 + // The last card num of the sequence of live cards currently being 1.1255 + // constructed. -1 ==> no sequence. 1.1256 + intptr_t last_card_num = -1; 1.1257 + 1.1258 + while (start < nextTop) { 1.1259 + if (_yield && _cm->do_yield_check()) { 1.1260 + // We yielded.
It might be for a full collection, in which case 1.1261 + // all bets are off; terminate the traversal. 1.1262 + if (_cm->has_aborted()) { 1.1263 + _changed = false; 1.1264 + return true; 1.1265 + } else { 1.1266 + // Otherwise, it might be a collection pause, and the region 1.1267 + // we're looking at might be in the collection set. We'll 1.1268 + // abandon this region. 1.1269 + return false; 1.1270 + } 1.1271 + } 1.1272 + oop obj = oop(start); 1.1273 + int obj_sz = obj->size(); 1.1274 + // The card num of the start of the current object. 1.1275 + intptr_t obj_card_num = 1.1276 + intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); 1.1277 + 1.1278 + HeapWord* obj_last = start + obj_sz - 1; 1.1279 + intptr_t obj_last_card_num = 1.1280 + intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); 1.1281 +
// Extend the current run of live cards, or flush it and start a new
// run when the current object leaves a gap of more than one card.
1.1282 + if (obj_card_num != last_card_num) { 1.1283 + if (start_card_num == -1) { 1.1284 + assert(last_card_num == -1, "Both or neither."); 1.1285 + start_card_num = obj_card_num; 1.1286 + } else { 1.1287 + assert(last_card_num != -1, "Both or neither."); 1.1288 + assert(obj_card_num >= last_card_num, "Inv"); 1.1289 + if ((obj_card_num - last_card_num) > 1) { 1.1290 + // Mark the last run, and start a new one. 1.1291 + mark_card_num_range(start_card_num, last_card_num); 1.1292 + start_card_num = obj_card_num; 1.1293 + } 1.1294 + } 1.1295 +#if CARD_BM_TEST_MODE 1.1296 + /* 1.1297 + gclog_or_tty->print_cr("Setting bits from %d/%d.", 1.1298 + obj_card_num - _bottom_card_num, 1.1299 + obj_last_card_num - _bottom_card_num); 1.1300 + */ 1.1301 + for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { 1.1302 + _card_bm->par_at_put(j - _bottom_card_num, 1); 1.1303 + } 1.1304 +#endif 1.1305 + } 1.1306 + // In any case, we set the last card num. 1.1307 + last_card_num = obj_last_card_num; 1.1308 + 1.1309 + marked_bytes += obj_sz * HeapWordSize; 1.1310 + // Find the next marked object after this one.
1.1311 + start = _bm->getNextMarkedWordAddress(start + 1, nextTop); 1.1312 + _changed = true; 1.1313 + } 1.1314 + // Handle the last range, if any. 1.1315 + if (start_card_num != -1) 1.1316 + mark_card_num_range(start_card_num, last_card_num); 1.1317 + if (_final) { 1.1318 + // Mark the allocated-since-marking portion... 1.1319 + HeapWord* tp = hr->top(); 1.1320 + if (nextTop < tp) { 1.1321 + start_card_num = 1.1322 + intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); 1.1323 + last_card_num = 1.1324 + intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); 1.1325 + mark_card_num_range(start_card_num, last_card_num); 1.1326 + // This definitely means the region has live objects. 1.1327 + _region_bm->par_at_put(hr->hrs_index(), 1); 1.1328 + } 1.1329 + } 1.1330 + 1.1331 + hr->add_to_marked_bytes(marked_bytes); 1.1332 + // Update the live region bitmap. 1.1333 + if (marked_bytes > 0) { 1.1334 + _region_bm->par_at_put(hr->hrs_index(), 1); 1.1335 + } 1.1336 + hr->set_top_at_conc_mark_count(nextTop); 1.1337 + _tot_live += hr->next_live_bytes(); 1.1338 + _tot_used += hr->used(); 1.1339 + _words_done = words_done; 1.1340 +
// Concurrent mode only: every 10 regions, sleep proportionally to the
// vtime consumed so the configured cleanup overhead is respected.
1.1341 + if (!_final) { 1.1342 + ++_regions_done; 1.1343 + if (_regions_done % 10 == 0) { 1.1344 + double end_vtime_sec = os::elapsedVTime(); 1.1345 + double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; 1.1346 + if (elapsed_vtime_sec > (10.0 / 1000.0)) { 1.1347 + jlong sleep_time_ms = 1.1348 + (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); 1.1349 +#if 0 1.1350 + gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, " 1.1351 + "overhead %1.4lf", 1.1352 + elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, 1.1353 + _co_tracker->concOverhead(os::elapsedTime())); 1.1354 +#endif 1.1355 + os::sleep(Thread::current(), sleep_time_ms, false); 1.1356 + _start_vtime_sec = end_vtime_sec; 1.1357 + } 1.1358 + } 1.1359 + } 1.1360 + 1.1361 + return false; 1.1362 + } 1.1363 + 1.1364 + bool changed() { return _changed;
} 1.1365 + void reset() { _changed = false; _words_done = 0; } 1.1366 + void no_yield() { _yield = false; } 1.1367 + size_t words_done() { return _words_done; } 1.1368 + size_t tot_live() { return _tot_live; } 1.1369 + size_t tot_used() { return _tot_used; } 1.1370 +}; 1.1371 + 1.1372 +
// Concurrent counting pass: iterate the closure over all regions until a
// full pass makes no further changes (changed() goes false).
1.1373 +void ConcurrentMark::calcDesiredRegions() { 1.1374 + guarantee( _cleanup_co_tracker.enabled(), "invariant" ); 1.1375 + _cleanup_co_tracker.start(); 1.1376 + 1.1377 + _region_bm.clear(); 1.1378 + _card_bm.clear(); 1.1379 + CalcLiveObjectsClosure calccl(false /*final*/, 1.1380 + nextMarkBitMap(), this, 1.1381 + &_region_bm, &_card_bm, 1.1382 + &_cleanup_co_tracker); 1.1383 + G1CollectedHeap *g1h = G1CollectedHeap::heap(); 1.1384 + g1h->heap_region_iterate(&calccl); 1.1385 + 1.1386 + do { 1.1387 + calccl.reset(); 1.1388 + g1h->heap_region_iterate(&calccl); 1.1389 + } while (calccl.changed()); 1.1390 + 1.1391 + _cleanup_co_tracker.update(true); 1.1392 +} 1.1393 +
// STW gang task for the final counting pass during cleanup; collects
// per-worker live/used byte totals in C-heap arrays freed by the dtor.
1.1394 +class G1ParFinalCountTask: public AbstractGangTask { 1.1395 +protected: 1.1396 + G1CollectedHeap* _g1h; 1.1397 + CMBitMap* _bm; 1.1398 + size_t _n_workers; 1.1399 + size_t *_live_bytes; 1.1400 + size_t *_used_bytes; 1.1401 + BitMap* _region_bm; 1.1402 + BitMap* _card_bm; 1.1403 +public: 1.1404 + G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, 1.1405 + BitMap* region_bm, BitMap* card_bm) : 1.1406 + AbstractGangTask("G1 final counting"), _g1h(g1h), 1.1407 + _bm(bm), _region_bm(region_bm), _card_bm(card_bm) 1.1408 + { 1.1409 + if (ParallelGCThreads > 0) 1.1410 + _n_workers = _g1h->workers()->total_workers(); 1.1411 + else 1.1412 + _n_workers = 1; 1.1413 + _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); 1.1414 + _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); 1.1415 + } 1.1416 + 1.1417 + ~G1ParFinalCountTask() { 1.1418 + FREE_C_HEAP_ARRAY(size_t, _live_bytes); 1.1419 + FREE_C_HEAP_ARRAY(size_t, _used_bytes); 1.1420 + } 1.1421 + 1.1422 + void work(int i) { 1.1423 + CalcLiveObjectsClosure calccl(true
/*final*/, 1.1424 + _bm, _g1h->concurrent_mark(), 1.1425 + _region_bm, _card_bm, 1.1426 + NULL /* CO tracker */); 1.1427 + calccl.no_yield(); 1.1428 + if (ParallelGCThreads > 0) { 1.1429 + _g1h->heap_region_par_iterate_chunked(&calccl, i, 1); 1.1430 + } else { 1.1431 + _g1h->heap_region_iterate(&calccl); 1.1432 + } 1.1433 + assert(calccl.complete(), "Shouldn't have yielded!"); 1.1434 + 1.1435 + guarantee( (size_t)i < _n_workers, "invariant" ); 1.1436 + _live_bytes[i] = calccl.tot_live(); 1.1437 + _used_bytes[i] = calccl.tot_used(); 1.1438 + } 1.1439 + size_t live_bytes() { 1.1440 + size_t live_bytes = 0; 1.1441 + for (size_t i = 0; i < _n_workers; ++i) 1.1442 + live_bytes += _live_bytes[i]; 1.1443 + return live_bytes; 1.1444 + } 1.1445 + size_t used_bytes() { 1.1446 + size_t used_bytes = 0; 1.1447 + for (size_t i = 0; i < _n_workers; ++i) 1.1448 + used_bytes += _used_bytes[i]; 1.1449 + return used_bytes; 1.1450 + } 1.1451 +}; 1.1452 + 1.1453 +class G1ParNoteEndTask; 1.1454 +
// Per-worker closure for the note-end phase of cleanup: notes the end of
// marking on each region, frees totally-empty regions onto the worker's
// unclean-region list, and accumulates timing/size statistics.
// doHeapRegion() is defined out of line further below.
1.1455 +class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1.1456 + G1CollectedHeap* _g1; 1.1457 + int _worker_num; 1.1458 + size_t _max_live_bytes; 1.1459 + size_t _regions_claimed; 1.1460 + size_t _freed_bytes; 1.1461 + size_t _cleared_h_regions; 1.1462 + size_t _freed_regions; 1.1463 + UncleanRegionList* _unclean_region_list; 1.1464 + double _claimed_region_time; 1.1465 + double _max_region_time; 1.1466 + 1.1467 +public: 1.1468 + G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1.1469 + UncleanRegionList* list, 1.1470 + int worker_num); 1.1471 + size_t freed_bytes() { return _freed_bytes; } 1.1472 + size_t cleared_h_regions() { return _cleared_h_regions; } 1.1473 + size_t freed_regions() { return _freed_regions; } 1.1474 + UncleanRegionList* unclean_region_list() { 1.1475 + return _unclean_region_list; 1.1476 + } 1.1477 + 1.1478 + bool doHeapRegion(HeapRegion *r); 1.1479 + 1.1480 + size_t max_live_bytes() { return _max_live_bytes; } 1.1481 + size_t regions_claimed() { return
_regions_claimed; } 1.1482 + double claimed_region_time_sec() { return _claimed_region_time; } 1.1483 + double max_region_time_sec() { return _max_region_time; } 1.1484 +}; 1.1485 +
// STW gang task wrapping G1NoteEndOfConcMarkClosure; aggregates each
// worker's freed/live byte totals under ParGCRareEvent_lock.
1.1486 +class G1ParNoteEndTask: public AbstractGangTask { 1.1487 + friend class G1NoteEndOfConcMarkClosure; 1.1488 +protected: 1.1489 + G1CollectedHeap* _g1h; 1.1490 + size_t _max_live_bytes; 1.1491 + size_t _freed_bytes; 1.1492 + ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state; 1.1493 +public: 1.1494 + G1ParNoteEndTask(G1CollectedHeap* g1h, 1.1495 + ConcurrentMark::ParCleanupThreadState** 1.1496 + par_cleanup_thread_state) : 1.1497 + AbstractGangTask("G1 note end"), _g1h(g1h), 1.1498 + _max_live_bytes(0), _freed_bytes(0), 1.1499 + _par_cleanup_thread_state(par_cleanup_thread_state) 1.1500 + {} 1.1501 + 1.1502 + void work(int i) { 1.1503 + double start = os::elapsedTime(); 1.1504 + G1NoteEndOfConcMarkClosure g1_note_end(_g1h, 1.1505 + &_par_cleanup_thread_state[i]->list, 1.1506 + i); 1.1507 + if (ParallelGCThreads > 0) { 1.1508 + _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, 2); 1.1509 + } else { 1.1510 + _g1h->heap_region_iterate(&g1_note_end); 1.1511 + } 1.1512 + assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1.1513 + 1.1514 + // Now finish up freeing the current thread's regions.
1.1515 + _g1h->finish_free_region_work(g1_note_end.freed_bytes(), 1.1516 + g1_note_end.cleared_h_regions(), 1.1517 + 0, NULL); 1.1518 + {
// Per-worker totals are folded into the task under a rare-event lock.
1.1519 + MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1.1520 + _max_live_bytes += g1_note_end.max_live_bytes(); 1.1521 + _freed_bytes += g1_note_end.freed_bytes(); 1.1522 + } 1.1523 + double end = os::elapsedTime(); 1.1524 + if (G1PrintParCleanupStats) { 1.1525 + gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " 1.1526 + "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", 1.1527 + i, start, end, (end-start)*1000.0, 1.1528 + g1_note_end.regions_claimed(), 1.1529 + g1_note_end.claimed_region_time_sec()*1000.0, 1.1530 + g1_note_end.max_region_time_sec()*1000.0); 1.1531 + } 1.1532 + } 1.1533 + size_t max_live_bytes() { return _max_live_bytes; } 1.1534 + size_t freed_bytes() { return _freed_bytes; } 1.1535 +}; 1.1536 +
// STW gang task that scrubs the remembered sets using the region/card
// liveness bitmaps produced by the counting passes.
1.1537 +class G1ParScrubRemSetTask: public AbstractGangTask { 1.1538 +protected: 1.1539 + G1RemSet* _g1rs; 1.1540 + BitMap* _region_bm; 1.1541 + BitMap* _card_bm; 1.1542 +public: 1.1543 + G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1.1544 + BitMap* region_bm, BitMap* card_bm) : 1.1545 + AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1.1546 + _region_bm(region_bm), _card_bm(card_bm) 1.1547 + {} 1.1548 + 1.1549 + void work(int i) { 1.1550 + if (ParallelGCThreads > 0) { 1.1551 + _g1rs->scrub_par(_region_bm, _card_bm, i, 3); 1.1552 + } else { 1.1553 + _g1rs->scrub(_region_bm, _card_bm); 1.1554 + } 1.1555 + } 1.1556 + 1.1557 +}; 1.1558 + 1.1559 +G1NoteEndOfConcMarkClosure:: 1.1560 +G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1.1561 + UncleanRegionList* list, 1.1562 + int worker_num) 1.1563 + : _g1(g1), _worker_num(worker_num), 1.1564 + _max_live_bytes(0), _regions_claimed(0), 1.1565 + _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0), 1.1566 + _claimed_region_time(0.0), _max_region_time(0.0), 1.1567 + _unclean_region_list(list) 1.1568 +{} 1.1569 + 1.1570
// Per-region note-end work: reset the GC time stamp, record the end of
// marking, track the region's max-live bytes, and free it (onto the
// worker's unclean list) if it turned out totally empty. Timing of the
// slowest region is tracked for the cleanup statistics.
+bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { 1.1571 + // We use a claim value of zero here because all regions 1.1572 + // were claimed with value 1 in the FinalCount task. 1.1573 + r->reset_gc_time_stamp(); 1.1574 + if (!r->continuesHumongous()) { 1.1575 + double start = os::elapsedTime(); 1.1576 + _regions_claimed++; 1.1577 + r->note_end_of_marking(); 1.1578 + _max_live_bytes += r->max_live_bytes(); 1.1579 + _g1->free_region_if_totally_empty_work(r, 1.1580 + _freed_bytes, 1.1581 + _cleared_h_regions, 1.1582 + _freed_regions, 1.1583 + _unclean_region_list, 1.1584 + true /*par*/); 1.1585 + double region_time = (os::elapsedTime() - start); 1.1586 + _claimed_region_time += region_time; 1.1587 + if (region_time > _max_region_time) _max_region_time = region_time; 1.1588 + } 1.1589 + return false; 1.1590 +} 1.1591 +
// The STW cleanup pause: final counting, bitmap swap, note-end of
// marking, optional remembered-set scrub, and policy/statistics updates
// (body continues across the following lines).
1.1592 +void ConcurrentMark::cleanup() { 1.1593 + // world is stopped at this checkpoint 1.1594 + assert(SafepointSynchronize::is_at_safepoint(), 1.1595 + "world should be stopped"); 1.1596 + G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1.1597 + 1.1598 + // If a full collection has happened, we shouldn't do this. 1.1599 + if (has_aborted()) { 1.1600 + g1h->set_marking_complete(); // So bitmap clearing isn't confused 1.1601 + return; 1.1602 + } 1.1603 + 1.1604 + _cleanup_co_tracker.disable(); 1.1605 + 1.1606 + G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1.1607 + g1p->record_concurrent_mark_cleanup_start(); 1.1608 + 1.1609 + double start = os::elapsedTime(); 1.1610 + GCOverheadReporter::recordSTWStart(start); 1.1611 + 1.1612 + // Do counting once more with the world stopped for good measure.
1.1613 + G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), 1.1614 + &_region_bm, &_card_bm); 1.1615 + if (ParallelGCThreads > 0) { 1.1616 + int n_workers = g1h->workers()->total_workers(); 1.1617 + g1h->set_par_threads(n_workers); 1.1618 + g1h->workers()->run_task(&g1_par_count_task); 1.1619 + g1h->set_par_threads(0); 1.1620 + } else { 1.1621 + g1_par_count_task.work(0); 1.1622 + } 1.1623 +
// Known garbage = used minus live, as counted by the final pass; this
// feeds the collector policy's pause-selection heuristics.
1.1624 + size_t known_garbage_bytes = 1.1625 + g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); 1.1626 +#if 0 1.1627 + gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", 1.1628 + (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), 1.1629 + (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), 1.1630 + (double) known_garbage_bytes / (double) (1024 * 1024)); 1.1631 +#endif // 0 1.1632 + g1p->set_known_garbage_bytes(known_garbage_bytes); 1.1633 + 1.1634 + size_t start_used_bytes = g1h->used(); 1.1635 + _at_least_one_mark_complete = true; 1.1636 + g1h->set_marking_complete(); 1.1637 + 1.1638 + double count_end = os::elapsedTime(); 1.1639 + double this_final_counting_time = (count_end - start); 1.1640 + if (G1PrintParCleanupStats) { 1.1641 + gclog_or_tty->print_cr("Cleanup:"); 1.1642 + gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", 1.1643 + this_final_counting_time*1000.0); 1.1644 + } 1.1645 + _total_counting_time += this_final_counting_time; 1.1646 + 1.1647 + // Install newly created mark bitMap as "prev". 1.1648 + swapMarkBitMaps(); 1.1649 + 1.1650 + g1h->reset_gc_time_stamp(); 1.1651 + 1.1652 + // Note end of marking in all heap regions.
1.1653 + double note_end_start = os::elapsedTime(); 1.1654 + G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); 1.1655 + if (ParallelGCThreads > 0) { 1.1656 + int n_workers = g1h->workers()->total_workers(); 1.1657 + g1h->set_par_threads(n_workers); 1.1658 + g1h->workers()->run_task(&g1_par_note_end_task); 1.1659 + g1h->set_par_threads(0); 1.1660 + } else { 1.1661 + g1_par_note_end_task.work(0); 1.1662 + } 1.1663 + g1h->set_unclean_regions_coming(true); 1.1664 + double note_end_end = os::elapsedTime(); 1.1665 + // Tell the mutators that there might be unclean regions coming... 1.1666 + if (G1PrintParCleanupStats) { 1.1667 + gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", 1.1668 + (note_end_end - note_end_start)*1000.0); 1.1669 + } 1.1670 + 1.1671 + // Now we "scrub" remembered sets. Note that we must do this before the 1.1672 + // call below, since it affects the metric by which we sort the heap 1.1673 + // regions. 1.1674 + if (G1ScrubRemSets) { 1.1675 + double rs_scrub_start = os::elapsedTime(); 1.1676 + G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 1.1677 + if (ParallelGCThreads > 0) { 1.1678 + int n_workers = g1h->workers()->total_workers(); 1.1679 + g1h->set_par_threads(n_workers); 1.1680 + g1h->workers()->run_task(&g1_par_scrub_rs_task); 1.1681 + g1h->set_par_threads(0); 1.1682 + } else { 1.1683 + g1_par_scrub_rs_task.work(0); 1.1684 + } 1.1685 + 1.1686 + double rs_scrub_end = os::elapsedTime(); 1.1687 + double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 1.1688 + _total_rs_scrub_time += this_rs_scrub_time; 1.1689 + } 1.1690 + 1.1691 + // this will also free any regions totally full of garbage objects, 1.1692 + // and sort the regions. 1.1693 + g1h->g1_policy()->record_concurrent_mark_cleanup_end( 1.1694 + g1_par_note_end_task.freed_bytes(), 1.1695 + g1_par_note_end_task.max_live_bytes()); 1.1696 + 1.1697 + // Statistics.
  // Timing bookkeeping for the cleanup pause.
  double end = os::elapsedTime();
  _cleanup_times.add((end - start) * 1000.0);
  GCOverheadReporter::recordSTWEnd(end);

  // G1CollectedHeap::heap()->print();
  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
  // G1CollectedHeap::heap()->get_gc_time_stamp());

  if (PrintGC || PrintGCDetails) {
    g1h->print_size_transition(gclog_or_tty,
                               start_used_bytes,
                               g1h->used(),
                               g1h->capacity());
  }

  // The bytes freed by this cleanup shrink the known-garbage estimate.
  size_t cleaned_up_bytes = start_used_bytes - g1h->used();
  g1p->decrease_known_garbage_bytes(cleaned_up_bytes);

  // We need to make this be a "collection" so any collection pause that
  // races with it goes around and waits for completeCleanup to finish.
  g1h->increment_total_collections();

#ifndef PRODUCT
  if (G1VerifyConcMark) {
    G1CollectedHeap::heap()->prepare_for_verify();
    G1CollectedHeap::heap()->verify(true,false);
  }
#endif
}

// Finish the cleanup work that does not need to run inside the pause:
// for each per-worker list built during the note-end phase, clear the
// remembered set of every region on the list and hand the region back
// to the heap's unclean list. Bails out if a full GC aborted marking.
void ConcurrentMark::completeCleanup() {
  // A full collection intervened.
  if (has_aborted()) return;

  int first = 0;
  int last = (int)MAX2(ParallelGCThreads, (size_t)1);
  for (int t = 0; t < last; t++) {
    // Per-worker list of regions collected during the note-end phase.
    UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
    assert(list->well_formed(), "Inv");
    HeapRegion* hd = list->hd();
    while (hd != NULL) {
      // Now finish up the other stuff.
      hd->rem_set()->clear();
      HeapRegion* next_hd = hd->next_from_unclean_list();
      (void)list->pop();
      guarantee(list->hd() == next_hd, "how not?");
      _g1h->put_region_on_unclean_list(hd);
      if (!hd->isHumongous()) {
        // Add this to the _free_regions count by 1.
        _g1h->finish_free_region_work(0, 0, 1, NULL);
      }
      hd = list->hd();
      guarantee(hd == next_hd, "how not?");
    }
  }
}


// Liveness predicate used during reference processing at remark: an
// object is considered live unless it lies inside the G1 reserved
// space and is not marked ("ill") there.
class G1CMIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1;
 public:
  G1CMIsAliveClosure(G1CollectedHeap* g1) :
    _g1(g1)
  {}

  void do_object(oop obj) {
    assert(false, "not to be invoked");
  }
  bool do_object_b(oop obj) {
    HeapWord* addr = (HeapWord*)obj;
    return addr != NULL &&
           (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
  }
};

// Keep-alive closure for reference processing: marks a newly-live G1
// object in the given bitmap and pushes it on the global mark stack so
// it will subsequently be traced.
class G1CMKeepAliveClosure: public OopClosure {
  G1CollectedHeap* _g1;
  ConcurrentMark* _cm;
  CMBitMap* _bitMap;
 public:
  G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
                       CMBitMap* bitMap) :
    _g1(g1), _cm(cm),
    _bitMap(bitMap) {}

  void do_oop(narrowOop* p) {
    guarantee(false, "NYI");
  }

  void do_oop(oop* p) {
    oop thisOop = *p;
    HeapWord* addr = (HeapWord*)thisOop;
    if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
      _bitMap->mark(addr);
      _cm->mark_stack_push(thisOop);
    }
  }
};

// "Complete GC" closure for reference processing: drains the global
// mark stack, applying the keep-alive closure to every entry.
class G1CMDrainMarkingStackClosure: public VoidClosure {
  CMMarkStack* _markStack;
  CMBitMap* _bitMap;
  G1CMKeepAliveClosure* _oopClosure;
 public:
  G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
                               G1CMKeepAliveClosure* oopClosure) :
    _bitMap(bitMap),
    _markStack(markStack),
    _oopClosure(oopClosure)
  {}

  void do_void() {
    _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
  }
};

void ConcurrentMark::weakRefsWork(bool
clear_all_soft_refs) {
  ResourceMark rm;
  HandleMark hm;
  ReferencePolicy* soft_ref_policy;

  // Process weak references.
  // Choose the soft-reference clearing policy: clear everything when
  // requested, otherwise an LRU policy (max-heap-based with COMPILER2,
  // current-heap-based otherwise).
  if (clear_all_soft_refs) {
    soft_ref_policy = new AlwaysClearPolicy();
  } else {
#ifdef COMPILER2
    soft_ref_policy = new LRUMaxHeapPolicy();
#else
    soft_ref_policy = new LRUCurrentHeapPolicy();
#endif
  }
  // NOTE(review): soft_ref_policy is allocated with new on every call
  // and never freed on this path — looks like a per-remark leak unless
  // ReferenceProcessor takes ownership; confirm.
  assert(_markStack.isEmpty(), "mark stack should be empty");

  G1CollectedHeap* g1 = G1CollectedHeap::heap();
  G1CMIsAliveClosure g1IsAliveClosure(g1);

  // Keep-alive marks into the *next* bitmap and pushes on the global
  // mark stack; the drain closure empties that stack again.
  G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap());
  G1CMDrainMarkingStackClosure
    g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
                               &g1KeepAliveClosure);

  // XXXYYY Also: copy the parallel ref processing code from CMS.
  ReferenceProcessor* rp = g1->ref_processor();
  rp->process_discovered_references(soft_ref_policy,
                                    &g1IsAliveClosure,
                                    &g1KeepAliveClosure,
                                    &g1DrainMarkingStackClosure,
                                    NULL);
  assert(_markStack.overflow() || _markStack.isEmpty(),
         "mark stack should be empty (unless it overflowed)");
  if (_markStack.overflow()) {
    // Overflow forces marking to be restarted; record it globally.
    set_has_overflown();
  }

  rp->enqueue_discovered_references();
  rp->verify_no_references_recorded();
  assert(!rp->discovery_enabled(), "should have been disabled");

  // Now clean up stale oops in SymbolTable and StringTable
  SymbolTable::unlink(&g1IsAliveClosure);
  StringTable::unlink(&g1IsAliveClosure);
}

// After marking completes, the "next" bitmap holds the fresh marking
// information: install it as "prev" and recycle the old "prev" bitmap
// as the new "next".
void ConcurrentMark::swapMarkBitMaps() {
  CMBitMapRO* temp = _prevMarkBitMap;
  _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap;
  _nextMarkBitMap = (CMBitMap*) temp;
}

// Parallel gang task for the remark phase: each active worker runs its
// CMTask's marking step repeatedly until the step completes without
// aborting, or marking overflows (in which case remark is abandoned
// and concurrent marking restarts).
class CMRemarkTask: public AbstractGangTask {
private:
  ConcurrentMark *_cm;

public:
  void work(int worker_i) {
    // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
    if ((size_t)worker_i < _cm->active_tasks()) {
      CMTask* task = _cm->task(worker_i);
      task->record_start_time();
      do {
        task->do_marking_step(1000000000.0 /* something very large */);
      } while (task->has_aborted() && !_cm->has_overflown());
      // If we overflow, then we do not want to restart. We instead
      // want to abort remark and do concurrent marking again.
      task->record_end_time();
    }
  }

  CMRemarkTask(ConcurrentMark* cm) :
    AbstractGangTask("Par Remark"), _cm(cm) { }
};

// The stop-the-world remark work: finish marking by draining the
// remaining SATB buffers and task queues, using all available worker
// threads (or inline when ParallelGCThreads == 0).
void ConcurrentMark::checkpointRootsFinalWork() {
  ResourceMark rm;
  HandleMark hm;
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  g1h->ensure_parsability(false);

  if (ParallelGCThreads > 0) {
    g1h->change_strong_roots_parity();
    // this is remark, so we'll use up all available threads
    int active_workers = ParallelGCThreads;
    set_phase(active_workers, false);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    int n_workers = g1h->workers()->total_workers();
    g1h->set_par_threads(n_workers);
    g1h->workers()->run_task(&remarkTask);
    g1h->set_par_threads(0);

    // Remark must leave no completed SATB buffers behind.
    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  } else {
    g1h->change_strong_roots_parity();
    // this is remark, so we'll use up all available threads
    int active_workers = 1;
    set_phase(active_workers, false);

    CMRemarkTask remarkTask(this);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    remarkTask.work(0);

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
  }

  print_stats();

  // On overflow the caller restarts concurrent marking; otherwise
  // marking is done and we can leave the marking state.
  if (!restart_for_overflow())
    set_non_marking_state();

#if VERIFY_OBJS_PROCESSED
  if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
    gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
                           _scan_obj_cl.objs_processed,
                           ThreadLocalObjQueue::objs_enqueued);
    guarantee(_scan_obj_cl.objs_processed ==
              ThreadLocalObjQueue::objs_enqueued,
              "Different number of objs processed and enqueued.");
  }
#endif
}

// Debug printer: for each reference held by an object, report whether
// the referent is outside G1, above the previous TAMS (allocated since
// marking started), marked, or — suspiciously — not marked at all.
class ReachablePrinterOopClosure: public OopClosure {
private:
  G1CollectedHeap* _g1h;
  CMBitMapRO* _bitmap;
  outputStream* _out;

public:
  ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) :
    _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { }

  void do_oop(narrowOop* p) {
    guarantee(false, "NYI");
  }

  void do_oop(oop* p) {
    oop obj = *p;
    const char* str = NULL;
    const char* str2 = "";

    if (!_g1h->is_in_g1_reserved(obj))
      str = "outside G1 reserved";
    else {
      HeapRegion* hr = _g1h->heap_region_containing(obj);
      guarantee( hr != NULL, "invariant" );
      if (hr->obj_allocated_since_prev_marking(obj)) {
        str = "over TAMS";
        if (_bitmap->isMarked((HeapWord*) obj))
          str2 = " AND MARKED";
      } else if (_bitmap->isMarked((HeapWord*) obj))
        str = "marked";
      else
        str = "#### NOT MARKED ####";
    }

    _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s",
                   p, (void*) obj, str, str2);
  }
};

// Debug printer: visits every marked bit in a bitmap and dumps the
// corresponding object together with the objects it references.
class ReachablePrinterClosure: public BitMapClosure {
private:
  CMBitMapRO* _bitmap;
  outputStream* _out;

public:
  ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
    _bitmap(bitmap), _out(out) { }

  bool do_bit(size_t offset) {
    HeapWord* addr = _bitmap->offsetToHeapWord(offset);
    ReachablePrinterOopClosure oopCl(_bitmap, _out);

    _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset);
    oop(addr)->oop_iterate(&oopCl);
    _out->print_cr("");

    return true;
  }
};

// Debug printer for objects allocated since the previous marking
// (i.e. over TAMS): dumps each object and its references.
class ObjInRegionReachablePrinterClosure : public ObjectClosure {
private:
  CMBitMapRO* _bitmap;
  outputStream* _out;

public:
  void do_object(oop o) {
    ReachablePrinterOopClosure oopCl(_bitmap, _out);

    _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o);
    o->oop_iterate(&oopCl);
    _out->print_cr("");
  }

  ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) :
    _bitmap(bitmap), _out(out) { }
};

// Debug printer for a heap region: prints the region's bounds, top and
// previous TAMS, then dumps every object in [PTAMS, top).
class RegionReachablePrinterClosure : public HeapRegionClosure {
private:
  CMBitMapRO* _bitmap;
  outputStream* _out;

public:
  bool doHeapRegion(HeapRegion* hr) {
    HeapWord* b = hr->bottom();
    HeapWord* e = hr->end();
    HeapWord* t = hr->top();
    HeapWord* p = hr->prev_top_at_mark_start();
    _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
                   "PTAMS: "PTR_FORMAT, b, e, t, p);
    _out->print_cr("");

    ObjInRegionReachablePrinterClosure ocl(_bitmap, _out);
    hr->object_iterate_mem_careful(MemRegion(p, t), &ocl);

    return false;
  }

  RegionReachablePrinterClosure(CMBitMapRO* bitmap,
                                outputStream* out) :
    _bitmap(bitmap), _out(out) { }
};

// Debug dump of everything reachable according to the previous mark
// bitmap, optionally redirected to /tmp/dump.txt.<pid>.
void ConcurrentMark::print_prev_bitmap_reachable() {
  outputStream* out = gclog_or_tty;

#if SEND_HEAP_DUMP_TO_FILE
  guarantee(heap_dump_file == NULL, "Protocol");
  char fn_buf[100];
  sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id());
  heap_dump_file = fopen(fn_buf, "w");
  fileStream fstream(heap_dump_file);
  out = &fstream;
#endif // SEND_HEAP_DUMP_TO_FILE

  RegionReachablePrinterClosure rcl(_prevMarkBitMap, out);
  out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP");
  _g1h->heap_region_iterate(&rcl);
  out->print_cr("");

  ReachablePrinterClosure cl(_prevMarkBitMap, out);
  out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP");
  _prevMarkBitMap->iterate(&cl);
  out->print_cr("");

#if SEND_HEAP_DUMP_TO_FILE
  fclose(heap_dump_file);
  heap_dump_file = NULL;
#endif // SEND_HEAP_DUMP_TO_FILE
}

// This note is for
drainAllSATBBuffers and the code in between. 1.2081 +// In the future we could reuse a task to do this work during an 1.2082 +// evacuation pause (since now tasks are not active and can be claimed 1.2083 +// during an evacuation pause). This was a late change to the code and 1.2084 +// is currently not being taken advantage of. 1.2085 + 1.2086 +class CMGlobalObjectClosure : public ObjectClosure { 1.2087 +private: 1.2088 + ConcurrentMark* _cm; 1.2089 + 1.2090 +public: 1.2091 + void do_object(oop obj) { 1.2092 + _cm->deal_with_reference(obj); 1.2093 + } 1.2094 + 1.2095 + CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } 1.2096 +}; 1.2097 + 1.2098 +void ConcurrentMark::deal_with_reference(oop obj) { 1.2099 + if (verbose_high()) 1.2100 + gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, 1.2101 + (void*) obj); 1.2102 + 1.2103 + 1.2104 + HeapWord* objAddr = (HeapWord*) obj; 1.2105 + if (_g1h->is_in_g1_reserved(objAddr)) { 1.2106 + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); 1.2107 + HeapRegion* hr = _g1h->heap_region_containing(obj); 1.2108 + if (_g1h->is_obj_ill(obj, hr)) { 1.2109 + if (verbose_high()) 1.2110 + gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " 1.2111 + "marked", (void*) obj); 1.2112 + 1.2113 + // we need to mark it first 1.2114 + if (_nextMarkBitMap->parMark(objAddr)) { 1.2115 + // No OrderAccess:store_load() is needed. 
It is implicit in the 1.2116 + // CAS done in parMark(objAddr) above 1.2117 + HeapWord* finger = _finger; 1.2118 + if (objAddr < finger) { 1.2119 + if (verbose_high()) 1.2120 + gclog_or_tty->print_cr("[global] below the global finger " 1.2121 + "("PTR_FORMAT"), pushing it", finger); 1.2122 + if (!mark_stack_push(obj)) { 1.2123 + if (verbose_low()) 1.2124 + gclog_or_tty->print_cr("[global] global stack overflow during " 1.2125 + "deal_with_reference"); 1.2126 + } 1.2127 + } 1.2128 + } 1.2129 + } 1.2130 + } 1.2131 +} 1.2132 + 1.2133 +void ConcurrentMark::drainAllSATBBuffers() { 1.2134 + CMGlobalObjectClosure oc(this); 1.2135 + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1.2136 + satb_mq_set.set_closure(&oc); 1.2137 + 1.2138 + while (satb_mq_set.apply_closure_to_completed_buffer()) { 1.2139 + if (verbose_medium()) 1.2140 + gclog_or_tty->print_cr("[global] processed an SATB buffer"); 1.2141 + } 1.2142 + 1.2143 + // no need to check whether we should do this, as this is only 1.2144 + // called during an evacuation pause 1.2145 + satb_mq_set.iterate_closure_all_threads(); 1.2146 + 1.2147 + satb_mq_set.set_closure(NULL); 1.2148 + guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); 1.2149 +} 1.2150 + 1.2151 +void ConcurrentMark::markPrev(oop p) { 1.2152 + // Note we are overriding the read-only view of the prev map here, via 1.2153 + // the cast. 1.2154 + ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p); 1.2155 +} 1.2156 + 1.2157 +void ConcurrentMark::clear(oop p) { 1.2158 + assert(p != NULL && p->is_oop(), "expected an oop"); 1.2159 + HeapWord* addr = (HeapWord*)p; 1.2160 + assert(addr >= _nextMarkBitMap->startWord() || 1.2161 + addr < _nextMarkBitMap->endWord(), "in a region"); 1.2162 + 1.2163 + _nextMarkBitMap->clear(addr); 1.2164 +} 1.2165 + 1.2166 +void ConcurrentMark::clearRangeBothMaps(MemRegion mr) { 1.2167 + // Note we are overriding the read-only view of the prev map here, via 1.2168 + // the cast. 
  ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
  _nextMarkBitMap->clearRange(mr);
}

// Claim the next region to be scanned by a marking task, by CAS-ing
// the global finger forward over the region that currently contains
// it. Returns the claimed region if it is non-empty; returns NULL if
// the claimed region was empty or if the CAS lost a race (the caller
// should call claim_region() again).
HeapRegion*
ConcurrentMark::claim_region(int task_num) {
  // "checkpoint" the finger
  HeapWord* finger = _finger;

  // _heap_end will not change underneath our feet; it only changes at
  // yield points.
  while (finger < _heap_end) {
    tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );

    // is the gap between reading the finger and doing the CAS too long?

    HeapRegion* curr_region = _g1h->heap_region_containing(finger);
    HeapWord* bottom = curr_region->bottom();
    HeapWord* end = curr_region->end();
    // Only objects below next-TAMS need to be scanned by this task.
    HeapWord* limit = curr_region->next_top_at_mark_start();

    if (verbose_low())
      gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
                             "["PTR_FORMAT", "PTR_FORMAT"), "
                             "limit = "PTR_FORMAT,
                             task_num, curr_region, bottom, end, limit);

    // Attempt to bump the global finger from this region's start to
    // its end; success means this task owns the region.
    HeapWord* res =
      (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
    if (res == finger) {
      // we succeeded

      // notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further
      guarantee( _finger >= end, "the finger should have moved forward" );

      if (verbose_low())
        gclog_or_tty->print_cr("[%d] we were successful with region = "
                               PTR_FORMAT, task_num, curr_region);

      if (limit > bottom) {
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
                                 "returning it ", task_num, curr_region);
        return curr_region;
      } else {
        tmp_guarantee_CM( limit == bottom,
                          "the region limit should be at bottom" );
        if (verbose_low())
          gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
                                 "returning NULL", task_num, curr_region);
        // we return NULL and the caller should try calling
        // claim_region() again.
        return NULL;
      }
    } else {
      guarantee( _finger > finger, "the finger should have moved forward" );
      if (verbose_low())
        gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
                               "global finger = "PTR_FORMAT", "
                               "our finger = "PTR_FORMAT,
                               task_num, _finger, finger);

      // read it again
      finger = _finger;
    }
  }

  return NULL;
}

// Apply cl to every oop held in marking data structures: the global
// mark stack and each task's queue. Also invalidates any region stack
// entries that point into the collection set (their objects may move).
void ConcurrentMark::oops_do(OopClosure* cl) {
  if (_markStack.size() > 0 && verbose_low())
    gclog_or_tty->print_cr("[global] scanning the global marking stack, "
                           "size = %d", _markStack.size());
  // we first iterate over the contents of the mark stack...
  _markStack.oops_do(cl);

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue((int)i);

    if (queue->size() > 0 && verbose_low())
      gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
                             "size = %d", i, queue->size());

    // ...then over the contents of the all the task queues.
    queue->oops_do(cl);
  }

  // finally, invalidate any entries that in the region stack that
  // point into the collection set
  if (_regionStack.invalidate_entries_into_cset()) {
    // otherwise, any gray objects copied during the evacuation pause
    // might not be visited.
    guarantee( _should_gray_objects, "invariant" );
  }
}

// Reset all marking data structures ready for a new (or restarted)
// marking cycle: empty both stacks, clear overflow flags, reset the
// global finger and empty every task queue.
void ConcurrentMark::clear_marking_state() {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  _regionStack.setEmpty();
  _regionStack.clear_overflow();
  clear_has_overflown();
  _finger = _heap_start;

  for (int i = 0; i < (int)_max_task_num; ++i) {
    OopTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

// Print per-task marking statistics when verbose stats are enabled.
void ConcurrentMark::print_stats() {
  if (verbose_stats()) {
    gclog_or_tty->print_cr("---------------------------------------------------------------------");
    for (size_t i = 0; i < _active_tasks; ++i) {
      _tasks[i]->print_stats();
      gclog_or_tty->print_cr("---------------------------------------------------------------------");
    }
  }
}

// Marks objects reachable from the collection set using an explicit,
// fixed-size local mark stack. Object arrays are processed in
// _array_increment-sized chunks to bound stack depth; overflow of the
// local stack sets the abort flag so the caller can retry.
class CSMarkOopClosure: public OopClosure {
  friend class CSMarkBitMapClosure;

  G1CollectedHeap* _g1h;
  CMBitMap* _bm;
  ConcurrentMark* _cm;
  oop* _ms;
  jint* _array_ind_stack;
  int _ms_size;
  int _ms_ind;
  int _array_increment;

  // Push an object (and, for object arrays, the index to resume at);
  // fails if the local stack is full.
  bool push(oop obj, int arr_ind = 0) {
    if (_ms_ind == _ms_size) {
      gclog_or_tty->print_cr("Mark stack is full.");
      return false;
    }
    _ms[_ms_ind] = obj;
    if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
    _ms_ind++;
    return true;
  }

  oop pop() {
    if (_ms_ind == 0) return NULL;
    else {
      _ms_ind--;
      return _ms[_ms_ind];
    }
  }

  // Process entries until the local stack is empty; returns false if
  // an overflow forced an abort.
  bool drain() {
    while (_ms_ind > 0) {
      oop obj = pop();
      assert(obj != NULL, "Since index was non-zero.");
      if (obj->is_objArray()) {
        // Resume scanning the array at the saved index; if more
        // elements remain after this chunk, re-push with the next
        // starting index.
        jint arr_ind = _array_ind_stack[_ms_ind];
        objArrayOop aobj = objArrayOop(obj);
        jint len = aobj->length();
        jint next_arr_ind = arr_ind + _array_increment;
        if (next_arr_ind < len) {
          push(obj, next_arr_ind);
        }
        // Now process this portion of this one.
        int lim = MIN2(next_arr_ind, len);
        assert(!UseCompressedOops, "This needs to be fixed");
        for (int j = arr_ind; j < lim; j++) {
          do_oop(aobj->obj_at_addr<oop>(j));
        }

      } else {
        obj->oop_iterate(this);
      }
      if (abort()) return false;
    }
    return true;
  }

public:
  CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
    _g1h(G1CollectedHeap::heap()),
    _cm(cm),
    _bm(cm->nextMarkBitMap()),
    _ms_size(ms_size), _ms_ind(0),
    _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
    _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
    _array_increment(MAX2(ms_size/8, 16))
  {}

  ~CSMarkOopClosure() {
    FREE_C_HEAP_ARRAY(oop, _ms);
    FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
  }

  void do_oop(narrowOop* p) {
    guarantee(false, "NYI");
  }

  void do_oop(oop* p) {
    oop obj = *p;
    if (obj == NULL) return;
    if (obj->is_forwarded()) {
      // If the object has already been forwarded, we have to make sure
      // that it's marked. So follow the forwarding pointer. Note that
      // this does the right thing for self-forwarding pointers in the
      // evacuation failure case.
1.2377 + obj = obj->forwardee(); 1.2378 + } 1.2379 + HeapRegion* hr = _g1h->heap_region_containing(obj); 1.2380 + if (hr != NULL) { 1.2381 + if (hr->in_collection_set()) { 1.2382 + if (_g1h->is_obj_ill(obj)) { 1.2383 + _bm->mark((HeapWord*)obj); 1.2384 + if (!push(obj)) { 1.2385 + gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); 1.2386 + set_abort(); 1.2387 + } 1.2388 + } 1.2389 + } else { 1.2390 + // Outside the collection set; we need to gray it 1.2391 + _cm->deal_with_reference(obj); 1.2392 + } 1.2393 + } 1.2394 + } 1.2395 +}; 1.2396 + 1.2397 +class CSMarkBitMapClosure: public BitMapClosure { 1.2398 + G1CollectedHeap* _g1h; 1.2399 + CMBitMap* _bitMap; 1.2400 + ConcurrentMark* _cm; 1.2401 + CSMarkOopClosure _oop_cl; 1.2402 +public: 1.2403 + CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : 1.2404 + _g1h(G1CollectedHeap::heap()), 1.2405 + _bitMap(cm->nextMarkBitMap()), 1.2406 + _oop_cl(cm, ms_size) 1.2407 + {} 1.2408 + 1.2409 + ~CSMarkBitMapClosure() {} 1.2410 + 1.2411 + bool do_bit(size_t offset) { 1.2412 + // convert offset into a HeapWord* 1.2413 + HeapWord* addr = _bitMap->offsetToHeapWord(offset); 1.2414 + assert(_bitMap->endWord() && addr < _bitMap->endWord(), 1.2415 + "address out of range"); 1.2416 + assert(_bitMap->isMarked(addr), "tautology"); 1.2417 + oop obj = oop(addr); 1.2418 + if (!obj->is_forwarded()) { 1.2419 + if (!_oop_cl.push(obj)) return false; 1.2420 + if (!_oop_cl.drain()) return false; 1.2421 + } 1.2422 + // Otherwise... 
1.2423 + return true; 1.2424 + } 1.2425 +}; 1.2426 + 1.2427 + 1.2428 +class CompleteMarkingInCSHRClosure: public HeapRegionClosure { 1.2429 + CMBitMap* _bm; 1.2430 + CSMarkBitMapClosure _bit_cl; 1.2431 + enum SomePrivateConstants { 1.2432 + MSSize = 1000 1.2433 + }; 1.2434 + bool _completed; 1.2435 +public: 1.2436 + CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : 1.2437 + _bm(cm->nextMarkBitMap()), 1.2438 + _bit_cl(cm, MSSize), 1.2439 + _completed(true) 1.2440 + {} 1.2441 + 1.2442 + ~CompleteMarkingInCSHRClosure() {} 1.2443 + 1.2444 + bool doHeapRegion(HeapRegion* r) { 1.2445 + if (!r->evacuation_failed()) { 1.2446 + MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); 1.2447 + if (!mr.is_empty()) { 1.2448 + if (!_bm->iterate(&_bit_cl, mr)) { 1.2449 + _completed = false; 1.2450 + return true; 1.2451 + } 1.2452 + } 1.2453 + } 1.2454 + return false; 1.2455 + } 1.2456 + 1.2457 + bool completed() { return _completed; } 1.2458 +}; 1.2459 + 1.2460 +class ClearMarksInHRClosure: public HeapRegionClosure { 1.2461 + CMBitMap* _bm; 1.2462 +public: 1.2463 + ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } 1.2464 + 1.2465 + bool doHeapRegion(HeapRegion* r) { 1.2466 + if (!r->used_region().is_empty() && !r->evacuation_failed()) { 1.2467 + MemRegion usedMR = r->used_region(); 1.2468 + _bm->clearRange(r->used_region()); 1.2469 + } 1.2470 + return false; 1.2471 + } 1.2472 +}; 1.2473 + 1.2474 +void ConcurrentMark::complete_marking_in_collection_set() { 1.2475 + G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1.2476 + 1.2477 + if (!g1h->mark_in_progress()) { 1.2478 + g1h->g1_policy()->record_mark_closure_time(0.0); 1.2479 + return; 1.2480 + } 1.2481 + 1.2482 + int i = 1; 1.2483 + double start = os::elapsedTime(); 1.2484 + while (true) { 1.2485 + i++; 1.2486 + CompleteMarkingInCSHRClosure cmplt(this); 1.2487 + g1h->collection_set_iterate(&cmplt); 1.2488 + if (cmplt.completed()) break; 1.2489 + } 1.2490 + double end_time = os::elapsedTime(); 1.2491 + double 
elapsed_time_ms = (end_time - start) * 1000.0;
  g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
  if (PrintGCDetails) {
    gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
  }

  ClearMarksInHRClosure clr(nextMarkBitMap());
  g1h->collection_set_iterate(&clr);
}

// The next two methods deal with the following optimisation. Some
// objects are gray by being marked and located above the finger. If
// they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
// there are no regions in the collection set located above the
// finger, then the above cannot happen, hence we do not need to
// explicitly gray any objects when copying them to below the
// finger. The global stack will be scanned to ensure that, if it
// points to objects being copied, it will update their
// location. There is a tricky situation with the gray objects in
// region stack that are being copied, however. See the comment in
// newCSet().

// Called when a new collection set is being chosen: compute the
// minimum of the global and per-task fingers so registerCSetRegion()
// can tell whether any CSet region lies above it.
void ConcurrentMark::newCSet() {
  if (!concurrent_marking_in_progress())
    // nothing to do if marking is not in progress
    return;

  // find what the lowest finger is among the global and local fingers
  _min_finger = _finger;
  for (int i = 0; i < (int)_max_task_num; ++i) {
    CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != NULL && task_finger < _min_finger)
      _min_finger = task_finger;
  }

  _should_gray_objects = false;

  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set. Since such gray
  // objects will be moved, and because it's not easy to redirect
  // region stack entries to point to a new location (because objects
  // in one region might be scattered to multiple regions after they
  // are copied), one option is to ensure that all marked objects
  // copied during a pause are pushed on the stack. Notice, however,
  // that this problem can only happen when the region stack is not
  // empty during an evacuation pause. So, we make the fix a bit less
  // conservative and ensure that regions are pushed on the stack,
  // irrespective of whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smarter about it.
  if (!region_stack_empty())
    _should_gray_objects = true;
}

// If any collection set region extends above the minimum finger, gray
// objects may be copied below it, so copied marked objects must be
// pushed on the stack.
void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
  if (!concurrent_marking_in_progress())
    return;

  HeapWord* region_end = hr->end();
  if (region_end > _min_finger)
    _should_gray_objects = true;
}

// Disable the cleanup and per-task concurrent-overhead trackers. After
// an abort some may still be enabled; otherwise all must already be
// disabled.
void ConcurrentMark::disable_co_trackers() {
  if (has_aborted()) {
    if (_cleanup_co_tracker.enabled())
      _cleanup_co_tracker.disable();
    for (int i = 0; i < (int)_max_task_num; ++i) {
      CMTask* task = _tasks[i];
      if (task->co_tracker_enabled())
        task->disable_co_tracker();
    }
  } else {
    guarantee( !_cleanup_co_tracker.enabled(), "invariant" );
    for (int i = 0; i < (int)_max_task_num; ++i) {
      CMTask* task = _tasks[i];
      guarantee( !task->co_tracker_enabled(), "invariant" );
    }
  }
}

// abandon current marking iteration due to a Full GC
void ConcurrentMark::abort() {
  // If we're not marking, nothing to do.
  if (!G1ConcMark) return;

  // Clear all marks to force marking thread to do nothing
  _nextMarkBitMap->clearAll();
  // Empty mark stack
  clear_marking_state();
  for (int i = 0; i < (int)_max_task_num; ++i)
    _tasks[i]->clear_region_fields();
  _has_aborted = true;

  // Discard any partially-processed SATB buffers and deactivate the
  // SATB queues on all threads.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  satb_mq_set.set_active_all_threads(false);
}

// Print one timing line (count, total, average, and when available the
// std. deviation and maximum) for the named phase.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}

// Print summary timing statistics for all concurrent marking phases.
void ConcurrentMark::print_summary_info() {
  gclog_or_tty->print_cr(" Concurrent marking:");
  print_ms_time_info(" ", "init marks", _init_times);
  print_ms_time_info(" ", "remarks", _remark_times);
  {
    print_ms_time_info(" ", "final marks", _remark_mark_times);
    print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info(" ", "cleanups", _cleanup_times);
  gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).",
                         _total_counting_time,
                         (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
                          (double)_cleanup_times.num()
                         : 0.0));
  if (G1ScrubRemSets) {
    gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).",
                           _total_rs_scrub_time,
                           (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
                            (double)_cleanup_times.num()
                           : 0.0));
  }
  gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.",
                         (_init_times.sum() + _remark_times.sum() +
                          _cleanup_times.sum())/1000.0);
  gclog_or_tty->print_cr(" Total concurrent time = %8.2f s "
                         "(%8.2f s marking, %8.2f s counting).",
                         cmThread()->vtime_accum(),
                         cmThread()->vtime_mark_accum(),
                         cmThread()->vtime_count_accum());
}

// Closures
// XXX: there seems to be a lot of code duplication here;
// should refactor and consolidate the shared code.

// NOTE(review): the following two lines look like a stale comment
// inherited from the CMS collector — nothing below marks into a "CMS
// generation"; consider removing.
// This closure is used to mark refs into the CMS generation in
// the CMS bit map. Called at the first checkpoint.

// We take a break if someone is trying to stop the world.
bool ConcurrentMark::do_yield_check(int worker_i) {
  if (should_yield()) {
    // Worker 0 records the pause boundaries on behalf of everyone.
    if (worker_i == 0)
      _g1h->g1_policy()->record_concurrent_pause();
    cmThread()->yield();
    if (worker_i == 0)
      _g1h->g1_policy()->record_concurrent_pause_end();
    return true;
  } else {
    return false;
  }
}

bool ConcurrentMark::should_yield() {
  return cmThread()->should_yield();
}

// Returns whether the card covering address p is set in the card
// bitmap built during final counting.
bool ConcurrentMark::containing_card_is_marked(void* p) {
  size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
  return _card_bm.at(offset >> CardTableModRefBS::card_shift);
}

// Returns whether the cards covering both the first and last address
// of a range are marked.
bool ConcurrentMark::containing_cards_are_marked(void* start,
                                                 void* last) {
  return
    containing_card_is_marked(start) &&
    containing_card_is_marked(last);
}

#ifndef PRODUCT
// for debugging purposes
void ConcurrentMark::print_finger() {
  gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global
finger = "PTR_FORMAT, 1.2677 + _heap_start, _heap_end, _finger); 1.2678 + for (int i = 0; i < (int) _max_task_num; ++i) { 1.2679 + gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 1.2680 + } 1.2681 + gclog_or_tty->print_cr(""); 1.2682 +} 1.2683 +#endif 1.2684 + 1.2685 +// Closure for iteration over bitmaps 1.2686 +class CMBitMapClosure : public BitMapClosure { 1.2687 +private: 1.2688 + // the bitmap that is being iterated over 1.2689 + CMBitMap* _nextMarkBitMap; 1.2690 + ConcurrentMark* _cm; 1.2691 + CMTask* _task; 1.2692 + // true if we're scanning a heap region claimed by the task (so that 1.2693 + // we move the finger along), false if we're not, i.e. currently when 1.2694 + // scanning a heap region popped from the region stack (so that we 1.2695 + // do not move the task finger along; it'd be a mistake if we did so). 1.2696 + bool _scanning_heap_region; 1.2697 + 1.2698 +public: 1.2699 + CMBitMapClosure(CMTask *task, 1.2700 + ConcurrentMark* cm, 1.2701 + CMBitMap* nextMarkBitMap) 1.2702 + : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 1.2703 + 1.2704 + void set_scanning_heap_region(bool scanning_heap_region) { 1.2705 + _scanning_heap_region = scanning_heap_region; 1.2706 + } 1.2707 + 1.2708 + bool do_bit(size_t offset) { 1.2709 + HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 1.2710 + tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" ); 1.2711 + tmp_guarantee_CM( addr < _cm->finger(), "invariant" ); 1.2712 + 1.2713 + if (_scanning_heap_region) { 1.2714 + statsOnly( _task->increase_objs_found_on_bitmap() ); 1.2715 + tmp_guarantee_CM( addr >= _task->finger(), "invariant" ); 1.2716 + // We move that task's local finger along. 1.2717 + _task->move_finger_to(addr); 1.2718 + } else { 1.2719 + // We move the task's region finger along. 
1.2720 + _task->move_region_finger_to(addr); 1.2721 + } 1.2722 + 1.2723 + _task->scan_object(oop(addr)); 1.2724 + // we only partially drain the local queue and global stack 1.2725 + _task->drain_local_queue(true); 1.2726 + _task->drain_global_stack(true); 1.2727 + 1.2728 + // if the has_aborted flag has been raised, we need to bail out of 1.2729 + // the iteration 1.2730 + return !_task->has_aborted(); 1.2731 + } 1.2732 +}; 1.2733 + 1.2734 +// Closure for iterating over objects, currently only used for 1.2735 +// processing SATB buffers. 1.2736 +class CMObjectClosure : public ObjectClosure { 1.2737 +private: 1.2738 + CMTask* _task; 1.2739 + 1.2740 +public: 1.2741 + void do_object(oop obj) { 1.2742 + _task->deal_with_reference(obj); 1.2743 + } 1.2744 + 1.2745 + CMObjectClosure(CMTask* task) : _task(task) { } 1.2746 +}; 1.2747 + 1.2748 +// Closure for iterating over object fields 1.2749 +class CMOopClosure : public OopClosure { 1.2750 +private: 1.2751 + G1CollectedHeap* _g1h; 1.2752 + ConcurrentMark* _cm; 1.2753 + CMTask* _task; 1.2754 + 1.2755 +public: 1.2756 + void do_oop(narrowOop* p) { 1.2757 + guarantee(false, "NYI"); 1.2758 + } 1.2759 + 1.2760 + void do_oop(oop* p) { 1.2761 + tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" ); 1.2762 + 1.2763 + oop obj = *p; 1.2764 + if (_cm->verbose_high()) 1.2765 + gclog_or_tty->print_cr("[%d] we're looking at location " 1.2766 + "*"PTR_FORMAT" = "PTR_FORMAT, 1.2767 + _task->task_id(), p, (void*) obj); 1.2768 + _task->deal_with_reference(obj); 1.2769 + } 1.2770 + 1.2771 + CMOopClosure(G1CollectedHeap* g1h, 1.2772 + ConcurrentMark* cm, 1.2773 + CMTask* task) 1.2774 + : _g1h(g1h), _cm(cm), _task(task) { } 1.2775 +}; 1.2776 + 1.2777 +void CMTask::setup_for_region(HeapRegion* hr) { 1.2778 + tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(), 1.2779 + "claim_region() should have filtered out continues humongous regions" ); 1.2780 + 1.2781 + if (_cm->verbose_low()) 1.2782 + gclog_or_tty->print_cr("[%d] 
setting up for region "PTR_FORMAT, 1.2783 + _task_id, hr); 1.2784 + 1.2785 + _curr_region = hr; 1.2786 + _finger = hr->bottom(); 1.2787 + update_region_limit(); 1.2788 +} 1.2789 + 1.2790 +void CMTask::update_region_limit() { 1.2791 + HeapRegion* hr = _curr_region; 1.2792 + HeapWord* bottom = hr->bottom(); 1.2793 + HeapWord* limit = hr->next_top_at_mark_start(); 1.2794 + 1.2795 + if (limit == bottom) { 1.2796 + if (_cm->verbose_low()) 1.2797 + gclog_or_tty->print_cr("[%d] found an empty region " 1.2798 + "["PTR_FORMAT", "PTR_FORMAT")", 1.2799 + _task_id, bottom, limit); 1.2800 + // The region was collected underneath our feet. 1.2801 + // We set the finger to bottom to ensure that the bitmap 1.2802 + // iteration that will follow this will not do anything. 1.2803 + // (this is not a condition that holds when we set the region up, 1.2804 + // as the region is not supposed to be empty in the first place) 1.2805 + _finger = bottom; 1.2806 + } else if (limit >= _region_limit) { 1.2807 + tmp_guarantee_CM( limit >= _finger, "peace of mind" ); 1.2808 + } else { 1.2809 + tmp_guarantee_CM( limit < _region_limit, "only way to get here" ); 1.2810 + // This can happen under some pretty unusual circumstances. An 1.2811 + // evacuation pause empties the region underneath our feet (NTAMS 1.2812 + // at bottom). We then do some allocation in the region (NTAMS 1.2813 + // stays at bottom), followed by the region being used as a GC 1.2814 + // alloc region (NTAMS will move to top() and the objects 1.2815 + // originally below it will be grayed). All objects now marked in 1.2816 + // the region are explicitly grayed, if below the global finger, 1.2817 + // and we do not need in fact to scan anything else. So, we simply 1.2818 + // set _finger to be limit to ensure that the bitmap iteration 1.2819 + // doesn't do anything. 
1.2820 + _finger = limit; 1.2821 + } 1.2822 + 1.2823 + _region_limit = limit; 1.2824 +} 1.2825 + 1.2826 +void CMTask::giveup_current_region() { 1.2827 + tmp_guarantee_CM( _curr_region != NULL, "invariant" ); 1.2828 + if (_cm->verbose_low()) 1.2829 + gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, 1.2830 + _task_id, _curr_region); 1.2831 + clear_region_fields(); 1.2832 +} 1.2833 + 1.2834 +void CMTask::clear_region_fields() { 1.2835 + // Values for these three fields that indicate that we're not 1.2836 + // holding on to a region. 1.2837 + _curr_region = NULL; 1.2838 + _finger = NULL; 1.2839 + _region_limit = NULL; 1.2840 + 1.2841 + _region_finger = NULL; 1.2842 +} 1.2843 + 1.2844 +void CMTask::reset(CMBitMap* nextMarkBitMap) { 1.2845 + guarantee( nextMarkBitMap != NULL, "invariant" ); 1.2846 + 1.2847 + if (_cm->verbose_low()) 1.2848 + gclog_or_tty->print_cr("[%d] resetting", _task_id); 1.2849 + 1.2850 + _nextMarkBitMap = nextMarkBitMap; 1.2851 + clear_region_fields(); 1.2852 + 1.2853 + _calls = 0; 1.2854 + _elapsed_time_ms = 0.0; 1.2855 + _termination_time_ms = 0.0; 1.2856 + _termination_start_time_ms = 0.0; 1.2857 + 1.2858 +#if _MARKING_STATS_ 1.2859 + _local_pushes = 0; 1.2860 + _local_pops = 0; 1.2861 + _local_max_size = 0; 1.2862 + _objs_scanned = 0; 1.2863 + _global_pushes = 0; 1.2864 + _global_pops = 0; 1.2865 + _global_max_size = 0; 1.2866 + _global_transfers_to = 0; 1.2867 + _global_transfers_from = 0; 1.2868 + _region_stack_pops = 0; 1.2869 + _regions_claimed = 0; 1.2870 + _objs_found_on_bitmap = 0; 1.2871 + _satb_buffers_processed = 0; 1.2872 + _steal_attempts = 0; 1.2873 + _steals = 0; 1.2874 + _aborted = 0; 1.2875 + _aborted_overflow = 0; 1.2876 + _aborted_cm_aborted = 0; 1.2877 + _aborted_yield = 0; 1.2878 + _aborted_timed_out = 0; 1.2879 + _aborted_satb = 0; 1.2880 + _aborted_termination = 0; 1.2881 +#endif // _MARKING_STATS_ 1.2882 +} 1.2883 + 1.2884 +bool CMTask::should_exit_termination() { 1.2885 + regular_clock_call(); 1.2886 + // This 
is called when we are in the termination protocol. We should 1.2887 + // quit if, for some reason, this task wants to abort or the global 1.2888 + // stack is not empty (this means that we can get work from it). 1.2889 + return !_cm->mark_stack_empty() || has_aborted(); 1.2890 +} 1.2891 + 1.2892 +// This determines whether the method below will check both the local 1.2893 +// and global fingers when determining whether to push on the stack a 1.2894 +// gray object (value 1) or whether it will only check the global one 1.2895 +// (value 0). The tradeoffs are that the former will be a bit more 1.2896 +// accurate and possibly push less on the stack, but it might also be 1.2897 +// a little bit slower. 1.2898 + 1.2899 +#define _CHECK_BOTH_FINGERS_ 1 1.2900 + 1.2901 +void CMTask::deal_with_reference(oop obj) { 1.2902 + if (_cm->verbose_high()) 1.2903 + gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT, 1.2904 + _task_id, (void*) obj); 1.2905 + 1.2906 + ++_refs_reached; 1.2907 + 1.2908 + HeapWord* objAddr = (HeapWord*) obj; 1.2909 + if (_g1h->is_in_g1_reserved(objAddr)) { 1.2910 + tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); 1.2911 + HeapRegion* hr = _g1h->heap_region_containing(obj); 1.2912 + if (_g1h->is_obj_ill(obj, hr)) { 1.2913 + if (_cm->verbose_high()) 1.2914 + gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked", 1.2915 + _task_id, (void*) obj); 1.2916 + 1.2917 + // we need to mark it first 1.2918 + if (_nextMarkBitMap->parMark(objAddr)) { 1.2919 + // No OrderAccess:store_load() is needed. 
It is implicit in the 1.2920 + // CAS done in parMark(objAddr) above 1.2921 + HeapWord* global_finger = _cm->finger(); 1.2922 + 1.2923 +#if _CHECK_BOTH_FINGERS_ 1.2924 + // we will check both the local and global fingers 1.2925 + 1.2926 + if (_finger != NULL && objAddr < _finger) { 1.2927 + if (_cm->verbose_high()) 1.2928 + gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), " 1.2929 + "pushing it", _task_id, _finger); 1.2930 + push(obj); 1.2931 + } else if (_curr_region != NULL && objAddr < _region_limit) { 1.2932 + // do nothing 1.2933 + } else if (objAddr < global_finger) { 1.2934 + // Notice that the global finger might be moving forward 1.2935 + // concurrently. This is not a problem. In the worst case, we 1.2936 + // mark the object while it is above the global finger and, by 1.2937 + // the time we read the global finger, it has moved forward 1.2938 + // passed this object. In this case, the object will probably 1.2939 + // be visited when a task is scanning the region and will also 1.2940 + // be pushed on the stack. So, some duplicate work, but no 1.2941 + // correctness problems. 
1.2942 + 1.2943 + if (_cm->verbose_high()) 1.2944 + gclog_or_tty->print_cr("[%d] below the global finger " 1.2945 + "("PTR_FORMAT"), pushing it", 1.2946 + _task_id, global_finger); 1.2947 + push(obj); 1.2948 + } else { 1.2949 + // do nothing 1.2950 + } 1.2951 +#else // _CHECK_BOTH_FINGERS_ 1.2952 + // we will only check the global finger 1.2953 + 1.2954 + if (objAddr < global_finger) { 1.2955 + // see long comment above 1.2956 + 1.2957 + if (_cm->verbose_high()) 1.2958 + gclog_or_tty->print_cr("[%d] below the global finger " 1.2959 + "("PTR_FORMAT"), pushing it", 1.2960 + _task_id, global_finger); 1.2961 + push(obj); 1.2962 + } 1.2963 +#endif // _CHECK_BOTH_FINGERS_ 1.2964 + } 1.2965 + } 1.2966 + } 1.2967 +} 1.2968 + 1.2969 +void CMTask::push(oop obj) { 1.2970 + HeapWord* objAddr = (HeapWord*) obj; 1.2971 + tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" ); 1.2972 + tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" ); 1.2973 + tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" ); 1.2974 + 1.2975 + if (_cm->verbose_high()) 1.2976 + gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj); 1.2977 + 1.2978 + if (!_task_queue->push(obj)) { 1.2979 + // The local task queue looks full. We need to push some entries 1.2980 + // to the global stack. 1.2981 + 1.2982 + if (_cm->verbose_medium()) 1.2983 + gclog_or_tty->print_cr("[%d] task queue overflow, " 1.2984 + "moving entries to the global stack", 1.2985 + _task_id); 1.2986 + move_entries_to_global_stack(); 1.2987 + 1.2988 + // this should succeed since, even if we overflow the global 1.2989 + // stack, we should have definitely removed some entries from the 1.2990 + // local queue. So, there must be space on it. 
1.2991 + bool success = _task_queue->push(obj); 1.2992 + tmp_guarantee_CM( success, "invariant" ); 1.2993 + } 1.2994 + 1.2995 + statsOnly( int tmp_size = _task_queue->size(); 1.2996 + if (tmp_size > _local_max_size) 1.2997 + _local_max_size = tmp_size; 1.2998 + ++_local_pushes ); 1.2999 +} 1.3000 + 1.3001 +void CMTask::reached_limit() { 1.3002 + tmp_guarantee_CM( _words_scanned >= _words_scanned_limit || 1.3003 + _refs_reached >= _refs_reached_limit , 1.3004 + "shouldn't have been called otherwise" ); 1.3005 + regular_clock_call(); 1.3006 +} 1.3007 + 1.3008 +void CMTask::regular_clock_call() { 1.3009 + if (has_aborted()) 1.3010 + return; 1.3011 + 1.3012 + // First, we need to recalculate the words scanned and refs reached 1.3013 + // limits for the next clock call. 1.3014 + recalculate_limits(); 1.3015 + 1.3016 + // During the regular clock call we do the following 1.3017 + 1.3018 + // (1) If an overflow has been flagged, then we abort. 1.3019 + if (_cm->has_overflown()) { 1.3020 + set_has_aborted(); 1.3021 + return; 1.3022 + } 1.3023 + 1.3024 + // If we are not concurrent (i.e. we're doing remark) we don't need 1.3025 + // to check anything else. The other steps are only needed during 1.3026 + // the concurrent marking phase. 1.3027 + if (!concurrent()) 1.3028 + return; 1.3029 + 1.3030 + // (2) If marking has been aborted for Full GC, then we also abort. 1.3031 + if (_cm->has_aborted()) { 1.3032 + set_has_aborted(); 1.3033 + statsOnly( ++_aborted_cm_aborted ); 1.3034 + return; 1.3035 + } 1.3036 + 1.3037 + double curr_time_ms = os::elapsedVTime() * 1000.0; 1.3038 + 1.3039 + // (3) If marking stats are enabled, then we update the step history. 
1.3040 +#if _MARKING_STATS_ 1.3041 + if (_words_scanned >= _words_scanned_limit) 1.3042 + ++_clock_due_to_scanning; 1.3043 + if (_refs_reached >= _refs_reached_limit) 1.3044 + ++_clock_due_to_marking; 1.3045 + 1.3046 + double last_interval_ms = curr_time_ms - _interval_start_time_ms; 1.3047 + _interval_start_time_ms = curr_time_ms; 1.3048 + _all_clock_intervals_ms.add(last_interval_ms); 1.3049 + 1.3050 + if (_cm->verbose_medium()) { 1.3051 + gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, " 1.3052 + "scanned = %d%s, refs reached = %d%s", 1.3053 + _task_id, last_interval_ms, 1.3054 + _words_scanned, 1.3055 + (_words_scanned >= _words_scanned_limit) ? " (*)" : "", 1.3056 + _refs_reached, 1.3057 + (_refs_reached >= _refs_reached_limit) ? " (*)" : ""); 1.3058 + } 1.3059 +#endif // _MARKING_STATS_ 1.3060 + 1.3061 + // (4) We check whether we should yield. If we have to, then we abort. 1.3062 + if (_cm->should_yield()) { 1.3063 + // We should yield. To do this we abort the task. The caller is 1.3064 + // responsible for yielding. 1.3065 + set_has_aborted(); 1.3066 + statsOnly( ++_aborted_yield ); 1.3067 + return; 1.3068 + } 1.3069 + 1.3070 + // (5) We check whether we've reached our time quota. If we have, 1.3071 + // then we abort. 1.3072 + double elapsed_time_ms = curr_time_ms - _start_time_ms; 1.3073 + if (elapsed_time_ms > _time_target_ms) { 1.3074 + set_has_aborted(); 1.3075 + _has_aborted_timed_out = true; 1.3076 + statsOnly( ++_aborted_timed_out ); 1.3077 + return; 1.3078 + } 1.3079 + 1.3080 + // (6) Finally, we check whether there are enough completed STAB 1.3081 + // buffers available for processing. If there are, we abort. 
1.3082 + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1.3083 + if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) { 1.3084 + if (_cm->verbose_low()) 1.3085 + gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers", 1.3086 + _task_id); 1.3087 + // we do need to process SATB buffers, we'll abort and restart 1.3088 + // the marking task to do so 1.3089 + set_has_aborted(); 1.3090 + statsOnly( ++_aborted_satb ); 1.3091 + return; 1.3092 + } 1.3093 +} 1.3094 + 1.3095 +void CMTask::recalculate_limits() { 1.3096 + _real_words_scanned_limit = _words_scanned + words_scanned_period; 1.3097 + _words_scanned_limit = _real_words_scanned_limit; 1.3098 + 1.3099 + _real_refs_reached_limit = _refs_reached + refs_reached_period; 1.3100 + _refs_reached_limit = _real_refs_reached_limit; 1.3101 +} 1.3102 + 1.3103 +void CMTask::decrease_limits() { 1.3104 + // This is called when we believe that we're going to do an infrequent 1.3105 + // operation which will increase the per byte scanned cost (i.e. move 1.3106 + // entries to/from the global stack). It basically tries to decrease the 1.3107 + // scanning limit so that the clock is called earlier. 
1.3108 + 1.3109 + if (_cm->verbose_medium()) 1.3110 + gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); 1.3111 + 1.3112 + _words_scanned_limit = _real_words_scanned_limit - 1.3113 + 3 * words_scanned_period / 4; 1.3114 + _refs_reached_limit = _real_refs_reached_limit - 1.3115 + 3 * refs_reached_period / 4; 1.3116 +} 1.3117 + 1.3118 +void CMTask::move_entries_to_global_stack() { 1.3119 + // local array where we'll store the entries that will be popped 1.3120 + // from the local queue 1.3121 + oop buffer[global_stack_transfer_size]; 1.3122 + 1.3123 + int n = 0; 1.3124 + oop obj; 1.3125 + while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { 1.3126 + buffer[n] = obj; 1.3127 + ++n; 1.3128 + } 1.3129 + 1.3130 + if (n > 0) { 1.3131 + // we popped at least one entry from the local queue 1.3132 + 1.3133 + statsOnly( ++_global_transfers_to; _local_pops += n ); 1.3134 + 1.3135 + if (!_cm->mark_stack_push(buffer, n)) { 1.3136 + if (_cm->verbose_low()) 1.3137 + gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id); 1.3138 + set_has_aborted(); 1.3139 + } else { 1.3140 + // the transfer was successful 1.3141 + 1.3142 + if (_cm->verbose_medium()) 1.3143 + gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", 1.3144 + _task_id, n); 1.3145 + statsOnly( int tmp_size = _cm->mark_stack_size(); 1.3146 + if (tmp_size > _global_max_size) 1.3147 + _global_max_size = tmp_size; 1.3148 + _global_pushes += n ); 1.3149 + } 1.3150 + } 1.3151 + 1.3152 + // this operation was quite expensive, so decrease the limits 1.3153 + decrease_limits(); 1.3154 +} 1.3155 + 1.3156 +void CMTask::get_entries_from_global_stack() { 1.3157 + // local array where we'll store the entries that will be popped 1.3158 + // from the global stack. 
1.3159 + oop buffer[global_stack_transfer_size]; 1.3160 + int n; 1.3161 + _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); 1.3162 + tmp_guarantee_CM( n <= global_stack_transfer_size, 1.3163 + "we should not pop more than the given limit" ); 1.3164 + if (n > 0) { 1.3165 + // yes, we did actually pop at least one entry 1.3166 + 1.3167 + statsOnly( ++_global_transfers_from; _global_pops += n ); 1.3168 + if (_cm->verbose_medium()) 1.3169 + gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", 1.3170 + _task_id, n); 1.3171 + for (int i = 0; i < n; ++i) { 1.3172 + bool success = _task_queue->push(buffer[i]); 1.3173 + // We only call this when the local queue is empty or under a 1.3174 + // given target limit. So, we do not expect this push to fail. 1.3175 + tmp_guarantee_CM( success, "invariant" ); 1.3176 + } 1.3177 + 1.3178 + statsOnly( int tmp_size = _task_queue->size(); 1.3179 + if (tmp_size > _local_max_size) 1.3180 + _local_max_size = tmp_size; 1.3181 + _local_pushes += n ); 1.3182 + } 1.3183 + 1.3184 + // this operation was quite expensive, so decrease the limits 1.3185 + decrease_limits(); 1.3186 +} 1.3187 + 1.3188 +void CMTask::drain_local_queue(bool partially) { 1.3189 + if (has_aborted()) 1.3190 + return; 1.3191 + 1.3192 + // Decide what the target size is, depending whether we're going to 1.3193 + // drain it partially (so that other tasks can steal if they run out 1.3194 + // of things to do) or totally (at the very end). 
1.3195 + size_t target_size; 1.3196 + if (partially) 1.3197 + target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); 1.3198 + else 1.3199 + target_size = 0; 1.3200 + 1.3201 + if (_task_queue->size() > target_size) { 1.3202 + if (_cm->verbose_high()) 1.3203 + gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", 1.3204 + _task_id, target_size); 1.3205 + 1.3206 + oop obj; 1.3207 + bool ret = _task_queue->pop_local(obj); 1.3208 + while (ret) { 1.3209 + statsOnly( ++_local_pops ); 1.3210 + 1.3211 + if (_cm->verbose_high()) 1.3212 + gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, 1.3213 + (void*) obj); 1.3214 + 1.3215 + tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj), 1.3216 + "invariant" ); 1.3217 + 1.3218 + scan_object(obj); 1.3219 + 1.3220 + if (_task_queue->size() <= target_size || has_aborted()) 1.3221 + ret = false; 1.3222 + else 1.3223 + ret = _task_queue->pop_local(obj); 1.3224 + } 1.3225 + 1.3226 + if (_cm->verbose_high()) 1.3227 + gclog_or_tty->print_cr("[%d] drained local queue, size = %d", 1.3228 + _task_id, _task_queue->size()); 1.3229 + } 1.3230 +} 1.3231 + 1.3232 +void CMTask::drain_global_stack(bool partially) { 1.3233 + if (has_aborted()) 1.3234 + return; 1.3235 + 1.3236 + // We have a policy to drain the local queue before we attempt to 1.3237 + // drain the global stack. 1.3238 + tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" ); 1.3239 + 1.3240 + // Decide what the target size is, depending whether we're going to 1.3241 + // drain it partially (so that other tasks can steal if they run out 1.3242 + // of things to do) or totally (at the very end). Notice that, 1.3243 + // because we move entries from the global stack in chunks or 1.3244 + // because another task might be doing the same, we might in fact 1.3245 + // drop below the target. But, this is not a problem. 
1.3246 + size_t target_size; 1.3247 + if (partially) 1.3248 + target_size = _cm->partial_mark_stack_size_target(); 1.3249 + else 1.3250 + target_size = 0; 1.3251 + 1.3252 + if (_cm->mark_stack_size() > target_size) { 1.3253 + if (_cm->verbose_low()) 1.3254 + gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", 1.3255 + _task_id, target_size); 1.3256 + 1.3257 + while (!has_aborted() && _cm->mark_stack_size() > target_size) { 1.3258 + get_entries_from_global_stack(); 1.3259 + drain_local_queue(partially); 1.3260 + } 1.3261 + 1.3262 + if (_cm->verbose_low()) 1.3263 + gclog_or_tty->print_cr("[%d] drained global stack, size = %d", 1.3264 + _task_id, _cm->mark_stack_size()); 1.3265 + } 1.3266 +} 1.3267 + 1.3268 +// SATB Queue has several assumptions on whether to call the par or 1.3269 +// non-par versions of the methods. this is why some of the code is 1.3270 +// replicated. We should really get rid of the single-threaded version 1.3271 +// of the code to simplify things. 1.3272 +void CMTask::drain_satb_buffers() { 1.3273 + if (has_aborted()) 1.3274 + return; 1.3275 + 1.3276 + // We set this so that the regular clock knows that we're in the 1.3277 + // middle of draining buffers and doesn't set the abort flag when it 1.3278 + // notices that SATB buffers are available for draining. It'd be 1.3279 + // very counter productive if it did that. :-) 1.3280 + _draining_satb_buffers = true; 1.3281 + 1.3282 + CMObjectClosure oc(this); 1.3283 + SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 1.3284 + if (ParallelGCThreads > 0) 1.3285 + satb_mq_set.set_par_closure(_task_id, &oc); 1.3286 + else 1.3287 + satb_mq_set.set_closure(&oc); 1.3288 + 1.3289 + // This keeps claiming and applying the closure to completed buffers 1.3290 + // until we run out of buffers or we need to abort. 
1.3291 + if (ParallelGCThreads > 0) { 1.3292 + while (!has_aborted() && 1.3293 + satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 1.3294 + if (_cm->verbose_medium()) 1.3295 + gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 1.3296 + statsOnly( ++_satb_buffers_processed ); 1.3297 + regular_clock_call(); 1.3298 + } 1.3299 + } else { 1.3300 + while (!has_aborted() && 1.3301 + satb_mq_set.apply_closure_to_completed_buffer()) { 1.3302 + if (_cm->verbose_medium()) 1.3303 + gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 1.3304 + statsOnly( ++_satb_buffers_processed ); 1.3305 + regular_clock_call(); 1.3306 + } 1.3307 + } 1.3308 + 1.3309 + if (!concurrent() && !has_aborted()) { 1.3310 + // We should only do this during remark. 1.3311 + if (ParallelGCThreads > 0) 1.3312 + satb_mq_set.par_iterate_closure_all_threads(_task_id); 1.3313 + else 1.3314 + satb_mq_set.iterate_closure_all_threads(); 1.3315 + } 1.3316 + 1.3317 + _draining_satb_buffers = false; 1.3318 + 1.3319 + tmp_guarantee_CM( has_aborted() || 1.3320 + concurrent() || 1.3321 + satb_mq_set.completed_buffers_num() == 0, "invariant" ); 1.3322 + 1.3323 + if (ParallelGCThreads > 0) 1.3324 + satb_mq_set.set_par_closure(_task_id, NULL); 1.3325 + else 1.3326 + satb_mq_set.set_closure(NULL); 1.3327 + 1.3328 + // again, this was a potentially expensive operation, decrease the 1.3329 + // limits to get the regular clock call early 1.3330 + decrease_limits(); 1.3331 +} 1.3332 + 1.3333 +void CMTask::drain_region_stack(BitMapClosure* bc) { 1.3334 + if (has_aborted()) 1.3335 + return; 1.3336 + 1.3337 + tmp_guarantee_CM( _region_finger == NULL, 1.3338 + "it should be NULL when we're not scanning a region" ); 1.3339 + 1.3340 + if (!_cm->region_stack_empty()) { 1.3341 + if (_cm->verbose_low()) 1.3342 + gclog_or_tty->print_cr("[%d] draining region stack, size = %d", 1.3343 + _task_id, _cm->region_stack_size()); 1.3344 + 1.3345 + MemRegion mr = _cm->region_stack_pop(); 1.3346 + // it 
returns MemRegion() if the pop fails 1.3347 + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 1.3348 + 1.3349 + while (mr.start() != NULL) { 1.3350 + if (_cm->verbose_medium()) 1.3351 + gclog_or_tty->print_cr("[%d] we are scanning region " 1.3352 + "["PTR_FORMAT", "PTR_FORMAT")", 1.3353 + _task_id, mr.start(), mr.end()); 1.3354 + tmp_guarantee_CM( mr.end() <= _cm->finger(), 1.3355 + "otherwise the region shouldn't be on the stack" ); 1.3356 + assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); 1.3357 + if (_nextMarkBitMap->iterate(bc, mr)) { 1.3358 + tmp_guarantee_CM( !has_aborted(), 1.3359 + "cannot abort the task without aborting the bitmap iteration" ); 1.3360 + 1.3361 + // We finished iterating over the region without aborting. 1.3362 + regular_clock_call(); 1.3363 + if (has_aborted()) 1.3364 + mr = MemRegion(); 1.3365 + else { 1.3366 + mr = _cm->region_stack_pop(); 1.3367 + // it returns MemRegion() if the pop fails 1.3368 + statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); 1.3369 + } 1.3370 + } else { 1.3371 + guarantee( has_aborted(), "currently the only way to do so" ); 1.3372 + 1.3373 + // The only way to abort the bitmap iteration is to return 1.3374 + // false from the do_bit() method. However, inside the 1.3375 + // do_bit() method we move the _region_finger to point to the 1.3376 + // object currently being looked at. So, if we bail out, we 1.3377 + // have definitely set _region_finger to something non-null. 1.3378 + guarantee( _region_finger != NULL, "invariant" ); 1.3379 + 1.3380 + // The iteration was actually aborted. So now _region_finger 1.3381 + // points to the address of the object we last scanned. If we 1.3382 + // leave it there, when we restart this task, we will rescan 1.3383 + // the object. It is easy to avoid this. 
We move the finger by 1.3384 + // enough to point to the next possible object header (the 1.3385 + // bitmap knows by how much we need to move it as it knows its 1.3386 + // granularity). 1.3387 + MemRegion newRegion = 1.3388 + MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end()); 1.3389 + 1.3390 + if (!newRegion.is_empty()) { 1.3391 + if (_cm->verbose_low()) { 1.3392 + gclog_or_tty->print_cr("[%d] pushing unscanned region" 1.3393 + "[" PTR_FORMAT "," PTR_FORMAT ") on region stack", 1.3394 + _task_id, 1.3395 + newRegion.start(), newRegion.end()); 1.3396 + } 1.3397 + // Now push the part of the region we didn't scan on the 1.3398 + // region stack to make sure a task scans it later. 1.3399 + _cm->region_stack_push(newRegion); 1.3400 + } 1.3401 + // break from while 1.3402 + mr = MemRegion(); 1.3403 + } 1.3404 + _region_finger = NULL; 1.3405 + } 1.3406 + 1.3407 + // We only push regions on the region stack during evacuation 1.3408 + // pauses. So if we come out the above iteration because we region 1.3409 + // stack is empty, it will remain empty until the next yield 1.3410 + // point. So, the guarantee below is safe. 
    guarantee( has_aborted() || _cm->region_stack_empty(),
               "only way to exit the loop" );

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
                             _task_id, _cm->region_stack_size());
  }
}

// Dumps the cumulative marking statistics gathered by this task to the
// GC log. The elapsed / termination / step timing numbers are always
// printed; the more detailed counters (clock intervals, queue and stack
// traffic, steal and abort causes) are maintained via the statsOnly()
// macro and are only compiled in when _MARKING_STATS_ is enabled.
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to,_global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
                         _regions_claimed, _region_stack_pops);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms) method is the building block
    of the parallel marking framework. It can be called in parallel
    with other invocations of do_marking_step() on different tasks
    (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses too, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. The local queue of the task which is accessed
      reasonably efficiently by the task. Other tasks can steal from
      it when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work, a task will totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and more fine-grain
      interaction with it which might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work, tasks
      will totally drain the global mark stack.

      (4) Global Region Stack. Entries on it correspond to areas of
      the bitmap that need to be scanned since they contain gray
      objects. Pushes on the region stack only happen during
      evacuation pauses and typically correspond to areas covered by
      GC LABs. If it overflows, then the marking phase should restart
      and iterate over the bitmap to identify gray objects. Tasks will
      try to totally drain the region stack as soon as possible.

      (5) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (either on the global stack or the
      region stack) has been triggered. Before the task aborts, it
      will actually sync up with the other tasks to ensure that all
      the marking data structures (local queues, stacks, fingers etc.)
      are re-initialised so that when do_marking_step() completes,
      the marking phase can immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub ms intervals) throughout marking. It is this clock method that
    checks all the abort conditions which were mentioned above and
    decides when the task should abort. A work-based scheme is used to
    trigger this clock method: when the number of object words the
    marking phase has scanned or the number of references the marking
    phase has visited reach a given limit. Additional invocations to
    the method clock have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
    vtime too regularly, as it is quite expensive. So, once it was in
    place, it was natural to piggy-back all the other conditions on it
    too and not constantly check them throughout the code.

 *****************************************************************************/

// Performs one bounded chunk of marking work; see the large comment
// block above for the full protocol, data structures, and abort
// conditions. time_target_ms is the soft deadline for this step (the
// actual target is shrunk by the historical prediction error).
void CMTask::do_marking_step(double time_target_ms) {
  guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
  guarantee( concurrent() == _cm->concurrent(), "they should be the same" );

  guarantee( concurrent() || _cm->region_stack_empty(),
             "the region stack should have been cleared before remark" );
  guarantee( _region_finger == NULL,
             "this should be non-null only when a region is being scanned" );

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  guarantee( _task_queues != NULL, "invariant" );
  guarantee( _task_queue != NULL, "invariant" );
  guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );

  guarantee( !_claimed,
             "only one thread should claim this task at any one time" );

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // Shrink the time target by the predicted overshoot, based on how
  // previous steps missed their targets.
  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_aborted_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low())
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  CMOopClosure    oop_closure(_g1h, _cm, this);
  set_oop_closure(&oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the region stack or the mark stack overflows
    // during a GC pause and this task, after a yield point,
    // restarts. We have to abort as we need to get into the overflow
    // protocol which happens right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Then totally drain the region stack. We will not look at
  // it again before the next invocation of this method. Entries on
  // the region stack are only added during evacuation pauses, for
  // which we have to yield. When we do, we abort the task anyway so
  // it will look at the region stack again when it restarts.
  bitmap_closure.set_scanning_heap_region(false);
  drain_region_stack(&bitmap_closure);
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main loop: repeatedly scan the remainder of the current region's
  // bitmap, then try to claim a new region, until we run out of
  // regions or abort.
  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      tmp_guarantee_CM( _finger != NULL,
                        "if region is not NULL, then the finger "
                        "should not be NULL either" );

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
      MemRegion mr = MemRegion(_finger, _region_limit);

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] we're scanning part "
                               "["PTR_FORMAT", "PTR_FORMAT") "
                               "of region "PTR_FORMAT,
                               _task_id, _finger, _region_limit, _curr_region);

      // Let's iterate over the bitmap of the part of the
      // region that is left.
      bitmap_closure.set_scanning_heap_region(true);
      if (mr.is_empty() ||
          _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
        // We successfully completed iterating over the region. Now,
        // let's give up the region.
        giveup_current_region();
        regular_clock_call();
      } else {
        guarantee( has_aborted(), "currently the only way to do so" );
        // The only way to abort the bitmap iteration is to return
        // false from the do_bit() method. However, inside the
        // do_bit() method we move the _finger to point to the
        // object currently being looked at. So, if we bail out, we
        // have definitely set _finger to something non-null.
        guarantee( _finger != NULL, "invariant" );

        // Region iteration was actually aborted. So now _finger
        // points to the address of the object we last scanned. If we
        // leave it there, when we restart this task, we will rescan
        // the object. It is easy to avoid this. We move the finger by
        // enough to point to the next possible object header (the
        // bitmap knows by how much we need to move it as it knows its
        // granularity).
        move_finger_to(_nextMarkBitMap->nextWord(_finger));
      }
    }
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    // (Do we really need this?)
    drain_local_queue(true);
    drain_global_stack(true);

    // Read the note on the claim_region() method on why it might
    // return NULL with potentially more regions available for
    // claiming and why we have to check out_of_regions() to determine
    // whether we're done or not.
    while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
      // We are going to try to claim a new region. We should have
      // given up on the previous one.
      tmp_guarantee_CM( _curr_region  == NULL &&
                        _finger       == NULL &&
                        _region_limit == NULL, "invariant" );
      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
      HeapRegion* claimed_region = _cm->claim_region(_task_id);
      if (claimed_region != NULL) {
        // Yes, we managed to claim one
        statsOnly( ++_regions_claimed );

        if (_cm->verbose_low())
          gclog_or_tty->print_cr("[%d] we successfully claimed "
                                 "region "PTR_FORMAT,
                                 _task_id, claimed_region);

        setup_for_region(claimed_region);
        tmp_guarantee_CM( _curr_region == claimed_region, "invariant" );
      }
      // It is important to call the regular clock here. It might take
      // a while to claim a region if, for example, we hit a large
      // block of empty regions. So we need to call the regular clock
      // method once round the loop to make sure it's called
      // frequently enough.
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      tmp_guarantee_CM( _cm->out_of_regions(),
                        "at this point we should be out of regions" );
    }
  } while ( _curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    tmp_guarantee_CM( _cm->out_of_regions() && _cm->region_stack_empty(),
                      "at this point we should be out of regions" );

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (!has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    guarantee( _cm->out_of_regions() &&
               _cm->region_stack_empty() &&
               _task_queue->size() == 0, "only way to reach here" );

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium())
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);

        statsOnly( ++_steals );

        tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj),
                          "any stolen object should be marked" );
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    guarantee( _cm->out_of_regions() &&
               _cm->region_stack_empty() &&
               _task_queue->size() == 0, "only way to reach here" );

    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;
    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
    bool finished = _cm->terminator()->offer_termination(this);
    double termination_end_time_ms = os::elapsedVTime() * 1000.0;
    _termination_time_ms +=
      termination_end_time_ms - _termination_start_time_ms;

    if (finished) {
      // We're all done.

      if (_task_id == 0) {
        // let's allow task 0 to do this
        if (concurrent()) {
          guarantee( _cm->concurrent_marking_in_progress(), "invariant" );
          // we need to set this to false before the next
          // safepoint. This way we ensure that the marking phase
          // doesn't observe any more heap expansions.
          _cm->clear_concurrent_marking_in_progress();
        }
      }

      // We can now guarantee that the global stack is empty, since
      // all other tasks have finished.
      guarantee( _cm->out_of_regions() &&
                 _cm->region_stack_empty() &&
                 _cm->mark_stack_empty() &&
                 _task_queue->size() == 0 &&
                 !_cm->has_overflown() &&
                 !_cm->mark_stack_overflow() &&
                 !_cm->region_stack_overflow(),
                 "only way to reach here" );

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
    } else {
      // Apparently there's more work to do. Let's abort this task. It
      // will restart it and we can hopefully find more things to do.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);

      set_has_aborted();
      statsOnly( ++_aborted_termination );
    }
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_oop_closure(NULL);
  double end_time_ms = os::elapsedVTime() * 1000.0;
  double elapsed_time_ms = end_time_ms - _start_time_ms;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.

    statsOnly( ++_aborted );

    if (_has_aborted_timed_out) {
      double diff_ms = elapsed_time_ms - _time_target_ms;
      // Keep statistics of how well we did with respect to hitting
      // our target only if we actually timed out (if we aborted for
      // other reasons, then the results might get skewed).
      _marking_step_diffs_ms.add(diff_ms);
    }

    if (_cm->has_overflown()) {
      // This is the interesting one. We aborted because a global
      // overflow was raised. This means we have to restart the
      // marking phase and start iterating over regions. However, in
      // order to do this we have to make sure that all tasks stop
      // what they are doing and re-initialise in a safe manner. We
      // will achieve this with the use of two barrier sync points.

      if (_cm->verbose_low())
        gclog_or_tty->print_cr("[%d] detected overflow", _task_id);

      _cm->enter_first_sync_barrier(_task_id);
      // When we exit this sync barrier we know that all tasks have
      // stopped doing marking work. So, it's now safe to
      // re-initialise our data structures. At the end of this method,
      // task 0 will clear the global data structures.

      statsOnly( ++_aborted_overflow );

      // We clear the local state of this task...
      clear_region_fields();

      // ...and enter the second barrier.
      _cm->enter_second_sync_barrier(_task_id);
      // At this point everything has been re-initialised and we're
      // ready to restart.
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
      if (_cm->has_aborted())
        gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
                               _task_id);
    }
  } else {
    if (_cm->verbose_low())
      gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
                             "elapsed = %1.2lfms <<<<<<<<<<",
                             _task_id, _time_target_ms, elapsed_time_ms);
  }

  _claimed = false;
}

// CMTask constructor. Only identity and queue wiring are established
// here; _nextMarkBitMap and _oop_closure start out NULL.
// NOTE(review): the bitmap is presumably installed later, before
// marking starts — confirm with the ConcurrentMark setup code.
CMTask::CMTask(int task_id,
               ConcurrentMark* cm,
               CMTaskQueue* task_queue,
               CMTaskQueueSet* task_queues)
  : _g1h(G1CollectedHeap::heap()),
    _co_tracker(G1CMGroup),
    _task_id(task_id), _cm(cm),
    _claimed(false),
    _nextMarkBitMap(NULL), _hash_seed(17),
    _task_queue(task_queue),
    _task_queues(task_queues),
    _oop_closure(NULL) {
  guarantee( task_queue != NULL, "invariant" );
  guarantee( task_queues != NULL, "invariant" );

  statsOnly( _clock_due_to_scanning = 0;
             _clock_due_to_marking  = 0 );

  // Seed the prediction history (read by do_marking_step() through
  // g1_policy->get_new_prediction()) so the very first step already
  // has a data point to work with.
  _marking_step_diffs_ms.add(0.5);
}