ysr@777: /*
ysr@777:  * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
ysr@777:  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
ysr@777:  *
ysr@777:  * This code is free software; you can redistribute it and/or modify it
ysr@777:  * under the terms of the GNU General Public License version 2 only, as
ysr@777:  * published by the Free Software Foundation.
ysr@777:  *
ysr@777:  * This code is distributed in the hope that it will be useful, but WITHOUT
ysr@777:  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
ysr@777:  * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
ysr@777:  * version 2 for more details (a copy is included in the LICENSE file that
ysr@777:  * accompanied this code).
ysr@777:  *
ysr@777:  * You should have received a copy of the GNU General Public License version
ysr@777:  * 2 along with this work; if not, write to the Free Software Foundation,
ysr@777:  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
ysr@777:  *
ysr@777:  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
ysr@777:  * CA 95054 USA or visit www.sun.com if you need additional information or
ysr@777:  * have any questions.
ysr@777:  *
ysr@777:  */
ysr@777:
ysr@777: #include "incls/_precompiled.incl"
ysr@777: #include "incls/_concurrentMark.cpp.incl"
ysr@777:
ysr@777: //
ysr@777: // CMS Bit Map Wrapper
ysr@777:
ysr@777: CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
ysr@777:   _bm((uintptr_t*)NULL,0),
ysr@777:   _shifter(shifter) {
ysr@777:   _bmStartWord = (HeapWord*)(rs.base());
ysr@777:   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
ysr@777:   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
ysr@777:                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
ysr@777:
ysr@777:   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
ysr@777:   // For now we'll just commit all of the bit map up front.
ysr@777:   // Later on we'll try to be more parsimonious with swap.
ysr@777:   guarantee(_virtual_space.initialize(brs, brs.size()),
ysr@777:             "couldn't reserve backing store for CMS bit map");
ysr@777:   assert(_virtual_space.committed_size() == brs.size(),
ysr@777:          "didn't reserve backing store for all of CMS bit map?");
ysr@777:   _bm.set_map((uintptr_t*)_virtual_space.low());
ysr@777:   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
ysr@777:          _bmWordSize, "inconsistency in bit map sizing");
ysr@777:   _bm.set_size(_bmWordSize >> _shifter);
ysr@777: }
ysr@777:
ysr@777: HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
ysr@777:                                                HeapWord* limit) const {
ysr@777:   // First we must round addr *up* to a possible object boundary.
ysr@777: addr = (HeapWord*)align_size_up((intptr_t)addr, ysr@777: HeapWordSize << _shifter); ysr@777: size_t addrOffset = heapWordToOffset(addr); ysr@777: if (limit == NULL) limit = _bmStartWord + _bmWordSize; ysr@777: size_t limitOffset = heapWordToOffset(limit); ysr@777: size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset); ysr@777: HeapWord* nextAddr = offsetToHeapWord(nextOffset); ysr@777: assert(nextAddr >= addr, "get_next_one postcondition"); ysr@777: assert(nextAddr == limit || isMarked(nextAddr), ysr@777: "get_next_one postcondition"); ysr@777: return nextAddr; ysr@777: } ysr@777: ysr@777: HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr, ysr@777: HeapWord* limit) const { ysr@777: size_t addrOffset = heapWordToOffset(addr); ysr@777: if (limit == NULL) limit = _bmStartWord + _bmWordSize; ysr@777: size_t limitOffset = heapWordToOffset(limit); ysr@777: size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset); ysr@777: HeapWord* nextAddr = offsetToHeapWord(nextOffset); ysr@777: assert(nextAddr >= addr, "get_next_one postcondition"); ysr@777: assert(nextAddr == limit || !isMarked(nextAddr), ysr@777: "get_next_one postcondition"); ysr@777: return nextAddr; ysr@777: } ysr@777: ysr@777: int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const { ysr@777: assert((diff & ((1 << _shifter) - 1)) == 0, "argument check"); ysr@777: return (int) (diff >> _shifter); ysr@777: } ysr@777: ysr@777: bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) { ysr@777: HeapWord* left = MAX2(_bmStartWord, mr.start()); ysr@777: HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end()); ysr@777: if (right > left) { ysr@777: // Right-open interval [leftOffset, rightOffset). ysr@777: return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right)); ysr@777: } else { ysr@777: return true; ysr@777: } ysr@777: } ysr@777: ysr@777: void CMBitMapRO::mostly_disjoint_range_union(BitMap* from_bitmap, ysr@777: size_t from_start_index, ysr@777: HeapWord* to_start_word, ysr@777: size_t word_num) { ysr@777: _bm.mostly_disjoint_range_union(from_bitmap, ysr@777: from_start_index, ysr@777: heapWordToOffset(to_start_word), ysr@777: word_num); ysr@777: } ysr@777: ysr@777: #ifndef PRODUCT ysr@777: bool CMBitMapRO::covers(ReservedSpace rs) const { ysr@777: // assert(_bm.map() == _virtual_space.low(), "map inconsistency"); ysr@777: assert(((size_t)_bm.size() * (1 << _shifter)) == _bmWordSize, ysr@777: "size inconsistency"); ysr@777: return _bmStartWord == (HeapWord*)(rs.base()) && ysr@777: _bmWordSize == rs.size()>>LogHeapWordSize; ysr@777: } ysr@777: #endif ysr@777: ysr@777: void CMBitMap::clearAll() { ysr@777: _bm.clear(); ysr@777: return; ysr@777: } ysr@777: ysr@777: void CMBitMap::markRange(MemRegion mr) { ysr@777: mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); ysr@777: assert(!mr.is_empty(), "unexpected empty region"); ysr@777: assert((offsetToHeapWord(heapWordToOffset(mr.end())) == ysr@777: ((HeapWord *) mr.end())), ysr@777: "markRange memory region end is not card aligned"); ysr@777: // convert address range into offset range ysr@777: _bm.at_put_range(heapWordToOffset(mr.start()), ysr@777: heapWordToOffset(mr.end()), true); ysr@777: } ysr@777: ysr@777: void CMBitMap::clearRange(MemRegion mr) { ysr@777: mr.intersection(MemRegion(_bmStartWord, _bmWordSize)); ysr@777: assert(!mr.is_empty(), "unexpected empty region"); ysr@777: // convert address range into offset range ysr@777: _bm.at_put_range(heapWordToOffset(mr.start()), ysr@777: heapWordToOffset(mr.end()), 
false); ysr@777: } ysr@777: ysr@777: MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr, ysr@777: HeapWord* end_addr) { ysr@777: HeapWord* start = getNextMarkedWordAddress(addr); ysr@777: start = MIN2(start, end_addr); ysr@777: HeapWord* end = getNextUnmarkedWordAddress(start); ysr@777: end = MIN2(end, end_addr); ysr@777: assert(start <= end, "Consistency check"); ysr@777: MemRegion mr(start, end); ysr@777: if (!mr.is_empty()) { ysr@777: clearRange(mr); ysr@777: } ysr@777: return mr; ysr@777: } ysr@777: ysr@777: CMMarkStack::CMMarkStack(ConcurrentMark* cm) : ysr@777: _base(NULL), _cm(cm) ysr@777: #ifdef ASSERT ysr@777: , _drain_in_progress(false) ysr@777: , _drain_in_progress_yields(false) ysr@777: #endif ysr@777: {} ysr@777: ysr@777: void CMMarkStack::allocate(size_t size) { ysr@777: _base = NEW_C_HEAP_ARRAY(oop, size); ysr@777: if (_base == NULL) ysr@777: vm_exit_during_initialization("Failed to allocate " ysr@777: "CM region mark stack"); ysr@777: _index = 0; ysr@777: // QQQQ cast ... ysr@777: _capacity = (jint) size; ysr@777: _oops_do_bound = -1; ysr@777: NOT_PRODUCT(_max_depth = 0); ysr@777: } ysr@777: ysr@777: CMMarkStack::~CMMarkStack() { ysr@777: if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); ysr@777: } ysr@777: ysr@777: void CMMarkStack::par_push(oop ptr) { ysr@777: while (true) { ysr@777: if (isFull()) { ysr@777: _overflow = true; ysr@777: return; ysr@777: } ysr@777: // Otherwise... ysr@777: jint index = _index; ysr@777: jint next_index = index+1; ysr@777: jint res = Atomic::cmpxchg(next_index, &_index, index); ysr@777: if (res == index) { ysr@777: _base[index] = ptr; ysr@777: // Note that we don't maintain this atomically. We could, but it ysr@777: // doesn't seem necessary. ysr@777: NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); ysr@777: return; ysr@777: } ysr@777: // Otherwise, we need to try again. ysr@777: } ysr@777: } ysr@777: ysr@777: void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) { ysr@777: while (true) { ysr@777: if (isFull()) { ysr@777: _overflow = true; ysr@777: return; ysr@777: } ysr@777: // Otherwise... ysr@777: jint index = _index; ysr@777: jint next_index = index + n; ysr@777: if (next_index > _capacity) { ysr@777: _overflow = true; ysr@777: return; ysr@777: } ysr@777: jint res = Atomic::cmpxchg(next_index, &_index, index); ysr@777: if (res == index) { ysr@777: for (int i = 0; i < n; i++) { ysr@777: int ind = index + i; ysr@777: assert(ind < _capacity, "By overflow test above."); ysr@777: _base[ind] = ptr_arr[i]; ysr@777: } ysr@777: NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index)); ysr@777: return; ysr@777: } ysr@777: // Otherwise, we need to try again. ysr@777: } ysr@777: } ysr@777: ysr@777: ysr@777: void CMMarkStack::par_push_arr(oop* ptr_arr, int n) { ysr@777: MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); ysr@777: jint start = _index; ysr@777: jint next_index = start + n; ysr@777: if (next_index > _capacity) { ysr@777: _overflow = true; ysr@777: return; ysr@777: } ysr@777: // Otherwise. 
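ysr@777:   // The ParGCRareEvent_lock is held here, so this bulk push can claim its
ysr@777:   // slots with a plain bump of _index, rather than the CAS retry loop used
ysr@777:   // in par_push() and par_adjoin_arr() above.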
ysr@777: _index = next_index; ysr@777: for (int i = 0; i < n; i++) { ysr@777: int ind = start + i; ysr@777: guarantee(ind < _capacity, "By overflow test above."); ysr@777: _base[ind] = ptr_arr[i]; ysr@777: } ysr@777: } ysr@777: ysr@777: ysr@777: bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) { ysr@777: MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); ysr@777: jint index = _index; ysr@777: if (index == 0) { ysr@777: *n = 0; ysr@777: return false; ysr@777: } else { ysr@777: int k = MIN2(max, index); ysr@777: jint new_ind = index - k; ysr@777: for (int j = 0; j < k; j++) { ysr@777: ptr_arr[j] = _base[new_ind + j]; ysr@777: } ysr@777: _index = new_ind; ysr@777: *n = k; ysr@777: return true; ysr@777: } ysr@777: } ysr@777: ysr@777: ysr@777: CMRegionStack::CMRegionStack() : _base(NULL) {} ysr@777: ysr@777: void CMRegionStack::allocate(size_t size) { ysr@777: _base = NEW_C_HEAP_ARRAY(MemRegion, size); ysr@777: if (_base == NULL) ysr@777: vm_exit_during_initialization("Failed to allocate " ysr@777: "CM region mark stack"); ysr@777: _index = 0; ysr@777: // QQQQ cast ... ysr@777: _capacity = (jint) size; ysr@777: } ysr@777: ysr@777: CMRegionStack::~CMRegionStack() { ysr@777: if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base); ysr@777: } ysr@777: ysr@777: void CMRegionStack::push(MemRegion mr) { ysr@777: assert(mr.word_size() > 0, "Precondition"); ysr@777: while (true) { ysr@777: if (isFull()) { ysr@777: _overflow = true; ysr@777: return; ysr@777: } ysr@777: // Otherwise... ysr@777: jint index = _index; ysr@777: jint next_index = index+1; ysr@777: jint res = Atomic::cmpxchg(next_index, &_index, index); ysr@777: if (res == index) { ysr@777: _base[index] = mr; ysr@777: return; ysr@777: } ysr@777: // Otherwise, we need to try again. ysr@777: } ysr@777: } ysr@777: ysr@777: MemRegion CMRegionStack::pop() { ysr@777: while (true) { ysr@777: // Otherwise... ysr@777: jint index = _index; ysr@777: ysr@777: if (index == 0) { ysr@777: return MemRegion(); ysr@777: } ysr@777: jint next_index = index-1; ysr@777: jint res = Atomic::cmpxchg(next_index, &_index, index); ysr@777: if (res == index) { ysr@777: MemRegion mr = _base[next_index]; ysr@777: if (mr.start() != NULL) { ysr@777: tmp_guarantee_CM( mr.end() != NULL, "invariant" ); ysr@777: tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); ysr@777: return mr; ysr@777: } else { ysr@777: // that entry was invalidated... let's skip it ysr@777: tmp_guarantee_CM( mr.end() == NULL, "invariant" ); ysr@777: } ysr@777: } ysr@777: // Otherwise, we need to try again. ysr@777: } ysr@777: } ysr@777: ysr@777: bool CMRegionStack::invalidate_entries_into_cset() { ysr@777: bool result = false; ysr@777: G1CollectedHeap* g1h = G1CollectedHeap::heap(); ysr@777: for (int i = 0; i < _oops_do_bound; ++i) { ysr@777: MemRegion mr = _base[i]; ysr@777: if (mr.start() != NULL) { ysr@777: tmp_guarantee_CM( mr.end() != NULL, "invariant"); ysr@777: tmp_guarantee_CM( mr.word_size() > 0, "invariant" ); ysr@777: HeapRegion* hr = g1h->heap_region_containing(mr.start()); ysr@777: tmp_guarantee_CM( hr != NULL, "invariant" ); ysr@777: if (hr->in_collection_set()) { ysr@777: // The region points into the collection set ysr@777: _base[i] = MemRegion(); ysr@777: result = true; ysr@777: } ysr@777: } else { ysr@777: // that entry was invalidated... 
ysr@777:       // ... let's skip it
ysr@777:       tmp_guarantee_CM( mr.end() == NULL, "invariant" );
ysr@777:     }
ysr@777:   }
ysr@777:   return result;
ysr@777: }
ysr@777:
ysr@777: template <class OopClosureClass>
ysr@777: bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
ysr@777:   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
ysr@777:          || SafepointSynchronize::is_at_safepoint(),
ysr@777:          "Drain recursion must be yield-safe.");
ysr@777:   bool res = true;
ysr@777:   debug_only(_drain_in_progress = true);
ysr@777:   debug_only(_drain_in_progress_yields = yield_after);
ysr@777:   while (!isEmpty()) {
ysr@777:     oop newOop = pop();
ysr@777:     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
ysr@777:     assert(newOop->is_oop(), "Expected an oop");
ysr@777:     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
ysr@777:            "only grey objects on this stack");
ysr@777:     // iterate over the oops in this oop, marking and pushing
ysr@777:     // the ones in CMS generation.
ysr@777:     newOop->oop_iterate(cl);
ysr@777:     if (yield_after && _cm->do_yield_check()) {
ysr@777:       res = false; break;
ysr@777:     }
ysr@777:   }
ysr@777:   debug_only(_drain_in_progress = false);
ysr@777:   return res;
ysr@777: }
ysr@777:
ysr@777: void CMMarkStack::oops_do(OopClosure* f) {
ysr@777:   if (_index == 0) return;
ysr@777:   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
ysr@777:          "Bound must be set.");
ysr@777:   for (int i = 0; i < _oops_do_bound; i++) {
ysr@777:     f->do_oop(&_base[i]);
ysr@777:   }
ysr@777:   _oops_do_bound = -1;
ysr@777: }
ysr@777:
ysr@777: bool ConcurrentMark::not_yet_marked(oop obj) const {
ysr@777:   return (_g1h->is_obj_ill(obj)
ysr@777:           || (_g1h->is_in_permanent(obj)
ysr@777:               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
ysr@777: }
ysr@777:
ysr@777: #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
ysr@777: #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
ysr@777: #endif // _MSC_VER
ysr@777:
ysr@777: ConcurrentMark::ConcurrentMark(ReservedSpace rs,
ysr@777:                                int max_regions) :
ysr@777:   _markBitMap1(rs, MinObjAlignment - 1),
ysr@777:   _markBitMap2(rs, MinObjAlignment - 1),
ysr@777:
ysr@777:   _parallel_marking_threads(0),
ysr@777:   _sleep_factor(0.0),
ysr@777:   _marking_task_overhead(1.0),
ysr@777:   _cleanup_sleep_factor(0.0),
ysr@777:   _cleanup_task_overhead(1.0),
ysr@777:   _region_bm(max_regions, false /* in_resource_area*/),
ysr@777:   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
ysr@777:            CardTableModRefBS::card_shift,
ysr@777:            false /* in_resource_area*/),
ysr@777:   _prevMarkBitMap(&_markBitMap1),
ysr@777:   _nextMarkBitMap(&_markBitMap2),
ysr@777:   _at_least_one_mark_complete(false),
ysr@777:
ysr@777:   _markStack(this),
ysr@777:   _regionStack(),
ysr@777:   // _finger set in set_non_marking_state
ysr@777:
ysr@777:   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
ysr@777:   // _active_tasks set in set_non_marking_state
ysr@777:   // _tasks set inside the constructor
ysr@777:   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
ysr@777:   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
ysr@777:
ysr@777:   _has_overflown(false),
ysr@777:   _concurrent(false),
ysr@777:
ysr@777:   // _verbose_level set below
ysr@777:
ysr@777:   _init_times(),
ysr@777:   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
ysr@777:   _cleanup_times(),
ysr@777:   _total_counting_time(0.0),
ysr@777:   _total_rs_scrub_time(0.0),
ysr@777:
ysr@777:   _parallel_workers(NULL),
ysr@777:   _cleanup_co_tracker(G1CLGroup)
ysr@777: {
ysr@777:   CMVerboseLevel verbose_level =
ysr@777: (CMVerboseLevel) G1MarkingVerboseLevel; ysr@777: if (verbose_level < no_verbose) ysr@777: verbose_level = no_verbose; ysr@777: if (verbose_level > high_verbose) ysr@777: verbose_level = high_verbose; ysr@777: _verbose_level = verbose_level; ysr@777: ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", " ysr@777: "heap end = "PTR_FORMAT, _heap_start, _heap_end); ysr@777: ysr@777: _markStack.allocate(G1CMStackSize); ysr@777: _regionStack.allocate(G1CMRegionStackSize); ysr@777: ysr@777: // Create & start a ConcurrentMark thread. ysr@777: if (G1ConcMark) { ysr@777: _cmThread = new ConcurrentMarkThread(this); ysr@777: assert(cmThread() != NULL, "CM Thread should have been created"); ysr@777: assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm"); ysr@777: } else { ysr@777: _cmThread = NULL; ysr@777: } ysr@777: _g1h = G1CollectedHeap::heap(); ysr@777: assert(CGC_lock != NULL, "Where's the CGC_lock?"); ysr@777: assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency"); ysr@777: assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency"); ysr@777: ysr@777: SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); ysr@777: satb_qs.set_buffer_size(G1SATBLogBufferSize); ysr@777: ysr@777: int size = (int) MAX2(ParallelGCThreads, (size_t)1); ysr@777: _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size); ysr@777: for (int i = 0 ; i < size; i++) { ysr@777: _par_cleanup_thread_state[i] = new ParCleanupThreadState; ysr@777: } ysr@777: ysr@777: _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num); ysr@777: _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num); ysr@777: ysr@777: // so that the assertion in MarkingTaskQueue::task_queue doesn't fail ysr@777: _active_tasks = _max_task_num; ysr@777: for (int i = 0; i < (int) _max_task_num; ++i) { ysr@777: CMTaskQueue* task_queue = new CMTaskQueue(); ysr@777: task_queue->initialize(); ysr@777: _task_queues->register_queue(i, task_queue); ysr@777: ysr@777: _tasks[i] = new CMTask(i, this, task_queue, _task_queues); ysr@777: _accum_task_vtime[i] = 0.0; ysr@777: } ysr@777: ysr@777: if (ParallelMarkingThreads > ParallelGCThreads) { ysr@777: vm_exit_during_initialization("Can't have more ParallelMarkingThreads " ysr@777: "than ParallelGCThreads."); ysr@777: } ysr@777: if (ParallelGCThreads == 0) { ysr@777: // if we are not running with any parallel GC threads we will not ysr@777: // spawn any marking threads either ysr@777: _parallel_marking_threads = 0; ysr@777: _sleep_factor = 0.0; ysr@777: _marking_task_overhead = 1.0; ysr@777: } else { ysr@777: if (ParallelMarkingThreads > 0) { ysr@777: // notice that ParallelMarkingThreads overwrites G1MarkingOverheadPerc ysr@777: // if both are set ysr@777: ysr@777: _parallel_marking_threads = ParallelMarkingThreads; ysr@777: _sleep_factor = 0.0; ysr@777: _marking_task_overhead = 1.0; ysr@777: } else if (G1MarkingOverheadPerc > 0) { ysr@777: // we will calculate the number of parallel marking threads ysr@777: // based on a target overhead with respect to the soft real-time ysr@777: // goal ysr@777: ysr@777: double marking_overhead = (double) G1MarkingOverheadPerc / 100.0; ysr@777: double overall_cm_overhead = ysr@777: (double) G1MaxPauseTimeMS * marking_overhead / (double) G1TimeSliceMS; ysr@777: double cpu_ratio = 1.0 / (double) os::processor_count(); ysr@777: double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio); ysr@777: double marking_task_overhead = ysr@777: overall_cm_overhead / marking_thread_num * 
ysr@777: (double) os::processor_count(); ysr@777: double sleep_factor = ysr@777: (1.0 - marking_task_overhead) / marking_task_overhead; ysr@777: ysr@777: _parallel_marking_threads = (size_t) marking_thread_num; ysr@777: _sleep_factor = sleep_factor; ysr@777: _marking_task_overhead = marking_task_overhead; ysr@777: } else { ysr@777: _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1); ysr@777: _sleep_factor = 0.0; ysr@777: _marking_task_overhead = 1.0; ysr@777: } ysr@777: ysr@777: if (parallel_marking_threads() > 1) ysr@777: _cleanup_task_overhead = 1.0; ysr@777: else ysr@777: _cleanup_task_overhead = marking_task_overhead(); ysr@777: _cleanup_sleep_factor = ysr@777: (1.0 - cleanup_task_overhead()) / cleanup_task_overhead(); ysr@777: ysr@777: #if 0 ysr@777: gclog_or_tty->print_cr("Marking Threads %d", parallel_marking_threads()); ysr@777: gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead()); ysr@777: gclog_or_tty->print_cr("CM Sleep Factor %1.4lf", sleep_factor()); ysr@777: gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead()); ysr@777: gclog_or_tty->print_cr("CL Sleep Factor %1.4lf", cleanup_sleep_factor()); ysr@777: #endif ysr@777: ysr@777: guarantee( parallel_marking_threads() > 0, "peace of mind" ); ysr@777: _parallel_workers = new WorkGang("Parallel Marking Threads", ysr@777: (int) parallel_marking_threads(), false, true); ysr@777: if (_parallel_workers == NULL) ysr@777: vm_exit_during_initialization("Failed necessary allocation."); ysr@777: } ysr@777: ysr@777: // so that the call below can read a sensible value ysr@777: _heap_start = (HeapWord*) rs.base(); ysr@777: set_non_marking_state(); ysr@777: } ysr@777: ysr@777: void ConcurrentMark::update_g1_committed(bool force) { ysr@777: // If concurrent marking is not in progress, then we do not need to ysr@777: // update _heap_end. This has a subtle and important ysr@777: // side-effect. Imagine that two evacuation pauses happen between ysr@777: // marking completion and remark. The first one can grow the ysr@777: // heap (hence now the finger is below the heap end). Then, the ysr@777: // second one could unnecessarily push regions on the region ysr@777: // stack. This causes the invariant that the region stack is empty ysr@777: // at the beginning of remark to be false. By ensuring that we do ysr@777: // not observe heap expansions after marking is complete, then we do ysr@777: // not have this problem. ysr@777: if (!concurrent_marking_in_progress() && !force) ysr@777: return; ysr@777: ysr@777: MemRegion committed = _g1h->g1_committed(); ysr@777: tmp_guarantee_CM( committed.start() == _heap_start, ysr@777: "start shouldn't change" ); ysr@777: HeapWord* new_end = committed.end(); ysr@777: if (new_end > _heap_end) { ysr@777: // The heap has been expanded. ysr@777: ysr@777: _heap_end = new_end; ysr@777: } ysr@777: // Notice that the heap can also shrink. However, this only happens ysr@777: // during a Full GC (at least currently) and the entire marking ysr@777: // phase will bail out and the task will not be restarted. So, let's ysr@777: // do nothing. ysr@777: } ysr@777: ysr@777: void ConcurrentMark::reset() { ysr@777: // Starting values for these two. This should be called in a STW ysr@777: // phase. CM will be notified of any future g1_committed expansions ysr@777: // will be at the end of evacuation pauses, when tasks are ysr@777: // inactive. 
ysr@777: MemRegion committed = _g1h->g1_committed(); ysr@777: _heap_start = committed.start(); ysr@777: _heap_end = committed.end(); ysr@777: ysr@777: guarantee( _heap_start != NULL && ysr@777: _heap_end != NULL && ysr@777: _heap_start < _heap_end, "heap bounds should look ok" ); ysr@777: ysr@777: // reset all the marking data structures and any necessary flags ysr@777: clear_marking_state(); ysr@777: ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[global] resetting"); ysr@777: ysr@777: // We do reset all of them, since different phases will use ysr@777: // different number of active threads. So, it's easiest to have all ysr@777: // of them ready. ysr@777: for (int i = 0; i < (int) _max_task_num; ++i) ysr@777: _tasks[i]->reset(_nextMarkBitMap); ysr@777: ysr@777: // we need this to make sure that the flag is on during the evac ysr@777: // pause with initial mark piggy-backed ysr@777: set_concurrent_marking_in_progress(); ysr@777: } ysr@777: ysr@777: void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) { ysr@777: guarantee( active_tasks <= _max_task_num, "we should not have more" ); ysr@777: ysr@777: _active_tasks = active_tasks; ysr@777: // Need to update the three data structures below according to the ysr@777: // number of active threads for this phase. ysr@777: _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues); ysr@777: _first_overflow_barrier_sync.set_n_workers((int) active_tasks); ysr@777: _second_overflow_barrier_sync.set_n_workers((int) active_tasks); ysr@777: ysr@777: _concurrent = concurrent; ysr@777: // We propagate this to all tasks, not just the active ones. ysr@777: for (int i = 0; i < (int) _max_task_num; ++i) ysr@777: _tasks[i]->set_concurrent(concurrent); ysr@777: ysr@777: if (concurrent) { ysr@777: set_concurrent_marking_in_progress(); ysr@777: } else { ysr@777: // We currently assume that the concurrent flag has been set to ysr@777: // false before we start remark. At this point we should also be ysr@777: // in a STW phase. ysr@777: guarantee( !concurrent_marking_in_progress(), "invariant" ); ysr@777: guarantee( _finger == _heap_end, "only way to get here" ); ysr@777: update_g1_committed(true); ysr@777: } ysr@777: } ysr@777: ysr@777: void ConcurrentMark::set_non_marking_state() { ysr@777: // We set the global marking state to some default values when we're ysr@777: // not doing marking. ysr@777: clear_marking_state(); ysr@777: _active_tasks = 0; ysr@777: clear_concurrent_marking_in_progress(); ysr@777: } ysr@777: ysr@777: ConcurrentMark::~ConcurrentMark() { ysr@777: int size = (int) MAX2(ParallelGCThreads, (size_t)1); ysr@777: for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i]; ysr@777: FREE_C_HEAP_ARRAY(ParCleanupThreadState*, ysr@777: _par_cleanup_thread_state); ysr@777: ysr@777: for (int i = 0; i < (int) _max_task_num; ++i) { ysr@777: delete _task_queues->queue(i); ysr@777: delete _tasks[i]; ysr@777: } ysr@777: delete _task_queues; ysr@777: FREE_C_HEAP_ARRAY(CMTask*, _max_task_num); ysr@777: } ysr@777: ysr@777: // This closure is used to mark refs into the g1 generation ysr@777: // from external roots in the CMS bit map. ysr@777: // Called at the first checkpoint. 
ysr@777: // ysr@777: ysr@777: #define PRINT_REACHABLE_AT_INITIAL_MARK 0 ysr@777: #if PRINT_REACHABLE_AT_INITIAL_MARK ysr@777: static FILE* reachable_file = NULL; ysr@777: ysr@777: class PrintReachableClosure: public OopsInGenClosure { ysr@777: CMBitMap* _bm; ysr@777: int _level; ysr@777: public: ysr@777: PrintReachableClosure(CMBitMap* bm) : ysr@777: _bm(bm), _level(0) { ysr@777: guarantee(reachable_file != NULL, "pre-condition"); ysr@777: } ysr@777: void do_oop(oop* p) { ysr@777: oop obj = *p; ysr@777: HeapWord* obj_addr = (HeapWord*)obj; ysr@777: if (obj == NULL) return; ysr@777: fprintf(reachable_file, "%d: "PTR_FORMAT" -> "PTR_FORMAT" (%d)\n", ysr@777: _level, p, (void*) obj, _bm->isMarked(obj_addr)); ysr@777: if (!_bm->isMarked(obj_addr)) { ysr@777: _bm->mark(obj_addr); ysr@777: _level++; ysr@777: obj->oop_iterate(this); ysr@777: _level--; ysr@777: } ysr@777: } ysr@777: }; ysr@777: #endif // PRINT_REACHABLE_AT_INITIAL_MARK ysr@777: ysr@777: #define SEND_HEAP_DUMP_TO_FILE 0 ysr@777: #if SEND_HEAP_DUMP_TO_FILE ysr@777: static FILE* heap_dump_file = NULL; ysr@777: #endif // SEND_HEAP_DUMP_TO_FILE ysr@777: ysr@777: void ConcurrentMark::clearNextBitmap() { ysr@777: guarantee(!G1CollectedHeap::heap()->mark_in_progress(), "Precondition."); ysr@777: ysr@777: // clear the mark bitmap (no grey objects to start with). ysr@777: // We need to do this in chunks and offer to yield in between ysr@777: // each chunk. ysr@777: HeapWord* start = _nextMarkBitMap->startWord(); ysr@777: HeapWord* end = _nextMarkBitMap->endWord(); ysr@777: HeapWord* cur = start; ysr@777: size_t chunkSize = M; ysr@777: while (cur < end) { ysr@777: HeapWord* next = cur + chunkSize; ysr@777: if (next > end) ysr@777: next = end; ysr@777: MemRegion mr(cur,next); ysr@777: _nextMarkBitMap->clearRange(mr); ysr@777: cur = next; ysr@777: do_yield_check(); ysr@777: } ysr@777: } ysr@777: ysr@777: class NoteStartOfMarkHRClosure: public HeapRegionClosure { ysr@777: public: ysr@777: bool doHeapRegion(HeapRegion* r) { ysr@777: if (!r->continuesHumongous()) { ysr@777: r->note_start_of_marking(true); ysr@777: } ysr@777: return false; ysr@777: } ysr@777: }; ysr@777: ysr@777: void ConcurrentMark::checkpointRootsInitialPre() { ysr@777: G1CollectedHeap* g1h = G1CollectedHeap::heap(); ysr@777: G1CollectorPolicy* g1p = g1h->g1_policy(); ysr@777: ysr@777: _has_aborted = false; ysr@777: ysr@777: // Find all the reachable objects... ysr@777: #if PRINT_REACHABLE_AT_INITIAL_MARK ysr@777: guarantee(reachable_file == NULL, "Protocol"); ysr@777: char fn_buf[100]; ysr@777: sprintf(fn_buf, "/tmp/reachable.txt.%d", os::current_process_id()); ysr@777: reachable_file = fopen(fn_buf, "w"); ysr@777: // clear the mark bitmap (no grey objects to start with) ysr@777: _nextMarkBitMap->clearAll(); ysr@777: PrintReachableClosure prcl(_nextMarkBitMap); ysr@777: g1h->process_strong_roots( ysr@777: false, // fake perm gen collection ysr@777: SharedHeap::SO_AllClasses, ysr@777: &prcl, // Regular roots ysr@777: &prcl // Perm Gen Roots ysr@777: ); ysr@777: // The root iteration above "consumed" dirty cards in the perm gen. ysr@777: // Therefore, as a shortcut, we dirty all such cards. ysr@777: g1h->rem_set()->invalidate(g1h->perm_gen()->used_region(), false); ysr@777: fclose(reachable_file); ysr@777: reachable_file = NULL; ysr@777: // clear the mark bitmap again. 
ysr@777: _nextMarkBitMap->clearAll(); ysr@777: COMPILER2_PRESENT(DerivedPointerTable::update_pointers()); ysr@777: COMPILER2_PRESENT(DerivedPointerTable::clear()); ysr@777: #endif // PRINT_REACHABLE_AT_INITIAL_MARK ysr@777: ysr@777: // Initialise marking structures. This has to be done in a STW phase. ysr@777: reset(); ysr@777: } ysr@777: ysr@777: class CMMarkRootsClosure: public OopsInGenClosure { ysr@777: private: ysr@777: ConcurrentMark* _cm; ysr@777: G1CollectedHeap* _g1h; ysr@777: bool _do_barrier; ysr@777: ysr@777: public: ysr@777: CMMarkRootsClosure(ConcurrentMark* cm, ysr@777: G1CollectedHeap* g1h, ysr@777: bool do_barrier) : _cm(cm), _g1h(g1h), ysr@777: _do_barrier(do_barrier) { } ysr@777: ysr@777: virtual void do_oop(narrowOop* p) { ysr@777: guarantee(false, "NYI"); ysr@777: } ysr@777: ysr@777: virtual void do_oop(oop* p) { ysr@777: oop thisOop = *p; ysr@777: if (thisOop != NULL) { ysr@777: assert(thisOop->is_oop() || thisOop->mark() == NULL, ysr@777: "expected an oop, possibly with mark word displaced"); ysr@777: HeapWord* addr = (HeapWord*)thisOop; ysr@777: if (_g1h->is_in_g1_reserved(addr)) { ysr@777: _cm->grayRoot(thisOop); ysr@777: } ysr@777: } ysr@777: if (_do_barrier) { ysr@777: assert(!_g1h->is_in_g1_reserved(p), ysr@777: "Should be called on external roots"); ysr@777: do_barrier(p); ysr@777: } ysr@777: } ysr@777: }; ysr@777: ysr@777: void ConcurrentMark::checkpointRootsInitialPost() { ysr@777: G1CollectedHeap* g1h = G1CollectedHeap::heap(); ysr@777: ysr@777: // For each region note start of marking. ysr@777: NoteStartOfMarkHRClosure startcl; ysr@777: g1h->heap_region_iterate(&startcl); ysr@777: ysr@777: // Start weak-reference discovery. ysr@777: ReferenceProcessor* rp = g1h->ref_processor(); ysr@777: rp->verify_no_references_recorded(); ysr@777: rp->enable_discovery(); // enable ("weak") refs discovery ysr@777: ysr@777: SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); ysr@777: satb_mq_set.set_process_completed_threshold(G1SATBProcessCompletedThreshold); ysr@777: satb_mq_set.set_active_all_threads(true); ysr@777: ysr@777: // update_g1_committed() will be called at the end of an evac pause ysr@777: // when marking is on. So, it's also called at the end of the ysr@777: // initial-mark pause to update the heap end, if the heap expands ysr@777: // during it. No need to call it here. ysr@777: ysr@777: guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); ysr@777: ysr@777: size_t max_marking_threads = ysr@777: MAX2((size_t) 1, parallel_marking_threads()); ysr@777: for (int i = 0; i < (int)_max_task_num; ++i) { ysr@777: _tasks[i]->enable_co_tracker(); ysr@777: if (i < (int) max_marking_threads) ysr@777: _tasks[i]->reset_co_tracker(marking_task_overhead()); ysr@777: else ysr@777: _tasks[i]->reset_co_tracker(0.0); ysr@777: } ysr@777: } ysr@777: ysr@777: // Checkpoint the roots into this generation from outside ysr@777: // this generation. [Note this initial checkpoint need only ysr@777: // be approximate -- we'll do a catch up phase subsequently.] ysr@777: void ConcurrentMark::checkpointRootsInitial() { ysr@777: assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped"); ysr@777: G1CollectedHeap* g1h = G1CollectedHeap::heap(); ysr@777: ysr@777: double start = os::elapsedTime(); ysr@777: GCOverheadReporter::recordSTWStart(start); ysr@777: ysr@777: // If there has not been a GC[n-1] since last GC[n] cycle completed, ysr@777: // precede our marking with a collection of all ysr@777: // younger generations to keep floating garbage to a minimum. 
ysr@777:   // YSR: we won't do this for now -- it's an optimization to be
ysr@777:   // done post-beta.
ysr@777:
ysr@777:   // YSR: ignoring weak refs for now; will do at bug fixing stage
ysr@777:   // EVM: assert(discoveredRefsAreClear());
ysr@777:
ysr@777:
ysr@777:   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
ysr@777:   g1p->record_concurrent_mark_init_start();
ysr@777:   checkpointRootsInitialPre();
ysr@777:
ysr@777:   // YSR: when concurrent precleaning is in place, we'll
ysr@777:   // need to clear the cached card table here
ysr@777:
ysr@777:   ResourceMark rm;
ysr@777:   HandleMark hm;
ysr@777:
ysr@777:   g1h->ensure_parsability(false);
ysr@777:   g1h->perm_gen()->save_marks();
ysr@777:
ysr@777:   CMMarkRootsClosure notOlder(this, g1h, false);
ysr@777:   CMMarkRootsClosure older(this, g1h, true);
ysr@777:
ysr@777:   g1h->set_marking_started();
ysr@777:   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
ysr@777:
ysr@777:   g1h->process_strong_roots(false,   // fake perm gen collection
ysr@777:                             SharedHeap::SO_AllClasses,
ysr@777:                             &notOlder, // Regular roots
ysr@777:                             &older    // Perm Gen Roots
ysr@777:                             );
ysr@777:   checkpointRootsInitialPost();
ysr@777:
ysr@777:   // Statistics.
ysr@777:   double end = os::elapsedTime();
ysr@777:   _init_times.add((end - start) * 1000.0);
ysr@777:   GCOverheadReporter::recordSTWEnd(end);
ysr@777:
ysr@777:   g1p->record_concurrent_mark_init_end();
ysr@777: }
ysr@777:
ysr@777: /*
ysr@777:    Notice that in the next two methods, we actually leave the STS
ysr@777:    during the barrier sync and join it immediately afterwards. If we
ysr@777:    do not do this, then the following deadlock can occur: one thread
ysr@777:    could be in the barrier sync code, waiting for the other thread to
ysr@777:    also sync up, whereas another one could be trying to yield, while
ysr@777:    also waiting for the other threads to sync up too.
ysr@777:
ysr@777:    Because the thread that does the sync barrier has left the STS, it
ysr@777:    is possible that a Full GC or an evacuation pause could occur while
ysr@777:    it is suspended. This is actually safe, since entering the sync
ysr@777:    barrier is one of the last things do_marking_step() does, and it
ysr@777:    doesn't manipulate any data structures afterwards.
ysr@777: */ ysr@777: ysr@777: void ConcurrentMark::enter_first_sync_barrier(int task_num) { ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] entering first barrier", task_num); ysr@777: ysr@777: ConcurrentGCThread::stsLeave(); ysr@777: _first_overflow_barrier_sync.enter(); ysr@777: ConcurrentGCThread::stsJoin(); ysr@777: // at this point everyone should have synced up and not be doing any ysr@777: // more work ysr@777: ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] leaving first barrier", task_num); ysr@777: ysr@777: // let task 0 do this ysr@777: if (task_num == 0) { ysr@777: // task 0 is responsible for clearing the global data structures ysr@777: clear_marking_state(); ysr@777: ysr@777: if (PrintGC) { ysr@777: gclog_or_tty->date_stamp(PrintGCDateStamps); ysr@777: gclog_or_tty->stamp(PrintGCTimeStamps); ysr@777: gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]"); ysr@777: } ysr@777: } ysr@777: ysr@777: // after this, each task should reset its own data structures then ysr@777: // then go into the second barrier ysr@777: } ysr@777: ysr@777: void ConcurrentMark::enter_second_sync_barrier(int task_num) { ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] entering second barrier", task_num); ysr@777: ysr@777: ConcurrentGCThread::stsLeave(); ysr@777: _second_overflow_barrier_sync.enter(); ysr@777: ConcurrentGCThread::stsJoin(); ysr@777: // at this point everything should be re-initialised and ready to go ysr@777: ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] leaving second barrier", task_num); ysr@777: } ysr@777: ysr@777: void ConcurrentMark::grayRoot(oop p) { ysr@777: HeapWord* addr = (HeapWord*) p; ysr@777: // We can't really check against _heap_start and _heap_end, since it ysr@777: // is possible during an evacuation pause with piggy-backed ysr@777: // initial-mark that the committed space is expanded during the ysr@777: // pause without CM observing this change. So the assertions below ysr@777: // is a bit conservative; but better than nothing. ysr@777: tmp_guarantee_CM( _g1h->g1_committed().contains(addr), ysr@777: "address should be within the heap bounds" ); ysr@777: ysr@777: if (!_nextMarkBitMap->isMarked(addr)) ysr@777: _nextMarkBitMap->parMark(addr); ysr@777: } ysr@777: ysr@777: void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) { ysr@777: // The objects on the region have already been marked "in bulk" by ysr@777: // the caller. We only need to decide whether to push the region on ysr@777: // the region stack or not. ysr@777: ysr@777: if (!concurrent_marking_in_progress() || !_should_gray_objects) ysr@777: // We're done with marking and waiting for remark. We do not need to ysr@777: // push anything else on the region stack. ysr@777: return; ysr@777: ysr@777: HeapWord* finger = _finger; ysr@777: ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[global] attempting to push " ysr@777: "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at " ysr@777: PTR_FORMAT, mr.start(), mr.end(), finger); ysr@777: ysr@777: if (mr.start() < finger) { ysr@777: // The finger is always heap region aligned and it is not possible ysr@777: // for mr to span heap regions. 
ysr@777: tmp_guarantee_CM( mr.end() <= finger, "invariant" ); ysr@777: ysr@777: tmp_guarantee_CM( mr.start() <= mr.end() && ysr@777: _heap_start <= mr.start() && ysr@777: mr.end() <= _heap_end, ysr@777: "region boundaries should fall within the committed space" ); ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") " ysr@777: "below the finger, pushing it", ysr@777: mr.start(), mr.end()); ysr@777: ysr@777: if (!region_stack_push(mr)) { ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[global] region stack has overflown."); ysr@777: } ysr@777: } ysr@777: } ysr@777: ysr@777: void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) { ysr@777: // The object is not marked by the caller. We need to at least mark ysr@777: // it and maybe push in on the stack. ysr@777: ysr@777: HeapWord* addr = (HeapWord*)p; ysr@777: if (!_nextMarkBitMap->isMarked(addr)) { ysr@777: // We definitely need to mark it, irrespective whether we bail out ysr@777: // because we're done with marking. ysr@777: if (_nextMarkBitMap->parMark(addr)) { ysr@777: if (!concurrent_marking_in_progress() || !_should_gray_objects) ysr@777: // If we're done with concurrent marking and we're waiting for ysr@777: // remark, then we're not pushing anything on the stack. ysr@777: return; ysr@777: ysr@777: // No OrderAccess:store_load() is needed. It is implicit in the ysr@777: // CAS done in parMark(addr) above ysr@777: HeapWord* finger = _finger; ysr@777: ysr@777: if (addr < finger) { ysr@777: if (!mark_stack_push(oop(addr))) { ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[global] global stack overflow " ysr@777: "during parMark"); ysr@777: } ysr@777: } ysr@777: } ysr@777: } ysr@777: } ysr@777: ysr@777: class CMConcurrentMarkingTask: public AbstractGangTask { ysr@777: private: ysr@777: ConcurrentMark* _cm; ysr@777: ConcurrentMarkThread* _cmt; ysr@777: ysr@777: public: ysr@777: void work(int worker_i) { ysr@777: guarantee( Thread::current()->is_ConcurrentGC_thread(), ysr@777: "this should only be done by a conc GC thread" ); ysr@777: ysr@777: double start_vtime = os::elapsedVTime(); ysr@777: ysr@777: ConcurrentGCThread::stsJoin(); ysr@777: ysr@777: guarantee( (size_t)worker_i < _cm->active_tasks(), "invariant" ); ysr@777: CMTask* the_task = _cm->task(worker_i); ysr@777: the_task->start_co_tracker(); ysr@777: the_task->record_start_time(); ysr@777: if (!_cm->has_aborted()) { ysr@777: do { ysr@777: double start_vtime_sec = os::elapsedVTime(); ysr@777: double start_time_sec = os::elapsedTime(); ysr@777: the_task->do_marking_step(10.0); ysr@777: double end_time_sec = os::elapsedTime(); ysr@777: double end_vtime_sec = os::elapsedVTime(); ysr@777: double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec; ysr@777: double elapsed_time_sec = end_time_sec - start_time_sec; ysr@777: _cm->clear_has_overflown(); ysr@777: ysr@777: bool ret = _cm->do_yield_check(worker_i); ysr@777: ysr@777: jlong sleep_time_ms; ysr@777: if (!_cm->has_aborted() && the_task->has_aborted()) { ysr@777: sleep_time_ms = ysr@777: (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0); ysr@777: ConcurrentGCThread::stsLeave(); ysr@777: os::sleep(Thread::current(), sleep_time_ms, false); ysr@777: ConcurrentGCThread::stsJoin(); ysr@777: } ysr@777: double end_time2_sec = os::elapsedTime(); ysr@777: double elapsed_time2_sec = end_time2_sec - start_time_sec; ysr@777: ysr@777: the_task->update_co_tracker(); ysr@777: ysr@777: #if 0 ysr@777: gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf 
ms, " ysr@777: "overhead %1.4lf", ysr@777: elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, ysr@777: the_task->conc_overhead(os::elapsedTime()) * 8.0); ysr@777: gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms", ysr@777: elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0); ysr@777: #endif ysr@777: } while (!_cm->has_aborted() && the_task->has_aborted()); ysr@777: } ysr@777: the_task->record_end_time(); ysr@777: guarantee( !the_task->has_aborted() || _cm->has_aborted(), "invariant" ); ysr@777: ysr@777: ConcurrentGCThread::stsLeave(); ysr@777: ysr@777: double end_vtime = os::elapsedVTime(); ysr@777: the_task->update_co_tracker(true); ysr@777: _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime); ysr@777: } ysr@777: ysr@777: CMConcurrentMarkingTask(ConcurrentMark* cm, ysr@777: ConcurrentMarkThread* cmt) : ysr@777: AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { } ysr@777: ysr@777: ~CMConcurrentMarkingTask() { } ysr@777: }; ysr@777: ysr@777: void ConcurrentMark::markFromRoots() { ysr@777: // we might be tempted to assert that: ysr@777: // assert(asynch == !SafepointSynchronize::is_at_safepoint(), ysr@777: // "inconsistent argument?"); ysr@777: // However that wouldn't be right, because it's possible that ysr@777: // a safepoint is indeed in progress as a younger generation ysr@777: // stop-the-world GC happens even as we mark in this generation. ysr@777: ysr@777: _restart_for_overflow = false; ysr@777: ysr@777: set_phase(MAX2((size_t) 1, parallel_marking_threads()), true); ysr@777: ysr@777: CMConcurrentMarkingTask markingTask(this, cmThread()); ysr@777: if (parallel_marking_threads() > 0) ysr@777: _parallel_workers->run_task(&markingTask); ysr@777: else ysr@777: markingTask.work(0); ysr@777: print_stats(); ysr@777: } ysr@777: ysr@777: void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) { ysr@777: // world is stopped at this checkpoint ysr@777: assert(SafepointSynchronize::is_at_safepoint(), ysr@777: "world should be stopped"); ysr@777: G1CollectedHeap* g1h = G1CollectedHeap::heap(); ysr@777: ysr@777: // If a full collection has happened, we shouldn't do this. ysr@777: if (has_aborted()) { ysr@777: g1h->set_marking_complete(); // So bitmap clearing isn't confused ysr@777: return; ysr@777: } ysr@777: ysr@777: G1CollectorPolicy* g1p = g1h->g1_policy(); ysr@777: g1p->record_concurrent_mark_remark_start(); ysr@777: ysr@777: double start = os::elapsedTime(); ysr@777: GCOverheadReporter::recordSTWStart(start); ysr@777: ysr@777: checkpointRootsFinalWork(); ysr@777: ysr@777: double mark_work_end = os::elapsedTime(); ysr@777: ysr@777: weakRefsWork(clear_all_soft_refs); ysr@777: ysr@777: if (has_overflown()) { ysr@777: // Oops. We overflowed. Restart concurrent marking. ysr@777: _restart_for_overflow = true; ysr@777: // Clear the flag. We do not need it any more. ysr@777: clear_has_overflown(); ysr@777: if (G1TraceMarkStackOverflow) ysr@777: gclog_or_tty->print_cr("\nRemark led to restart for overflow."); ysr@777: } else { ysr@777: // We're done with marking. 
ysr@777: JavaThread::satb_mark_queue_set().set_active_all_threads(false); ysr@777: } ysr@777: ysr@777: #if VERIFY_OBJS_PROCESSED ysr@777: _scan_obj_cl.objs_processed = 0; ysr@777: ThreadLocalObjQueue::objs_enqueued = 0; ysr@777: #endif ysr@777: ysr@777: // Statistics ysr@777: double now = os::elapsedTime(); ysr@777: _remark_mark_times.add((mark_work_end - start) * 1000.0); ysr@777: _remark_weak_ref_times.add((now - mark_work_end) * 1000.0); ysr@777: _remark_times.add((now - start) * 1000.0); ysr@777: ysr@777: GCOverheadReporter::recordSTWEnd(now); ysr@777: for (int i = 0; i < (int)_max_task_num; ++i) ysr@777: _tasks[i]->disable_co_tracker(); ysr@777: _cleanup_co_tracker.enable(); ysr@777: _cleanup_co_tracker.reset(cleanup_task_overhead()); ysr@777: g1p->record_concurrent_mark_remark_end(); ysr@777: } ysr@777: ysr@777: ysr@777: #define CARD_BM_TEST_MODE 0 ysr@777: ysr@777: class CalcLiveObjectsClosure: public HeapRegionClosure { ysr@777: ysr@777: CMBitMapRO* _bm; ysr@777: ConcurrentMark* _cm; ysr@777: COTracker* _co_tracker; ysr@777: bool _changed; ysr@777: bool _yield; ysr@777: size_t _words_done; ysr@777: size_t _tot_live; ysr@777: size_t _tot_used; ysr@777: size_t _regions_done; ysr@777: double _start_vtime_sec; ysr@777: ysr@777: BitMap* _region_bm; ysr@777: BitMap* _card_bm; ysr@777: intptr_t _bottom_card_num; ysr@777: bool _final; ysr@777: ysr@777: void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) { ysr@777: for (intptr_t i = start_card_num; i <= last_card_num; i++) { ysr@777: #if CARD_BM_TEST_MODE ysr@777: guarantee(_card_bm->at(i - _bottom_card_num), ysr@777: "Should already be set."); ysr@777: #else ysr@777: _card_bm->par_at_put(i - _bottom_card_num, 1); ysr@777: #endif ysr@777: } ysr@777: } ysr@777: ysr@777: public: ysr@777: CalcLiveObjectsClosure(bool final, ysr@777: CMBitMapRO *bm, ConcurrentMark *cm, ysr@777: BitMap* region_bm, BitMap* card_bm, ysr@777: COTracker* co_tracker) : ysr@777: _bm(bm), _cm(cm), _changed(false), _yield(true), ysr@777: _words_done(0), _tot_live(0), _tot_used(0), ysr@777: _region_bm(region_bm), _card_bm(card_bm), ysr@777: _final(final), _co_tracker(co_tracker), ysr@777: _regions_done(0), _start_vtime_sec(0.0) ysr@777: { ysr@777: _bottom_card_num = ysr@777: intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >> ysr@777: CardTableModRefBS::card_shift); ysr@777: } ysr@777: ysr@777: bool doHeapRegion(HeapRegion* hr) { ysr@777: if (_co_tracker != NULL) ysr@777: _co_tracker->update(); ysr@777: ysr@777: if (!_final && _regions_done == 0) ysr@777: _start_vtime_sec = os::elapsedVTime(); ysr@777: ysr@777: if (hr->continuesHumongous()) return false; ysr@777: ysr@777: HeapWord* nextTop = hr->next_top_at_mark_start(); ysr@777: HeapWord* start = hr->top_at_conc_mark_count(); ysr@777: assert(hr->bottom() <= start && start <= hr->end() && ysr@777: hr->bottom() <= nextTop && nextTop <= hr->end() && ysr@777: start <= nextTop, ysr@777: "Preconditions."); ysr@777: // Otherwise, record the number of word's we'll examine. ysr@777: size_t words_done = (nextTop - start); ysr@777: // Find the first marked object at or after "start". ysr@777: start = _bm->getNextMarkedWordAddress(start, nextTop); ysr@777: size_t marked_bytes = 0; ysr@777: ysr@777: // Below, the term "card num" means the result of shifting an address ysr@777: // by the card shift -- address 0 corresponds to card number 0. One ysr@777: // must subtract the card num of the bottom of the heap to obtain a ysr@777: // card table index. 
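ysr@777:     // (For example, with the usual 512-byte cards, i.e. card_shift == 9, an
ysr@777:     // object starting at address 0x1200 lies on card num 0x9; subtracting the
ysr@777:     // card num of the heap bottom yields its index into _card_bm.)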
ysr@777: // The first card num of the sequence of live cards currently being ysr@777: // constructed. -1 ==> no sequence. ysr@777: intptr_t start_card_num = -1; ysr@777: // The last card num of the sequence of live cards currently being ysr@777: // constructed. -1 ==> no sequence. ysr@777: intptr_t last_card_num = -1; ysr@777: ysr@777: while (start < nextTop) { ysr@777: if (_yield && _cm->do_yield_check()) { ysr@777: // We yielded. It might be for a full collection, in which case ysr@777: // all bets are off; terminate the traversal. ysr@777: if (_cm->has_aborted()) { ysr@777: _changed = false; ysr@777: return true; ysr@777: } else { ysr@777: // Otherwise, it might be a collection pause, and the region ysr@777: // we're looking at might be in the collection set. We'll ysr@777: // abandon this region. ysr@777: return false; ysr@777: } ysr@777: } ysr@777: oop obj = oop(start); ysr@777: int obj_sz = obj->size(); ysr@777: // The card num of the start of the current object. ysr@777: intptr_t obj_card_num = ysr@777: intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift); ysr@777: ysr@777: HeapWord* obj_last = start + obj_sz - 1; ysr@777: intptr_t obj_last_card_num = ysr@777: intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift); ysr@777: ysr@777: if (obj_card_num != last_card_num) { ysr@777: if (start_card_num == -1) { ysr@777: assert(last_card_num == -1, "Both or neither."); ysr@777: start_card_num = obj_card_num; ysr@777: } else { ysr@777: assert(last_card_num != -1, "Both or neither."); ysr@777: assert(obj_card_num >= last_card_num, "Inv"); ysr@777: if ((obj_card_num - last_card_num) > 1) { ysr@777: // Mark the last run, and start a new one. ysr@777: mark_card_num_range(start_card_num, last_card_num); ysr@777: start_card_num = obj_card_num; ysr@777: } ysr@777: } ysr@777: #if CARD_BM_TEST_MODE ysr@777: /* ysr@777: gclog_or_tty->print_cr("Setting bits from %d/%d.", ysr@777: obj_card_num - _bottom_card_num, ysr@777: obj_last_card_num - _bottom_card_num); ysr@777: */ ysr@777: for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) { ysr@777: _card_bm->par_at_put(j - _bottom_card_num, 1); ysr@777: } ysr@777: #endif ysr@777: } ysr@777: // In any case, we set the last card num. ysr@777: last_card_num = obj_last_card_num; ysr@777: ysr@777: marked_bytes += obj_sz * HeapWordSize; ysr@777: // Find the next marked object after this one. ysr@777: start = _bm->getNextMarkedWordAddress(start + 1, nextTop); ysr@777: _changed = true; ysr@777: } ysr@777: // Handle the last range, if any. ysr@777: if (start_card_num != -1) ysr@777: mark_card_num_range(start_card_num, last_card_num); ysr@777: if (_final) { ysr@777: // Mark the allocated-since-marking portion... ysr@777: HeapWord* tp = hr->top(); ysr@777: if (nextTop < tp) { ysr@777: start_card_num = ysr@777: intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift); ysr@777: last_card_num = ysr@777: intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift); ysr@777: mark_card_num_range(start_card_num, last_card_num); ysr@777: // This definitely means the region has live objects. ysr@777: _region_bm->par_at_put(hr->hrs_index(), 1); ysr@777: } ysr@777: } ysr@777: ysr@777: hr->add_to_marked_bytes(marked_bytes); ysr@777: // Update the live region bitmap. 
ysr@777: if (marked_bytes > 0) { ysr@777: _region_bm->par_at_put(hr->hrs_index(), 1); ysr@777: } ysr@777: hr->set_top_at_conc_mark_count(nextTop); ysr@777: _tot_live += hr->next_live_bytes(); ysr@777: _tot_used += hr->used(); ysr@777: _words_done = words_done; ysr@777: ysr@777: if (!_final) { ysr@777: ++_regions_done; ysr@777: if (_regions_done % 10 == 0) { ysr@777: double end_vtime_sec = os::elapsedVTime(); ysr@777: double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec; ysr@777: if (elapsed_vtime_sec > (10.0 / 1000.0)) { ysr@777: jlong sleep_time_ms = ysr@777: (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0); ysr@777: #if 0 ysr@777: gclog_or_tty->print_cr("CL: elapsed %1.4lf ms, sleep %1.4lf ms, " ysr@777: "overhead %1.4lf", ysr@777: elapsed_vtime_sec * 1000.0, (double) sleep_time_ms, ysr@777: _co_tracker->concOverhead(os::elapsedTime())); ysr@777: #endif ysr@777: os::sleep(Thread::current(), sleep_time_ms, false); ysr@777: _start_vtime_sec = end_vtime_sec; ysr@777: } ysr@777: } ysr@777: } ysr@777: ysr@777: return false; ysr@777: } ysr@777: ysr@777: bool changed() { return _changed; } ysr@777: void reset() { _changed = false; _words_done = 0; } ysr@777: void no_yield() { _yield = false; } ysr@777: size_t words_done() { return _words_done; } ysr@777: size_t tot_live() { return _tot_live; } ysr@777: size_t tot_used() { return _tot_used; } ysr@777: }; ysr@777: ysr@777: ysr@777: void ConcurrentMark::calcDesiredRegions() { ysr@777: guarantee( _cleanup_co_tracker.enabled(), "invariant" ); ysr@777: _cleanup_co_tracker.start(); ysr@777: ysr@777: _region_bm.clear(); ysr@777: _card_bm.clear(); ysr@777: CalcLiveObjectsClosure calccl(false /*final*/, ysr@777: nextMarkBitMap(), this, ysr@777: &_region_bm, &_card_bm, ysr@777: &_cleanup_co_tracker); ysr@777: G1CollectedHeap *g1h = G1CollectedHeap::heap(); ysr@777: g1h->heap_region_iterate(&calccl); ysr@777: ysr@777: do { ysr@777: calccl.reset(); ysr@777: g1h->heap_region_iterate(&calccl); ysr@777: } while (calccl.changed()); ysr@777: ysr@777: _cleanup_co_tracker.update(true); ysr@777: } ysr@777: ysr@777: class G1ParFinalCountTask: public AbstractGangTask { ysr@777: protected: ysr@777: G1CollectedHeap* _g1h; ysr@777: CMBitMap* _bm; ysr@777: size_t _n_workers; ysr@777: size_t *_live_bytes; ysr@777: size_t *_used_bytes; ysr@777: BitMap* _region_bm; ysr@777: BitMap* _card_bm; ysr@777: public: ysr@777: G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm, ysr@777: BitMap* region_bm, BitMap* card_bm) : ysr@777: AbstractGangTask("G1 final counting"), _g1h(g1h), ysr@777: _bm(bm), _region_bm(region_bm), _card_bm(card_bm) ysr@777: { ysr@777: if (ParallelGCThreads > 0) ysr@777: _n_workers = _g1h->workers()->total_workers(); ysr@777: else ysr@777: _n_workers = 1; ysr@777: _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); ysr@777: _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers); ysr@777: } ysr@777: ysr@777: ~G1ParFinalCountTask() { ysr@777: FREE_C_HEAP_ARRAY(size_t, _live_bytes); ysr@777: FREE_C_HEAP_ARRAY(size_t, _used_bytes); ysr@777: } ysr@777: ysr@777: void work(int i) { ysr@777: CalcLiveObjectsClosure calccl(true /*final*/, ysr@777: _bm, _g1h->concurrent_mark(), ysr@777: _region_bm, _card_bm, ysr@777: NULL /* CO tracker */); ysr@777: calccl.no_yield(); ysr@777: if (ParallelGCThreads > 0) { tonyp@790: _g1h->heap_region_par_iterate_chunked(&calccl, i, tonyp@790: HeapRegion::FinalCountClaimValue); ysr@777: } else { ysr@777: _g1h->heap_region_iterate(&calccl); ysr@777: } ysr@777: assert(calccl.complete(), "Shouldn't have yielded!"); 
ysr@777: ysr@777: guarantee( (size_t)i < _n_workers, "invariant" ); ysr@777: _live_bytes[i] = calccl.tot_live(); ysr@777: _used_bytes[i] = calccl.tot_used(); ysr@777: } ysr@777: size_t live_bytes() { ysr@777: size_t live_bytes = 0; ysr@777: for (size_t i = 0; i < _n_workers; ++i) ysr@777: live_bytes += _live_bytes[i]; ysr@777: return live_bytes; ysr@777: } ysr@777: size_t used_bytes() { ysr@777: size_t used_bytes = 0; ysr@777: for (size_t i = 0; i < _n_workers; ++i) ysr@777: used_bytes += _used_bytes[i]; ysr@777: return used_bytes; ysr@777: } ysr@777: }; ysr@777: ysr@777: class G1ParNoteEndTask; ysr@777: ysr@777: class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { ysr@777: G1CollectedHeap* _g1; ysr@777: int _worker_num; ysr@777: size_t _max_live_bytes; ysr@777: size_t _regions_claimed; ysr@777: size_t _freed_bytes; ysr@777: size_t _cleared_h_regions; ysr@777: size_t _freed_regions; ysr@777: UncleanRegionList* _unclean_region_list; ysr@777: double _claimed_region_time; ysr@777: double _max_region_time; ysr@777: ysr@777: public: ysr@777: G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, ysr@777: UncleanRegionList* list, ysr@777: int worker_num); ysr@777: size_t freed_bytes() { return _freed_bytes; } ysr@777: size_t cleared_h_regions() { return _cleared_h_regions; } ysr@777: size_t freed_regions() { return _freed_regions; } ysr@777: UncleanRegionList* unclean_region_list() { ysr@777: return _unclean_region_list; ysr@777: } ysr@777: ysr@777: bool doHeapRegion(HeapRegion *r); ysr@777: ysr@777: size_t max_live_bytes() { return _max_live_bytes; } ysr@777: size_t regions_claimed() { return _regions_claimed; } ysr@777: double claimed_region_time_sec() { return _claimed_region_time; } ysr@777: double max_region_time_sec() { return _max_region_time; } ysr@777: }; ysr@777: ysr@777: class G1ParNoteEndTask: public AbstractGangTask { ysr@777: friend class G1NoteEndOfConcMarkClosure; ysr@777: protected: ysr@777: G1CollectedHeap* _g1h; ysr@777: size_t _max_live_bytes; ysr@777: size_t _freed_bytes; ysr@777: ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state; ysr@777: public: ysr@777: G1ParNoteEndTask(G1CollectedHeap* g1h, ysr@777: ConcurrentMark::ParCleanupThreadState** ysr@777: par_cleanup_thread_state) : ysr@777: AbstractGangTask("G1 note end"), _g1h(g1h), ysr@777: _max_live_bytes(0), _freed_bytes(0), ysr@777: _par_cleanup_thread_state(par_cleanup_thread_state) ysr@777: {} ysr@777: ysr@777: void work(int i) { ysr@777: double start = os::elapsedTime(); ysr@777: G1NoteEndOfConcMarkClosure g1_note_end(_g1h, ysr@777: &_par_cleanup_thread_state[i]->list, ysr@777: i); ysr@777: if (ParallelGCThreads > 0) { tonyp@790: _g1h->heap_region_par_iterate_chunked(&g1_note_end, i, tonyp@790: HeapRegion::NoteEndClaimValue); ysr@777: } else { ysr@777: _g1h->heap_region_iterate(&g1_note_end); ysr@777: } ysr@777: assert(g1_note_end.complete(), "Shouldn't have yielded!"); ysr@777: ysr@777: // Now finish up freeing the current thread's regions. 
ysr@777: _g1h->finish_free_region_work(g1_note_end.freed_bytes(), ysr@777: g1_note_end.cleared_h_regions(), ysr@777: 0, NULL); ysr@777: { ysr@777: MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); ysr@777: _max_live_bytes += g1_note_end.max_live_bytes(); ysr@777: _freed_bytes += g1_note_end.freed_bytes(); ysr@777: } ysr@777: double end = os::elapsedTime(); ysr@777: if (G1PrintParCleanupStats) { ysr@777: gclog_or_tty->print(" Worker thread %d [%8.3f..%8.3f = %8.3f ms] " ysr@777: "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n", ysr@777: i, start, end, (end-start)*1000.0, ysr@777: g1_note_end.regions_claimed(), ysr@777: g1_note_end.claimed_region_time_sec()*1000.0, ysr@777: g1_note_end.max_region_time_sec()*1000.0); ysr@777: } ysr@777: } ysr@777: size_t max_live_bytes() { return _max_live_bytes; } ysr@777: size_t freed_bytes() { return _freed_bytes; } ysr@777: }; ysr@777: ysr@777: class G1ParScrubRemSetTask: public AbstractGangTask { ysr@777: protected: ysr@777: G1RemSet* _g1rs; ysr@777: BitMap* _region_bm; ysr@777: BitMap* _card_bm; ysr@777: public: ysr@777: G1ParScrubRemSetTask(G1CollectedHeap* g1h, ysr@777: BitMap* region_bm, BitMap* card_bm) : ysr@777: AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), ysr@777: _region_bm(region_bm), _card_bm(card_bm) ysr@777: {} ysr@777: ysr@777: void work(int i) { ysr@777: if (ParallelGCThreads > 0) { tonyp@790: _g1rs->scrub_par(_region_bm, _card_bm, i, tonyp@790: HeapRegion::ScrubRemSetClaimValue); ysr@777: } else { ysr@777: _g1rs->scrub(_region_bm, _card_bm); ysr@777: } ysr@777: } ysr@777: ysr@777: }; ysr@777: ysr@777: G1NoteEndOfConcMarkClosure:: ysr@777: G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, ysr@777: UncleanRegionList* list, ysr@777: int worker_num) ysr@777: : _g1(g1), _worker_num(worker_num), ysr@777: _max_live_bytes(0), _regions_claimed(0), ysr@777: _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0), ysr@777: _claimed_region_time(0.0), _max_region_time(0.0), ysr@777: _unclean_region_list(list) ysr@777: {} ysr@777: ysr@777: bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) { ysr@777: // We use a claim value of zero here because all regions ysr@777: // were claimed with value 1 in the FinalCount task. ysr@777: r->reset_gc_time_stamp(); ysr@777: if (!r->continuesHumongous()) { ysr@777: double start = os::elapsedTime(); ysr@777: _regions_claimed++; ysr@777: r->note_end_of_marking(); ysr@777: _max_live_bytes += r->max_live_bytes(); ysr@777: _g1->free_region_if_totally_empty_work(r, ysr@777: _freed_bytes, ysr@777: _cleared_h_regions, ysr@777: _freed_regions, ysr@777: _unclean_region_list, ysr@777: true /*par*/); ysr@777: double region_time = (os::elapsedTime() - start); ysr@777: _claimed_region_time += region_time; ysr@777: if (region_time > _max_region_time) _max_region_time = region_time; ysr@777: } ysr@777: return false; ysr@777: } ysr@777: ysr@777: void ConcurrentMark::cleanup() { ysr@777: // world is stopped at this checkpoint ysr@777: assert(SafepointSynchronize::is_at_safepoint(), ysr@777: "world should be stopped"); ysr@777: G1CollectedHeap* g1h = G1CollectedHeap::heap(); ysr@777: ysr@777: // If a full collection has happened, we shouldn't do this. 
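  // (has_aborted() is set by ConcurrentMark::abort(), which is how a Full GC
  // abandons the current marking cycle; see abort() further down.)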
ysr@777: if (has_aborted()) { ysr@777: g1h->set_marking_complete(); // So bitmap clearing isn't confused ysr@777: return; ysr@777: } ysr@777: ysr@777: _cleanup_co_tracker.disable(); ysr@777: ysr@777: G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); ysr@777: g1p->record_concurrent_mark_cleanup_start(); ysr@777: ysr@777: double start = os::elapsedTime(); ysr@777: GCOverheadReporter::recordSTWStart(start); ysr@777: ysr@777: // Do counting once more with the world stopped for good measure. ysr@777: G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(), ysr@777: &_region_bm, &_card_bm); ysr@777: if (ParallelGCThreads > 0) { tonyp@790: assert(g1h->check_heap_region_claim_values( tonyp@790: HeapRegion::InitialClaimValue), tonyp@790: "sanity check"); tonyp@790: ysr@777: int n_workers = g1h->workers()->total_workers(); ysr@777: g1h->set_par_threads(n_workers); ysr@777: g1h->workers()->run_task(&g1_par_count_task); ysr@777: g1h->set_par_threads(0); tonyp@790: tonyp@790: assert(g1h->check_heap_region_claim_values( tonyp@790: HeapRegion::FinalCountClaimValue), tonyp@790: "sanity check"); ysr@777: } else { ysr@777: g1_par_count_task.work(0); ysr@777: } ysr@777: ysr@777: size_t known_garbage_bytes = ysr@777: g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes(); ysr@777: #if 0 ysr@777: gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf", ysr@777: (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024), ysr@777: (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024), ysr@777: (double) known_garbage_bytes / (double) (1024 * 1024)); ysr@777: #endif // 0 ysr@777: g1p->set_known_garbage_bytes(known_garbage_bytes); ysr@777: ysr@777: size_t start_used_bytes = g1h->used(); ysr@777: _at_least_one_mark_complete = true; ysr@777: g1h->set_marking_complete(); ysr@777: ysr@777: double count_end = os::elapsedTime(); ysr@777: double this_final_counting_time = (count_end - start); ysr@777: if (G1PrintParCleanupStats) { ysr@777: gclog_or_tty->print_cr("Cleanup:"); ysr@777: gclog_or_tty->print_cr(" Finalize counting: %8.3f ms", ysr@777: this_final_counting_time*1000.0); ysr@777: } ysr@777: _total_counting_time += this_final_counting_time; ysr@777: ysr@777: // Install newly created mark bitMap as "prev". ysr@777: swapMarkBitMaps(); ysr@777: ysr@777: g1h->reset_gc_time_stamp(); ysr@777: ysr@777: // Note end of marking in all heap regions. ysr@777: double note_end_start = os::elapsedTime(); ysr@777: G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state); ysr@777: if (ParallelGCThreads > 0) { ysr@777: int n_workers = g1h->workers()->total_workers(); ysr@777: g1h->set_par_threads(n_workers); ysr@777: g1h->workers()->run_task(&g1_par_note_end_task); ysr@777: g1h->set_par_threads(0); tonyp@790: tonyp@790: assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), tonyp@790: "sanity check"); ysr@777: } else { ysr@777: g1_par_note_end_task.work(0); ysr@777: } ysr@777: g1h->set_unclean_regions_coming(true); ysr@777: double note_end_end = os::elapsedTime(); ysr@777: // Tell the mutators that there might be unclean regions coming... ysr@777: if (G1PrintParCleanupStats) { ysr@777: gclog_or_tty->print_cr(" note end of marking: %8.3f ms.", ysr@777: (note_end_end - note_end_start)*1000.0); ysr@777: } ysr@777: tonyp@790: ysr@777: // call below, since it affects the metric by which we sort the heap ysr@777: // regions. 
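  // The remembered set scrubbing below is driven by the region and card
  // bitmaps that were filled in during counting (see G1ParScrubRemSetTask
  // above, which hands them to scrub()/scrub_par()).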
ysr@777: if (G1ScrubRemSets) { ysr@777: double rs_scrub_start = os::elapsedTime(); ysr@777: G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); ysr@777: if (ParallelGCThreads > 0) { ysr@777: int n_workers = g1h->workers()->total_workers(); ysr@777: g1h->set_par_threads(n_workers); ysr@777: g1h->workers()->run_task(&g1_par_scrub_rs_task); ysr@777: g1h->set_par_threads(0); tonyp@790: tonyp@790: assert(g1h->check_heap_region_claim_values( tonyp@790: HeapRegion::ScrubRemSetClaimValue), tonyp@790: "sanity check"); ysr@777: } else { ysr@777: g1_par_scrub_rs_task.work(0); ysr@777: } ysr@777: ysr@777: double rs_scrub_end = os::elapsedTime(); ysr@777: double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); ysr@777: _total_rs_scrub_time += this_rs_scrub_time; ysr@777: } ysr@777: ysr@777: // this will also free any regions totally full of garbage objects, ysr@777: // and sort the regions. ysr@777: g1h->g1_policy()->record_concurrent_mark_cleanup_end( ysr@777: g1_par_note_end_task.freed_bytes(), ysr@777: g1_par_note_end_task.max_live_bytes()); ysr@777: ysr@777: // Statistics. ysr@777: double end = os::elapsedTime(); ysr@777: _cleanup_times.add((end - start) * 1000.0); ysr@777: GCOverheadReporter::recordSTWEnd(end); ysr@777: ysr@777: // G1CollectedHeap::heap()->print(); ysr@777: // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d", ysr@777: // G1CollectedHeap::heap()->get_gc_time_stamp()); ysr@777: ysr@777: if (PrintGC || PrintGCDetails) { ysr@777: g1h->print_size_transition(gclog_or_tty, ysr@777: start_used_bytes, ysr@777: g1h->used(), ysr@777: g1h->capacity()); ysr@777: } ysr@777: ysr@777: size_t cleaned_up_bytes = start_used_bytes - g1h->used(); ysr@777: g1p->decrease_known_garbage_bytes(cleaned_up_bytes); ysr@777: ysr@777: // We need to make this be a "collection" so any collection pause that ysr@777: // races with it goes around and waits for completeCleanup to finish. ysr@777: g1h->increment_total_collections(); ysr@777: ysr@777: #ifndef PRODUCT ysr@777: if (G1VerifyConcMark) { ysr@777: G1CollectedHeap::heap()->prepare_for_verify(); ysr@777: G1CollectedHeap::heap()->verify(true,false); ysr@777: } ysr@777: #endif ysr@777: } ysr@777: ysr@777: void ConcurrentMark::completeCleanup() { ysr@777: // A full collection intervened. ysr@777: if (has_aborted()) return; ysr@777: ysr@777: int first = 0; ysr@777: int last = (int)MAX2(ParallelGCThreads, (size_t)1); ysr@777: for (int t = 0; t < last; t++) { ysr@777: UncleanRegionList* list = &_par_cleanup_thread_state[t]->list; ysr@777: assert(list->well_formed(), "Inv"); ysr@777: HeapRegion* hd = list->hd(); ysr@777: while (hd != NULL) { ysr@777: // Now finish up the other stuff. ysr@777: hd->rem_set()->clear(); ysr@777: HeapRegion* next_hd = hd->next_from_unclean_list(); ysr@777: (void)list->pop(); ysr@777: guarantee(list->hd() == next_hd, "how not?"); ysr@777: _g1h->put_region_on_unclean_list(hd); ysr@777: if (!hd->isHumongous()) { ysr@777: // Add this to the _free_regions count by 1. 
ysr@777: _g1h->finish_free_region_work(0, 0, 1, NULL); ysr@777: } ysr@777: hd = list->hd(); ysr@777: guarantee(hd == next_hd, "how not?"); ysr@777: } ysr@777: } ysr@777: } ysr@777: ysr@777: ysr@777: class G1CMIsAliveClosure: public BoolObjectClosure { ysr@777: G1CollectedHeap* _g1; ysr@777: public: ysr@777: G1CMIsAliveClosure(G1CollectedHeap* g1) : ysr@777: _g1(g1) ysr@777: {} ysr@777: ysr@777: void do_object(oop obj) { ysr@777: assert(false, "not to be invoked"); ysr@777: } ysr@777: bool do_object_b(oop obj) { ysr@777: HeapWord* addr = (HeapWord*)obj; ysr@777: return addr != NULL && ysr@777: (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); ysr@777: } ysr@777: }; ysr@777: ysr@777: class G1CMKeepAliveClosure: public OopClosure { ysr@777: G1CollectedHeap* _g1; ysr@777: ConcurrentMark* _cm; ysr@777: CMBitMap* _bitMap; ysr@777: public: ysr@777: G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm, ysr@777: CMBitMap* bitMap) : ysr@777: _g1(g1), _cm(cm), ysr@777: _bitMap(bitMap) {} ysr@777: ysr@777: void do_oop(narrowOop* p) { ysr@777: guarantee(false, "NYI"); ysr@777: } ysr@777: ysr@777: void do_oop(oop* p) { ysr@777: oop thisOop = *p; ysr@777: HeapWord* addr = (HeapWord*)thisOop; ysr@777: if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) { ysr@777: _bitMap->mark(addr); ysr@777: _cm->mark_stack_push(thisOop); ysr@777: } ysr@777: } ysr@777: }; ysr@777: ysr@777: class G1CMDrainMarkingStackClosure: public VoidClosure { ysr@777: CMMarkStack* _markStack; ysr@777: CMBitMap* _bitMap; ysr@777: G1CMKeepAliveClosure* _oopClosure; ysr@777: public: ysr@777: G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack, ysr@777: G1CMKeepAliveClosure* oopClosure) : ysr@777: _bitMap(bitMap), ysr@777: _markStack(markStack), ysr@777: _oopClosure(oopClosure) ysr@777: {} ysr@777: ysr@777: void do_void() { ysr@777: _markStack->drain((OopClosure*)_oopClosure, _bitMap, false); ysr@777: } ysr@777: }; ysr@777: ysr@777: void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { ysr@777: ResourceMark rm; ysr@777: HandleMark hm; ysr@777: ReferencePolicy* soft_ref_policy; ysr@777: ysr@777: // Process weak references. ysr@777: if (clear_all_soft_refs) { ysr@777: soft_ref_policy = new AlwaysClearPolicy(); ysr@777: } else { ysr@777: #ifdef COMPILER2 ysr@777: soft_ref_policy = new LRUMaxHeapPolicy(); ysr@777: #else ysr@777: soft_ref_policy = new LRUCurrentHeapPolicy(); ysr@777: #endif ysr@777: } ysr@777: assert(_markStack.isEmpty(), "mark stack should be empty"); ysr@777: ysr@777: G1CollectedHeap* g1 = G1CollectedHeap::heap(); ysr@777: G1CMIsAliveClosure g1IsAliveClosure(g1); ysr@777: ysr@777: G1CMKeepAliveClosure g1KeepAliveClosure(g1, this, nextMarkBitMap()); ysr@777: G1CMDrainMarkingStackClosure ysr@777: g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack, ysr@777: &g1KeepAliveClosure); ysr@777: ysr@777: // XXXYYY Also: copy the parallel ref processing code from CMS. 
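  // Serial reference processing: g1IsAliveClosure treats an object as live if
  // it is outside the G1 reserved space or not "ill"; g1KeepAliveClosure marks
  // kept-alive objects on the next bitmap and pushes them on the mark stack;
  // the drain closure then empties that stack so newly pushed objects are
  // traced as well.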
ysr@777: ReferenceProcessor* rp = g1->ref_processor(); ysr@777: rp->process_discovered_references(soft_ref_policy, ysr@777: &g1IsAliveClosure, ysr@777: &g1KeepAliveClosure, ysr@777: &g1DrainMarkingStackClosure, ysr@777: NULL); ysr@777: assert(_markStack.overflow() || _markStack.isEmpty(), ysr@777: "mark stack should be empty (unless it overflowed)"); ysr@777: if (_markStack.overflow()) { ysr@777: set_has_overflown(); ysr@777: } ysr@777: ysr@777: rp->enqueue_discovered_references(); ysr@777: rp->verify_no_references_recorded(); ysr@777: assert(!rp->discovery_enabled(), "should have been disabled"); ysr@777: ysr@777: // Now clean up stale oops in SymbolTable and StringTable ysr@777: SymbolTable::unlink(&g1IsAliveClosure); ysr@777: StringTable::unlink(&g1IsAliveClosure); ysr@777: } ysr@777: ysr@777: void ConcurrentMark::swapMarkBitMaps() { ysr@777: CMBitMapRO* temp = _prevMarkBitMap; ysr@777: _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; ysr@777: _nextMarkBitMap = (CMBitMap*) temp; ysr@777: } ysr@777: ysr@777: class CMRemarkTask: public AbstractGangTask { ysr@777: private: ysr@777: ConcurrentMark *_cm; ysr@777: ysr@777: public: ysr@777: void work(int worker_i) { ysr@777: // Since all available tasks are actually started, we should ysr@777: // only proceed if we're supposed to be actived. ysr@777: if ((size_t)worker_i < _cm->active_tasks()) { ysr@777: CMTask* task = _cm->task(worker_i); ysr@777: task->record_start_time(); ysr@777: do { ysr@777: task->do_marking_step(1000000000.0 /* something very large */); ysr@777: } while (task->has_aborted() && !_cm->has_overflown()); ysr@777: // If we overflow, then we do not want to restart. We instead ysr@777: // want to abort remark and do concurrent marking again. ysr@777: task->record_end_time(); ysr@777: } ysr@777: } ysr@777: ysr@777: CMRemarkTask(ConcurrentMark* cm) : ysr@777: AbstractGangTask("Par Remark"), _cm(cm) { } ysr@777: }; ysr@777: ysr@777: void ConcurrentMark::checkpointRootsFinalWork() { ysr@777: ResourceMark rm; ysr@777: HandleMark hm; ysr@777: G1CollectedHeap* g1h = G1CollectedHeap::heap(); ysr@777: ysr@777: g1h->ensure_parsability(false); ysr@777: ysr@777: if (ParallelGCThreads > 0) { ysr@777: g1h->change_strong_roots_parity(); ysr@777: // this is remark, so we'll use up all available threads ysr@777: int active_workers = ParallelGCThreads; ysr@777: set_phase(active_workers, false); ysr@777: ysr@777: CMRemarkTask remarkTask(this); ysr@777: // We will start all available threads, even if we decide that the ysr@777: // active_workers will be fewer. The extra ones will just bail out ysr@777: // immediately. ysr@777: int n_workers = g1h->workers()->total_workers(); ysr@777: g1h->set_par_threads(n_workers); ysr@777: g1h->workers()->run_task(&remarkTask); ysr@777: g1h->set_par_threads(0); ysr@777: ysr@777: SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); ysr@777: guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); ysr@777: } else { ysr@777: g1h->change_strong_roots_parity(); ysr@777: // this is remark, so we'll use up all available threads ysr@777: int active_workers = 1; ysr@777: set_phase(active_workers, false); ysr@777: ysr@777: CMRemarkTask remarkTask(this); ysr@777: // We will start all available threads, even if we decide that the ysr@777: // active_workers will be fewer. The extra ones will just bail out ysr@777: // immediately. 
ysr@777: remarkTask.work(0); ysr@777: ysr@777: SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); ysr@777: guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" ); ysr@777: } ysr@777: ysr@777: print_stats(); ysr@777: ysr@777: if (!restart_for_overflow()) ysr@777: set_non_marking_state(); ysr@777: ysr@777: #if VERIFY_OBJS_PROCESSED ysr@777: if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { ysr@777: gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", ysr@777: _scan_obj_cl.objs_processed, ysr@777: ThreadLocalObjQueue::objs_enqueued); ysr@777: guarantee(_scan_obj_cl.objs_processed == ysr@777: ThreadLocalObjQueue::objs_enqueued, ysr@777: "Different number of objs processed and enqueued."); ysr@777: } ysr@777: #endif ysr@777: } ysr@777: ysr@777: class ReachablePrinterOopClosure: public OopClosure { ysr@777: private: ysr@777: G1CollectedHeap* _g1h; ysr@777: CMBitMapRO* _bitmap; ysr@777: outputStream* _out; ysr@777: ysr@777: public: ysr@777: ReachablePrinterOopClosure(CMBitMapRO* bitmap, outputStream* out) : ysr@777: _bitmap(bitmap), _g1h(G1CollectedHeap::heap()), _out(out) { } ysr@777: ysr@777: void do_oop(narrowOop* p) { ysr@777: guarantee(false, "NYI"); ysr@777: } ysr@777: ysr@777: void do_oop(oop* p) { ysr@777: oop obj = *p; ysr@777: const char* str = NULL; ysr@777: const char* str2 = ""; ysr@777: ysr@777: if (!_g1h->is_in_g1_reserved(obj)) ysr@777: str = "outside G1 reserved"; ysr@777: else { ysr@777: HeapRegion* hr = _g1h->heap_region_containing(obj); ysr@777: guarantee( hr != NULL, "invariant" ); ysr@777: if (hr->obj_allocated_since_prev_marking(obj)) { ysr@777: str = "over TAMS"; ysr@777: if (_bitmap->isMarked((HeapWord*) obj)) ysr@777: str2 = " AND MARKED"; ysr@777: } else if (_bitmap->isMarked((HeapWord*) obj)) ysr@777: str = "marked"; ysr@777: else ysr@777: str = "#### NOT MARKED ####"; ysr@777: } ysr@777: ysr@777: _out->print_cr(" "PTR_FORMAT" contains "PTR_FORMAT" %s%s", ysr@777: p, (void*) obj, str, str2); ysr@777: } ysr@777: }; ysr@777: ysr@777: class ReachablePrinterClosure: public BitMapClosure { ysr@777: private: ysr@777: CMBitMapRO* _bitmap; ysr@777: outputStream* _out; ysr@777: ysr@777: public: ysr@777: ReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : ysr@777: _bitmap(bitmap), _out(out) { } ysr@777: ysr@777: bool do_bit(size_t offset) { ysr@777: HeapWord* addr = _bitmap->offsetToHeapWord(offset); ysr@777: ReachablePrinterOopClosure oopCl(_bitmap, _out); ysr@777: ysr@777: _out->print_cr(" obj "PTR_FORMAT", offset %10d (marked)", addr, offset); ysr@777: oop(addr)->oop_iterate(&oopCl); ysr@777: _out->print_cr(""); ysr@777: ysr@777: return true; ysr@777: } ysr@777: }; ysr@777: ysr@777: class ObjInRegionReachablePrinterClosure : public ObjectClosure { ysr@777: private: ysr@777: CMBitMapRO* _bitmap; ysr@777: outputStream* _out; ysr@777: ysr@777: public: ysr@777: void do_object(oop o) { ysr@777: ReachablePrinterOopClosure oopCl(_bitmap, _out); ysr@777: ysr@777: _out->print_cr(" obj "PTR_FORMAT" (over TAMS)", (void*) o); ysr@777: o->oop_iterate(&oopCl); ysr@777: _out->print_cr(""); ysr@777: } ysr@777: ysr@777: ObjInRegionReachablePrinterClosure(CMBitMapRO* bitmap, outputStream* out) : ysr@777: _bitmap(bitmap), _out(out) { } ysr@777: }; ysr@777: ysr@777: class RegionReachablePrinterClosure : public HeapRegionClosure { ysr@777: private: ysr@777: CMBitMapRO* _bitmap; ysr@777: outputStream* _out; ysr@777: ysr@777: public: ysr@777: bool doHeapRegion(HeapRegion* hr) { ysr@777: HeapWord* b = hr->bottom(); ysr@777: 
HeapWord* e = hr->end(); ysr@777: HeapWord* t = hr->top(); ysr@777: HeapWord* p = hr->prev_top_at_mark_start(); ysr@777: _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " ysr@777: "PTAMS: "PTR_FORMAT, b, e, t, p); ysr@777: _out->print_cr(""); ysr@777: ysr@777: ObjInRegionReachablePrinterClosure ocl(_bitmap, _out); ysr@777: hr->object_iterate_mem_careful(MemRegion(p, t), &ocl); ysr@777: ysr@777: return false; ysr@777: } ysr@777: ysr@777: RegionReachablePrinterClosure(CMBitMapRO* bitmap, ysr@777: outputStream* out) : ysr@777: _bitmap(bitmap), _out(out) { } ysr@777: }; ysr@777: ysr@777: void ConcurrentMark::print_prev_bitmap_reachable() { ysr@777: outputStream* out = gclog_or_tty; ysr@777: ysr@777: #if SEND_HEAP_DUMP_TO_FILE ysr@777: guarantee(heap_dump_file == NULL, "Protocol"); ysr@777: char fn_buf[100]; ysr@777: sprintf(fn_buf, "/tmp/dump.txt.%d", os::current_process_id()); ysr@777: heap_dump_file = fopen(fn_buf, "w"); ysr@777: fileStream fstream(heap_dump_file); ysr@777: out = &fstream; ysr@777: #endif // SEND_HEAP_DUMP_TO_FILE ysr@777: ysr@777: RegionReachablePrinterClosure rcl(_prevMarkBitMap, out); ysr@777: out->print_cr("--- ITERATING OVER REGIONS WITH PTAMS < TOP"); ysr@777: _g1h->heap_region_iterate(&rcl); ysr@777: out->print_cr(""); ysr@777: ysr@777: ReachablePrinterClosure cl(_prevMarkBitMap, out); ysr@777: out->print_cr("--- REACHABLE OBJECTS ON THE BITMAP"); ysr@777: _prevMarkBitMap->iterate(&cl); ysr@777: out->print_cr(""); ysr@777: ysr@777: #if SEND_HEAP_DUMP_TO_FILE ysr@777: fclose(heap_dump_file); ysr@777: heap_dump_file = NULL; ysr@777: #endif // SEND_HEAP_DUMP_TO_FILE ysr@777: } ysr@777: ysr@777: // This note is for drainAllSATBBuffers and the code in between. ysr@777: // In the future we could reuse a task to do this work during an ysr@777: // evacuation pause (since now tasks are not active and can be claimed ysr@777: // during an evacuation pause). This was a late change to the code and ysr@777: // is currently not being taken advantage of. ysr@777: ysr@777: class CMGlobalObjectClosure : public ObjectClosure { ysr@777: private: ysr@777: ConcurrentMark* _cm; ysr@777: ysr@777: public: ysr@777: void do_object(oop obj) { ysr@777: _cm->deal_with_reference(obj); ysr@777: } ysr@777: ysr@777: CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { } ysr@777: }; ysr@777: ysr@777: void ConcurrentMark::deal_with_reference(oop obj) { ysr@777: if (verbose_high()) ysr@777: gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT, ysr@777: (void*) obj); ysr@777: ysr@777: ysr@777: HeapWord* objAddr = (HeapWord*) obj; ysr@777: if (_g1h->is_in_g1_reserved(objAddr)) { ysr@777: tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" ); ysr@777: HeapRegion* hr = _g1h->heap_region_containing(obj); ysr@777: if (_g1h->is_obj_ill(obj, hr)) { ysr@777: if (verbose_high()) ysr@777: gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered " ysr@777: "marked", (void*) obj); ysr@777: ysr@777: // we need to mark it first ysr@777: if (_nextMarkBitMap->parMark(objAddr)) { ysr@777: // No OrderAccess:store_load() is needed. 
It is implicit in the
ysr@777:         // CAS done in parMark(objAddr) above
ysr@777:         HeapWord* finger = _finger;
ysr@777:         if (objAddr < finger) {
ysr@777:           if (verbose_high())
ysr@777:             gclog_or_tty->print_cr("[global] below the global finger "
ysr@777:                                    "("PTR_FORMAT"), pushing it", finger);
ysr@777:           if (!mark_stack_push(obj)) {
ysr@777:             if (verbose_low())
ysr@777:               gclog_or_tty->print_cr("[global] global stack overflow during "
ysr@777:                                      "deal_with_reference");
ysr@777:           }
ysr@777:         }
ysr@777:       }
ysr@777:     }
ysr@777:   }
ysr@777: }
ysr@777: 
ysr@777: void ConcurrentMark::drainAllSATBBuffers() {
ysr@777:   CMGlobalObjectClosure oc(this);
ysr@777:   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
ysr@777:   satb_mq_set.set_closure(&oc);
ysr@777: 
ysr@777:   while (satb_mq_set.apply_closure_to_completed_buffer()) {
ysr@777:     if (verbose_medium())
ysr@777:       gclog_or_tty->print_cr("[global] processed an SATB buffer");
ysr@777:   }
ysr@777: 
ysr@777:   // no need to check whether we should do this, as this is only
ysr@777:   // called during an evacuation pause
ysr@777:   satb_mq_set.iterate_closure_all_threads();
ysr@777: 
ysr@777:   satb_mq_set.set_closure(NULL);
ysr@777:   guarantee( satb_mq_set.completed_buffers_num() == 0, "invariant" );
ysr@777: }
ysr@777: 
ysr@777: void ConcurrentMark::markPrev(oop p) {
ysr@777:   // Note we are overriding the read-only view of the prev map here, via
ysr@777:   // the cast.
ysr@777:   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
ysr@777: }
ysr@777: 
ysr@777: void ConcurrentMark::clear(oop p) {
ysr@777:   assert(p != NULL && p->is_oop(), "expected an oop");
ysr@777:   HeapWord* addr = (HeapWord*)p;
ysr@777:   assert(addr >= _nextMarkBitMap->startWord() &&
ysr@777:          addr < _nextMarkBitMap->endWord(), "in a region");
ysr@777: 
ysr@777:   _nextMarkBitMap->clear(addr);
ysr@777: }
ysr@777: 
ysr@777: void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
ysr@777:   // Note we are overriding the read-only view of the prev map here, via
ysr@777:   // the cast.
ysr@777:   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
ysr@777:   _nextMarkBitMap->clearRange(mr);
ysr@777: }
ysr@777: 
ysr@777: HeapRegion*
ysr@777: ConcurrentMark::claim_region(int task_num) {
ysr@777:   // "checkpoint" the finger
ysr@777:   HeapWord* finger = _finger;
ysr@777: 
ysr@777:   // _heap_end will not change underneath our feet; it only changes at
ysr@777:   // yield points.
ysr@777:   while (finger < _heap_end) {
ysr@777:     tmp_guarantee_CM( _g1h->is_in_g1_reserved(finger), "invariant" );
ysr@777: 
ysr@777:     // is the gap between reading the finger and doing the CAS too long?
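    // The claiming protocol: read the region under the current finger, then
    // try to CAS the global finger from the value we read up to that region's
    // end. If the CAS succeeds this task owns the region; otherwise another
    // task moved the finger first, so we re-read it and retry.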
ysr@777: ysr@777: HeapRegion* curr_region = _g1h->heap_region_containing(finger); ysr@777: HeapWord* bottom = curr_region->bottom(); ysr@777: HeapWord* end = curr_region->end(); ysr@777: HeapWord* limit = curr_region->next_top_at_mark_start(); ysr@777: ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" " ysr@777: "["PTR_FORMAT", "PTR_FORMAT"), " ysr@777: "limit = "PTR_FORMAT, ysr@777: task_num, curr_region, bottom, end, limit); ysr@777: ysr@777: HeapWord* res = ysr@777: (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); ysr@777: if (res == finger) { ysr@777: // we succeeded ysr@777: ysr@777: // notice that _finger == end cannot be guaranteed here since, ysr@777: // someone else might have moved the finger even further ysr@777: guarantee( _finger >= end, "the finger should have moved forward" ); ysr@777: ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] we were successful with region = " ysr@777: PTR_FORMAT, task_num, curr_region); ysr@777: ysr@777: if (limit > bottom) { ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, " ysr@777: "returning it ", task_num, curr_region); ysr@777: return curr_region; ysr@777: } else { ysr@777: tmp_guarantee_CM( limit == bottom, ysr@777: "the region limit should be at bottom" ); ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, " ysr@777: "returning NULL", task_num, curr_region); ysr@777: // we return NULL and the caller should try calling ysr@777: // claim_region() again. ysr@777: return NULL; ysr@777: } ysr@777: } else { ysr@777: guarantee( _finger > finger, "the finger should have moved forward" ); ysr@777: if (verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] somebody else moved the finger, " ysr@777: "global finger = "PTR_FORMAT", " ysr@777: "our finger = "PTR_FORMAT, ysr@777: task_num, _finger, finger); ysr@777: ysr@777: // read it again ysr@777: finger = _finger; ysr@777: } ysr@777: } ysr@777: ysr@777: return NULL; ysr@777: } ysr@777: ysr@777: void ConcurrentMark::oops_do(OopClosure* cl) { ysr@777: if (_markStack.size() > 0 && verbose_low()) ysr@777: gclog_or_tty->print_cr("[global] scanning the global marking stack, " ysr@777: "size = %d", _markStack.size()); ysr@777: // we first iterate over the contents of the mark stack... ysr@777: _markStack.oops_do(cl); ysr@777: ysr@777: for (int i = 0; i < (int)_max_task_num; ++i) { ysr@777: OopTaskQueue* queue = _task_queues->queue((int)i); ysr@777: ysr@777: if (queue->size() > 0 && verbose_low()) ysr@777: gclog_or_tty->print_cr("[global] scanning task queue of task %d, " ysr@777: "size = %d", i, queue->size()); ysr@777: ysr@777: // ...then over the contents of the all the task queues. ysr@777: queue->oops_do(cl); ysr@777: } ysr@777: ysr@777: // finally, invalidate any entries that in the region stack that ysr@777: // point into the collection set ysr@777: if (_regionStack.invalidate_entries_into_cset()) { ysr@777: // otherwise, any gray objects copied during the evacuation pause ysr@777: // might not be visited. 
ysr@777: guarantee( _should_gray_objects, "invariant" ); ysr@777: } ysr@777: } ysr@777: ysr@777: void ConcurrentMark::clear_marking_state() { ysr@777: _markStack.setEmpty(); ysr@777: _markStack.clear_overflow(); ysr@777: _regionStack.setEmpty(); ysr@777: _regionStack.clear_overflow(); ysr@777: clear_has_overflown(); ysr@777: _finger = _heap_start; ysr@777: ysr@777: for (int i = 0; i < (int)_max_task_num; ++i) { ysr@777: OopTaskQueue* queue = _task_queues->queue(i); ysr@777: queue->set_empty(); ysr@777: } ysr@777: } ysr@777: ysr@777: void ConcurrentMark::print_stats() { ysr@777: if (verbose_stats()) { ysr@777: gclog_or_tty->print_cr("---------------------------------------------------------------------"); ysr@777: for (size_t i = 0; i < _active_tasks; ++i) { ysr@777: _tasks[i]->print_stats(); ysr@777: gclog_or_tty->print_cr("---------------------------------------------------------------------"); ysr@777: } ysr@777: } ysr@777: } ysr@777: ysr@777: class CSMarkOopClosure: public OopClosure { ysr@777: friend class CSMarkBitMapClosure; ysr@777: ysr@777: G1CollectedHeap* _g1h; ysr@777: CMBitMap* _bm; ysr@777: ConcurrentMark* _cm; ysr@777: oop* _ms; ysr@777: jint* _array_ind_stack; ysr@777: int _ms_size; ysr@777: int _ms_ind; ysr@777: int _array_increment; ysr@777: ysr@777: bool push(oop obj, int arr_ind = 0) { ysr@777: if (_ms_ind == _ms_size) { ysr@777: gclog_or_tty->print_cr("Mark stack is full."); ysr@777: return false; ysr@777: } ysr@777: _ms[_ms_ind] = obj; ysr@777: if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind; ysr@777: _ms_ind++; ysr@777: return true; ysr@777: } ysr@777: ysr@777: oop pop() { ysr@777: if (_ms_ind == 0) return NULL; ysr@777: else { ysr@777: _ms_ind--; ysr@777: return _ms[_ms_ind]; ysr@777: } ysr@777: } ysr@777: ysr@777: bool drain() { ysr@777: while (_ms_ind > 0) { ysr@777: oop obj = pop(); ysr@777: assert(obj != NULL, "Since index was non-zero."); ysr@777: if (obj->is_objArray()) { ysr@777: jint arr_ind = _array_ind_stack[_ms_ind]; ysr@777: objArrayOop aobj = objArrayOop(obj); ysr@777: jint len = aobj->length(); ysr@777: jint next_arr_ind = arr_ind + _array_increment; ysr@777: if (next_arr_ind < len) { ysr@777: push(obj, next_arr_ind); ysr@777: } ysr@777: // Now process this portion of this one. ysr@777: int lim = MIN2(next_arr_ind, len); ysr@777: assert(!UseCompressedOops, "This needs to be fixed"); ysr@777: for (int j = arr_ind; j < lim; j++) { ysr@777: do_oop(aobj->obj_at_addr(j)); ysr@777: } ysr@777: ysr@777: } else { ysr@777: obj->oop_iterate(this); ysr@777: } ysr@777: if (abort()) return false; ysr@777: } ysr@777: return true; ysr@777: } ysr@777: ysr@777: public: ysr@777: CSMarkOopClosure(ConcurrentMark* cm, int ms_size) : ysr@777: _g1h(G1CollectedHeap::heap()), ysr@777: _cm(cm), ysr@777: _bm(cm->nextMarkBitMap()), ysr@777: _ms_size(ms_size), _ms_ind(0), ysr@777: _ms(NEW_C_HEAP_ARRAY(oop, ms_size)), ysr@777: _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)), ysr@777: _array_increment(MAX2(ms_size/8, 16)) ysr@777: {} ysr@777: ysr@777: ~CSMarkOopClosure() { ysr@777: FREE_C_HEAP_ARRAY(oop, _ms); ysr@777: FREE_C_HEAP_ARRAY(jint, _array_ind_stack); ysr@777: } ysr@777: ysr@777: void do_oop(narrowOop* p) { ysr@777: guarantee(false, "NYI"); ysr@777: } ysr@777: ysr@777: void do_oop(oop* p) { ysr@777: oop obj = *p; ysr@777: if (obj == NULL) return; ysr@777: if (obj->is_forwarded()) { ysr@777: // If the object has already been forwarded, we have to make sure ysr@777: // that it's marked. So follow the forwarding pointer. 
Note that ysr@777: // this does the right thing for self-forwarding pointers in the ysr@777: // evacuation failure case. ysr@777: obj = obj->forwardee(); ysr@777: } ysr@777: HeapRegion* hr = _g1h->heap_region_containing(obj); ysr@777: if (hr != NULL) { ysr@777: if (hr->in_collection_set()) { ysr@777: if (_g1h->is_obj_ill(obj)) { ysr@777: _bm->mark((HeapWord*)obj); ysr@777: if (!push(obj)) { ysr@777: gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed."); ysr@777: set_abort(); ysr@777: } ysr@777: } ysr@777: } else { ysr@777: // Outside the collection set; we need to gray it ysr@777: _cm->deal_with_reference(obj); ysr@777: } ysr@777: } ysr@777: } ysr@777: }; ysr@777: ysr@777: class CSMarkBitMapClosure: public BitMapClosure { ysr@777: G1CollectedHeap* _g1h; ysr@777: CMBitMap* _bitMap; ysr@777: ConcurrentMark* _cm; ysr@777: CSMarkOopClosure _oop_cl; ysr@777: public: ysr@777: CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) : ysr@777: _g1h(G1CollectedHeap::heap()), ysr@777: _bitMap(cm->nextMarkBitMap()), ysr@777: _oop_cl(cm, ms_size) ysr@777: {} ysr@777: ysr@777: ~CSMarkBitMapClosure() {} ysr@777: ysr@777: bool do_bit(size_t offset) { ysr@777: // convert offset into a HeapWord* ysr@777: HeapWord* addr = _bitMap->offsetToHeapWord(offset); ysr@777: assert(_bitMap->endWord() && addr < _bitMap->endWord(), ysr@777: "address out of range"); ysr@777: assert(_bitMap->isMarked(addr), "tautology"); ysr@777: oop obj = oop(addr); ysr@777: if (!obj->is_forwarded()) { ysr@777: if (!_oop_cl.push(obj)) return false; ysr@777: if (!_oop_cl.drain()) return false; ysr@777: } ysr@777: // Otherwise... ysr@777: return true; ysr@777: } ysr@777: }; ysr@777: ysr@777: ysr@777: class CompleteMarkingInCSHRClosure: public HeapRegionClosure { ysr@777: CMBitMap* _bm; ysr@777: CSMarkBitMapClosure _bit_cl; ysr@777: enum SomePrivateConstants { ysr@777: MSSize = 1000 ysr@777: }; ysr@777: bool _completed; ysr@777: public: ysr@777: CompleteMarkingInCSHRClosure(ConcurrentMark* cm) : ysr@777: _bm(cm->nextMarkBitMap()), ysr@777: _bit_cl(cm, MSSize), ysr@777: _completed(true) ysr@777: {} ysr@777: ysr@777: ~CompleteMarkingInCSHRClosure() {} ysr@777: ysr@777: bool doHeapRegion(HeapRegion* r) { ysr@777: if (!r->evacuation_failed()) { ysr@777: MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start()); ysr@777: if (!mr.is_empty()) { ysr@777: if (!_bm->iterate(&_bit_cl, mr)) { ysr@777: _completed = false; ysr@777: return true; ysr@777: } ysr@777: } ysr@777: } ysr@777: return false; ysr@777: } ysr@777: ysr@777: bool completed() { return _completed; } ysr@777: }; ysr@777: ysr@777: class ClearMarksInHRClosure: public HeapRegionClosure { ysr@777: CMBitMap* _bm; ysr@777: public: ysr@777: ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { } ysr@777: ysr@777: bool doHeapRegion(HeapRegion* r) { ysr@777: if (!r->used_region().is_empty() && !r->evacuation_failed()) { ysr@777: MemRegion usedMR = r->used_region(); ysr@777: _bm->clearRange(r->used_region()); ysr@777: } ysr@777: return false; ysr@777: } ysr@777: }; ysr@777: ysr@777: void ConcurrentMark::complete_marking_in_collection_set() { ysr@777: G1CollectedHeap* g1h = G1CollectedHeap::heap(); ysr@777: ysr@777: if (!g1h->mark_in_progress()) { ysr@777: g1h->g1_policy()->record_mark_closure_time(0.0); ysr@777: return; ysr@777: } ysr@777: ysr@777: int i = 1; ysr@777: double start = os::elapsedTime(); ysr@777: while (true) { ysr@777: i++; ysr@777: CompleteMarkingInCSHRClosure cmplt(this); ysr@777: g1h->collection_set_iterate(&cmplt); ysr@777: if (cmplt.completed()) 
      break;
ysr@777:   }
ysr@777:   double end_time = os::elapsedTime();
ysr@777:   double elapsed_time_ms = (end_time - start) * 1000.0;
ysr@777:   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
ysr@777:   if (PrintGCDetails) {
ysr@777:     gclog_or_tty->print_cr("Mark closure took %5.2f ms.", elapsed_time_ms);
ysr@777:   }
ysr@777: 
ysr@777:   ClearMarksInHRClosure clr(nextMarkBitMap());
ysr@777:   g1h->collection_set_iterate(&clr);
ysr@777: }
ysr@777: 
ysr@777: // The next two methods deal with the following optimisation. Some
ysr@777: // objects are gray by being marked and located above the finger. If
ysr@777: // they are copied, during an evacuation pause, below the finger then
ysr@777: // they need to be pushed on the stack. The observation is that, if
ysr@777: // there are no regions in the collection set located above the
ysr@777: // finger, then the above cannot happen, hence we do not need to
ysr@777: // explicitly gray any objects when copying them to below the
ysr@777: // finger. The global stack will be scanned to ensure that, if it
ysr@777: // points to objects being copied, it will update their
ysr@777: // location. There is a tricky situation with the gray objects in
ysr@777: // the region stack that are being copied, however. See the comment in
ysr@777: // newCSet().
ysr@777: 
ysr@777: void ConcurrentMark::newCSet() {
ysr@777:   if (!concurrent_marking_in_progress())
ysr@777:     // nothing to do if marking is not in progress
ysr@777:     return;
ysr@777: 
ysr@777:   // find what the lowest finger is among the global and local fingers
ysr@777:   _min_finger = _finger;
ysr@777:   for (int i = 0; i < (int)_max_task_num; ++i) {
ysr@777:     CMTask* task = _tasks[i];
ysr@777:     HeapWord* task_finger = task->finger();
ysr@777:     if (task_finger != NULL && task_finger < _min_finger)
ysr@777:       _min_finger = task_finger;
ysr@777:   }
ysr@777: 
ysr@777:   _should_gray_objects = false;
ysr@777: 
ysr@777:   // This fixes a very subtle and frustrating bug. It might be the case
ysr@777:   // that, during an evacuation pause, heap regions that contain
ysr@777:   // objects that are gray (by being in regions contained in the
ysr@777:   // region stack) are included in the collection set. Since such gray
ysr@777:   // objects will be moved, and because it's not easy to redirect
ysr@777:   // region stack entries to point to a new location (because objects
ysr@777:   // in one region might be scattered to multiple regions after they
ysr@777:   // are copied), one option is to ensure that all marked objects
ysr@777:   // copied during a pause are pushed on the stack. Notice, however,
ysr@777:   // that this problem can only happen when the region stack is not
ysr@777:   // empty during an evacuation pause. So, we make the fix a bit less
ysr@777:   // conservative and ensure that regions are pushed on the stack,
ysr@777:   // irrespective of whether all collection set regions are below the
ysr@777:   // finger, if the region stack is not empty. This is expected to be
ysr@777:   // a rare case, so I don't think it's necessary to be smarter about it.
ysr@777: if (!region_stack_empty()) ysr@777: _should_gray_objects = true; ysr@777: } ysr@777: ysr@777: void ConcurrentMark::registerCSetRegion(HeapRegion* hr) { ysr@777: if (!concurrent_marking_in_progress()) ysr@777: return; ysr@777: ysr@777: HeapWord* region_end = hr->end(); ysr@777: if (region_end > _min_finger) ysr@777: _should_gray_objects = true; ysr@777: } ysr@777: ysr@777: void ConcurrentMark::disable_co_trackers() { ysr@777: if (has_aborted()) { ysr@777: if (_cleanup_co_tracker.enabled()) ysr@777: _cleanup_co_tracker.disable(); ysr@777: for (int i = 0; i < (int)_max_task_num; ++i) { ysr@777: CMTask* task = _tasks[i]; ysr@777: if (task->co_tracker_enabled()) ysr@777: task->disable_co_tracker(); ysr@777: } ysr@777: } else { ysr@777: guarantee( !_cleanup_co_tracker.enabled(), "invariant" ); ysr@777: for (int i = 0; i < (int)_max_task_num; ++i) { ysr@777: CMTask* task = _tasks[i]; ysr@777: guarantee( !task->co_tracker_enabled(), "invariant" ); ysr@777: } ysr@777: } ysr@777: } ysr@777: ysr@777: // abandon current marking iteration due to a Full GC ysr@777: void ConcurrentMark::abort() { ysr@777: // If we're not marking, nothing to do. ysr@777: if (!G1ConcMark) return; ysr@777: ysr@777: // Clear all marks to force marking thread to do nothing ysr@777: _nextMarkBitMap->clearAll(); ysr@777: // Empty mark stack ysr@777: clear_marking_state(); ysr@777: for (int i = 0; i < (int)_max_task_num; ++i) ysr@777: _tasks[i]->clear_region_fields(); ysr@777: _has_aborted = true; ysr@777: ysr@777: SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); ysr@777: satb_mq_set.abandon_partial_marking(); ysr@777: satb_mq_set.set_active_all_threads(false); ysr@777: } ysr@777: ysr@777: static void print_ms_time_info(const char* prefix, const char* name, ysr@777: NumberSeq& ns) { ysr@777: gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", ysr@777: prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); ysr@777: if (ns.num() > 0) { ysr@777: gclog_or_tty->print_cr("%s [std. dev = %8.2f ms, max = %8.2f ms]", ysr@777: prefix, ns.sd(), ns.maximum()); ysr@777: } ysr@777: } ysr@777: ysr@777: void ConcurrentMark::print_summary_info() { ysr@777: gclog_or_tty->print_cr(" Concurrent marking:"); ysr@777: print_ms_time_info(" ", "init marks", _init_times); ysr@777: print_ms_time_info(" ", "remarks", _remark_times); ysr@777: { ysr@777: print_ms_time_info(" ", "final marks", _remark_mark_times); ysr@777: print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); ysr@777: ysr@777: } ysr@777: print_ms_time_info(" ", "cleanups", _cleanup_times); ysr@777: gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", ysr@777: _total_counting_time, ysr@777: (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / ysr@777: (double)_cleanup_times.num() ysr@777: : 0.0)); ysr@777: if (G1ScrubRemSets) { ysr@777: gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", ysr@777: _total_rs_scrub_time, ysr@777: (_cleanup_times.num() > 0 ? 
_total_rs_scrub_time * 1000.0 / ysr@777: (double)_cleanup_times.num() ysr@777: : 0.0)); ysr@777: } ysr@777: gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", ysr@777: (_init_times.sum() + _remark_times.sum() + ysr@777: _cleanup_times.sum())/1000.0); ysr@777: gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " ysr@777: "(%8.2f s marking, %8.2f s counting).", ysr@777: cmThread()->vtime_accum(), ysr@777: cmThread()->vtime_mark_accum(), ysr@777: cmThread()->vtime_count_accum()); ysr@777: } ysr@777: ysr@777: // Closures ysr@777: // XXX: there seems to be a lot of code duplication here; ysr@777: // should refactor and consolidate the shared code. ysr@777: ysr@777: // This closure is used to mark refs into the CMS generation in ysr@777: // the CMS bit map. Called at the first checkpoint. ysr@777: ysr@777: // We take a break if someone is trying to stop the world. ysr@777: bool ConcurrentMark::do_yield_check(int worker_i) { ysr@777: if (should_yield()) { ysr@777: if (worker_i == 0) ysr@777: _g1h->g1_policy()->record_concurrent_pause(); ysr@777: cmThread()->yield(); ysr@777: if (worker_i == 0) ysr@777: _g1h->g1_policy()->record_concurrent_pause_end(); ysr@777: return true; ysr@777: } else { ysr@777: return false; ysr@777: } ysr@777: } ysr@777: ysr@777: bool ConcurrentMark::should_yield() { ysr@777: return cmThread()->should_yield(); ysr@777: } ysr@777: ysr@777: bool ConcurrentMark::containing_card_is_marked(void* p) { ysr@777: size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); ysr@777: return _card_bm.at(offset >> CardTableModRefBS::card_shift); ysr@777: } ysr@777: ysr@777: bool ConcurrentMark::containing_cards_are_marked(void* start, ysr@777: void* last) { ysr@777: return ysr@777: containing_card_is_marked(start) && ysr@777: containing_card_is_marked(last); ysr@777: } ysr@777: ysr@777: #ifndef PRODUCT ysr@777: // for debugging purposes ysr@777: void ConcurrentMark::print_finger() { ysr@777: gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, ysr@777: _heap_start, _heap_end, _finger); ysr@777: for (int i = 0; i < (int) _max_task_num; ++i) { ysr@777: gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); ysr@777: } ysr@777: gclog_or_tty->print_cr(""); ysr@777: } ysr@777: #endif ysr@777: ysr@777: // Closure for iteration over bitmaps ysr@777: class CMBitMapClosure : public BitMapClosure { ysr@777: private: ysr@777: // the bitmap that is being iterated over ysr@777: CMBitMap* _nextMarkBitMap; ysr@777: ConcurrentMark* _cm; ysr@777: CMTask* _task; ysr@777: // true if we're scanning a heap region claimed by the task (so that ysr@777: // we move the finger along), false if we're not, i.e. currently when ysr@777: // scanning a heap region popped from the region stack (so that we ysr@777: // do not move the task finger along; it'd be a mistake if we did so). 
ysr@777: bool _scanning_heap_region; ysr@777: ysr@777: public: ysr@777: CMBitMapClosure(CMTask *task, ysr@777: ConcurrentMark* cm, ysr@777: CMBitMap* nextMarkBitMap) ysr@777: : _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } ysr@777: ysr@777: void set_scanning_heap_region(bool scanning_heap_region) { ysr@777: _scanning_heap_region = scanning_heap_region; ysr@777: } ysr@777: ysr@777: bool do_bit(size_t offset) { ysr@777: HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); ysr@777: tmp_guarantee_CM( _nextMarkBitMap->isMarked(addr), "invariant" ); ysr@777: tmp_guarantee_CM( addr < _cm->finger(), "invariant" ); ysr@777: ysr@777: if (_scanning_heap_region) { ysr@777: statsOnly( _task->increase_objs_found_on_bitmap() ); ysr@777: tmp_guarantee_CM( addr >= _task->finger(), "invariant" ); ysr@777: // We move that task's local finger along. ysr@777: _task->move_finger_to(addr); ysr@777: } else { ysr@777: // We move the task's region finger along. ysr@777: _task->move_region_finger_to(addr); ysr@777: } ysr@777: ysr@777: _task->scan_object(oop(addr)); ysr@777: // we only partially drain the local queue and global stack ysr@777: _task->drain_local_queue(true); ysr@777: _task->drain_global_stack(true); ysr@777: ysr@777: // if the has_aborted flag has been raised, we need to bail out of ysr@777: // the iteration ysr@777: return !_task->has_aborted(); ysr@777: } ysr@777: }; ysr@777: ysr@777: // Closure for iterating over objects, currently only used for ysr@777: // processing SATB buffers. ysr@777: class CMObjectClosure : public ObjectClosure { ysr@777: private: ysr@777: CMTask* _task; ysr@777: ysr@777: public: ysr@777: void do_object(oop obj) { ysr@777: _task->deal_with_reference(obj); ysr@777: } ysr@777: ysr@777: CMObjectClosure(CMTask* task) : _task(task) { } ysr@777: }; ysr@777: ysr@777: // Closure for iterating over object fields ysr@777: class CMOopClosure : public OopClosure { ysr@777: private: ysr@777: G1CollectedHeap* _g1h; ysr@777: ConcurrentMark* _cm; ysr@777: CMTask* _task; ysr@777: ysr@777: public: ysr@777: void do_oop(narrowOop* p) { ysr@777: guarantee(false, "NYI"); ysr@777: } ysr@777: ysr@777: void do_oop(oop* p) { ysr@777: tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant" ); ysr@777: ysr@777: oop obj = *p; ysr@777: if (_cm->verbose_high()) ysr@777: gclog_or_tty->print_cr("[%d] we're looking at location " ysr@777: "*"PTR_FORMAT" = "PTR_FORMAT, ysr@777: _task->task_id(), p, (void*) obj); ysr@777: _task->deal_with_reference(obj); ysr@777: } ysr@777: ysr@777: CMOopClosure(G1CollectedHeap* g1h, ysr@777: ConcurrentMark* cm, ysr@777: CMTask* task) ysr@777: : _g1h(g1h), _cm(cm), _task(task) { } ysr@777: }; ysr@777: ysr@777: void CMTask::setup_for_region(HeapRegion* hr) { ysr@777: tmp_guarantee_CM( hr != NULL && !hr->continuesHumongous(), ysr@777: "claim_region() should have filtered out continues humongous regions" ); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT, ysr@777: _task_id, hr); ysr@777: ysr@777: _curr_region = hr; ysr@777: _finger = hr->bottom(); ysr@777: update_region_limit(); ysr@777: } ysr@777: ysr@777: void CMTask::update_region_limit() { ysr@777: HeapRegion* hr = _curr_region; ysr@777: HeapWord* bottom = hr->bottom(); ysr@777: HeapWord* limit = hr->next_top_at_mark_start(); ysr@777: ysr@777: if (limit == bottom) { ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] found an empty region " ysr@777: "["PTR_FORMAT", "PTR_FORMAT")", ysr@777: _task_id, bottom, 
limit); ysr@777: // The region was collected underneath our feet. ysr@777: // We set the finger to bottom to ensure that the bitmap ysr@777: // iteration that will follow this will not do anything. ysr@777: // (this is not a condition that holds when we set the region up, ysr@777: // as the region is not supposed to be empty in the first place) ysr@777: _finger = bottom; ysr@777: } else if (limit >= _region_limit) { ysr@777: tmp_guarantee_CM( limit >= _finger, "peace of mind" ); ysr@777: } else { ysr@777: tmp_guarantee_CM( limit < _region_limit, "only way to get here" ); ysr@777: // This can happen under some pretty unusual circumstances. An ysr@777: // evacuation pause empties the region underneath our feet (NTAMS ysr@777: // at bottom). We then do some allocation in the region (NTAMS ysr@777: // stays at bottom), followed by the region being used as a GC ysr@777: // alloc region (NTAMS will move to top() and the objects ysr@777: // originally below it will be grayed). All objects now marked in ysr@777: // the region are explicitly grayed, if below the global finger, ysr@777: // and we do not need in fact to scan anything else. So, we simply ysr@777: // set _finger to be limit to ensure that the bitmap iteration ysr@777: // doesn't do anything. ysr@777: _finger = limit; ysr@777: } ysr@777: ysr@777: _region_limit = limit; ysr@777: } ysr@777: ysr@777: void CMTask::giveup_current_region() { ysr@777: tmp_guarantee_CM( _curr_region != NULL, "invariant" ); ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT, ysr@777: _task_id, _curr_region); ysr@777: clear_region_fields(); ysr@777: } ysr@777: ysr@777: void CMTask::clear_region_fields() { ysr@777: // Values for these three fields that indicate that we're not ysr@777: // holding on to a region. ysr@777: _curr_region = NULL; ysr@777: _finger = NULL; ysr@777: _region_limit = NULL; ysr@777: ysr@777: _region_finger = NULL; ysr@777: } ysr@777: ysr@777: void CMTask::reset(CMBitMap* nextMarkBitMap) { ysr@777: guarantee( nextMarkBitMap != NULL, "invariant" ); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] resetting", _task_id); ysr@777: ysr@777: _nextMarkBitMap = nextMarkBitMap; ysr@777: clear_region_fields(); ysr@777: ysr@777: _calls = 0; ysr@777: _elapsed_time_ms = 0.0; ysr@777: _termination_time_ms = 0.0; ysr@777: _termination_start_time_ms = 0.0; ysr@777: ysr@777: #if _MARKING_STATS_ ysr@777: _local_pushes = 0; ysr@777: _local_pops = 0; ysr@777: _local_max_size = 0; ysr@777: _objs_scanned = 0; ysr@777: _global_pushes = 0; ysr@777: _global_pops = 0; ysr@777: _global_max_size = 0; ysr@777: _global_transfers_to = 0; ysr@777: _global_transfers_from = 0; ysr@777: _region_stack_pops = 0; ysr@777: _regions_claimed = 0; ysr@777: _objs_found_on_bitmap = 0; ysr@777: _satb_buffers_processed = 0; ysr@777: _steal_attempts = 0; ysr@777: _steals = 0; ysr@777: _aborted = 0; ysr@777: _aborted_overflow = 0; ysr@777: _aborted_cm_aborted = 0; ysr@777: _aborted_yield = 0; ysr@777: _aborted_timed_out = 0; ysr@777: _aborted_satb = 0; ysr@777: _aborted_termination = 0; ysr@777: #endif // _MARKING_STATS_ ysr@777: } ysr@777: ysr@777: bool CMTask::should_exit_termination() { ysr@777: regular_clock_call(); ysr@777: // This is called when we are in the termination protocol. We should ysr@777: // quit if, for some reason, this task wants to abort or the global ysr@777: // stack is not empty (this means that we can get work from it). 
ysr@777:   return !_cm->mark_stack_empty() || has_aborted();
ysr@777: }
ysr@777: 
ysr@777: // This determines whether the method below will check both the local
ysr@777: // and global fingers when determining whether to push on the stack a
ysr@777: // gray object (value 1) or whether it will only check the global one
ysr@777: // (value 0). The tradeoffs are that the former will be a bit more
ysr@777: // accurate and possibly push less on the stack, but it might also be
ysr@777: // a little bit slower.
ysr@777: 
ysr@777: #define _CHECK_BOTH_FINGERS_ 1
ysr@777: 
ysr@777: void CMTask::deal_with_reference(oop obj) {
ysr@777:   if (_cm->verbose_high())
ysr@777:     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
ysr@777:                            _task_id, (void*) obj);
ysr@777: 
ysr@777:   ++_refs_reached;
ysr@777: 
ysr@777:   HeapWord* objAddr = (HeapWord*) obj;
ysr@777:   if (_g1h->is_in_g1_reserved(objAddr)) {
ysr@777:     tmp_guarantee_CM( obj != NULL, "is_in_g1_reserved should ensure this" );
ysr@777:     HeapRegion* hr = _g1h->heap_region_containing(obj);
ysr@777:     if (_g1h->is_obj_ill(obj, hr)) {
ysr@777:       if (_cm->verbose_high())
ysr@777:         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
ysr@777:                                _task_id, (void*) obj);
ysr@777: 
ysr@777:       // we need to mark it first
ysr@777:       if (_nextMarkBitMap->parMark(objAddr)) {
ysr@777:         // No OrderAccess::store_load() is needed. It is implicit in the
ysr@777:         // CAS done in parMark(objAddr) above
ysr@777:         HeapWord* global_finger = _cm->finger();
ysr@777: 
ysr@777: #if _CHECK_BOTH_FINGERS_
ysr@777:         // we will check both the local and global fingers
ysr@777: 
ysr@777:         if (_finger != NULL && objAddr < _finger) {
ysr@777:           if (_cm->verbose_high())
ysr@777:             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
ysr@777:                                    "pushing it", _task_id, _finger);
ysr@777:           push(obj);
ysr@777:         } else if (_curr_region != NULL && objAddr < _region_limit) {
ysr@777:           // do nothing
ysr@777:         } else if (objAddr < global_finger) {
ysr@777:           // Notice that the global finger might be moving forward
ysr@777:           // concurrently. This is not a problem. In the worst case, we
ysr@777:           // mark the object while it is above the global finger and, by
ysr@777:           // the time we read the global finger, it has moved forward
ysr@777:           // past this object. In this case, the object will probably
ysr@777:           // be visited when a task is scanning the region and will also
ysr@777:           // be pushed on the stack. So, some duplicate work, but no
ysr@777:           // correctness problems.
ysr@777: ysr@777: if (_cm->verbose_high()) ysr@777: gclog_or_tty->print_cr("[%d] below the global finger " ysr@777: "("PTR_FORMAT"), pushing it", ysr@777: _task_id, global_finger); ysr@777: push(obj); ysr@777: } else { ysr@777: // do nothing ysr@777: } ysr@777: #else // _CHECK_BOTH_FINGERS_ ysr@777: // we will only check the global finger ysr@777: ysr@777: if (objAddr < global_finger) { ysr@777: // see long comment above ysr@777: ysr@777: if (_cm->verbose_high()) ysr@777: gclog_or_tty->print_cr("[%d] below the global finger " ysr@777: "("PTR_FORMAT"), pushing it", ysr@777: _task_id, global_finger); ysr@777: push(obj); ysr@777: } ysr@777: #endif // _CHECK_BOTH_FINGERS_ ysr@777: } ysr@777: } ysr@777: } ysr@777: } ysr@777: ysr@777: void CMTask::push(oop obj) { ysr@777: HeapWord* objAddr = (HeapWord*) obj; ysr@777: tmp_guarantee_CM( _g1h->is_in_g1_reserved(objAddr), "invariant" ); ysr@777: tmp_guarantee_CM( !_g1h->is_obj_ill(obj), "invariant" ); ysr@777: tmp_guarantee_CM( _nextMarkBitMap->isMarked(objAddr), "invariant" ); ysr@777: ysr@777: if (_cm->verbose_high()) ysr@777: gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj); ysr@777: ysr@777: if (!_task_queue->push(obj)) { ysr@777: // The local task queue looks full. We need to push some entries ysr@777: // to the global stack. ysr@777: ysr@777: if (_cm->verbose_medium()) ysr@777: gclog_or_tty->print_cr("[%d] task queue overflow, " ysr@777: "moving entries to the global stack", ysr@777: _task_id); ysr@777: move_entries_to_global_stack(); ysr@777: ysr@777: // this should succeed since, even if we overflow the global ysr@777: // stack, we should have definitely removed some entries from the ysr@777: // local queue. So, there must be space on it. ysr@777: bool success = _task_queue->push(obj); ysr@777: tmp_guarantee_CM( success, "invariant" ); ysr@777: } ysr@777: ysr@777: statsOnly( int tmp_size = _task_queue->size(); ysr@777: if (tmp_size > _local_max_size) ysr@777: _local_max_size = tmp_size; ysr@777: ++_local_pushes ); ysr@777: } ysr@777: ysr@777: void CMTask::reached_limit() { ysr@777: tmp_guarantee_CM( _words_scanned >= _words_scanned_limit || ysr@777: _refs_reached >= _refs_reached_limit , ysr@777: "shouldn't have been called otherwise" ); ysr@777: regular_clock_call(); ysr@777: } ysr@777: ysr@777: void CMTask::regular_clock_call() { ysr@777: if (has_aborted()) ysr@777: return; ysr@777: ysr@777: // First, we need to recalculate the words scanned and refs reached ysr@777: // limits for the next clock call. ysr@777: recalculate_limits(); ysr@777: ysr@777: // During the regular clock call we do the following ysr@777: ysr@777: // (1) If an overflow has been flagged, then we abort. ysr@777: if (_cm->has_overflown()) { ysr@777: set_has_aborted(); ysr@777: return; ysr@777: } ysr@777: ysr@777: // If we are not concurrent (i.e. we're doing remark) we don't need ysr@777: // to check anything else. The other steps are only needed during ysr@777: // the concurrent marking phase. ysr@777: if (!concurrent()) ysr@777: return; ysr@777: ysr@777: // (2) If marking has been aborted for Full GC, then we also abort. ysr@777: if (_cm->has_aborted()) { ysr@777: set_has_aborted(); ysr@777: statsOnly( ++_aborted_cm_aborted ); ysr@777: return; ysr@777: } ysr@777: ysr@777: double curr_time_ms = os::elapsedVTime() * 1000.0; ysr@777: ysr@777: // (3) If marking stats are enabled, then we update the step history. 
ysr@777: #if _MARKING_STATS_
ysr@777:   if (_words_scanned >= _words_scanned_limit)
ysr@777:     ++_clock_due_to_scanning;
ysr@777:   if (_refs_reached >= _refs_reached_limit)
ysr@777:     ++_clock_due_to_marking;
ysr@777: 
ysr@777:   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
ysr@777:   _interval_start_time_ms = curr_time_ms;
ysr@777:   _all_clock_intervals_ms.add(last_interval_ms);
ysr@777: 
ysr@777:   if (_cm->verbose_medium()) {
ysr@777:     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
ysr@777:                            "scanned = %d%s, refs reached = %d%s",
ysr@777:                            _task_id, last_interval_ms,
ysr@777:                            _words_scanned,
ysr@777:                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
ysr@777:                            _refs_reached,
ysr@777:                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
ysr@777:   }
ysr@777: #endif // _MARKING_STATS_
ysr@777: 
ysr@777:   // (4) We check whether we should yield. If we have to, then we abort.
ysr@777:   if (_cm->should_yield()) {
ysr@777:     // We should yield. To do this we abort the task. The caller is
ysr@777:     // responsible for yielding.
ysr@777:     set_has_aborted();
ysr@777:     statsOnly( ++_aborted_yield );
ysr@777:     return;
ysr@777:   }
ysr@777: 
ysr@777:   // (5) We check whether we've reached our time quota. If we have,
ysr@777:   // then we abort.
ysr@777:   double elapsed_time_ms = curr_time_ms - _start_time_ms;
ysr@777:   if (elapsed_time_ms > _time_target_ms) {
ysr@777:     set_has_aborted();
ysr@777:     _has_aborted_timed_out = true;
ysr@777:     statsOnly( ++_aborted_timed_out );
ysr@777:     return;
ysr@777:   }
ysr@777: 
ysr@777:   // (6) Finally, we check whether there are enough completed SATB
ysr@777:   // buffers available for processing. If there are, we abort.
ysr@777:   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
ysr@777:   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
ysr@777:     if (_cm->verbose_low())
ysr@777:       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
ysr@777:                              _task_id);
ysr@777:     // we do need to process SATB buffers, so we'll abort and restart
ysr@777:     // the marking task to do so
ysr@777:     set_has_aborted();
ysr@777:     statsOnly( ++_aborted_satb );
ysr@777:     return;
ysr@777:   }
ysr@777: }
ysr@777: 
ysr@777: void CMTask::recalculate_limits() {
ysr@777:   _real_words_scanned_limit = _words_scanned + words_scanned_period;
ysr@777:   _words_scanned_limit = _real_words_scanned_limit;
ysr@777: 
ysr@777:   _real_refs_reached_limit = _refs_reached + refs_reached_period;
ysr@777:   _refs_reached_limit = _real_refs_reached_limit;
ysr@777: }
ysr@777: 
ysr@777: void CMTask::decrease_limits() {
ysr@777:   // This is called when we believe that we're going to do an infrequent
ysr@777:   // operation which will increase the cost per byte scanned (i.e. move
ysr@777:   // entries to/from the global stack). It basically tries to decrease the
ysr@777:   // scanning limit so that the clock is called earlier.
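  // A worked example with purely hypothetical numbers (they are not the
  // actual flag values): if words_scanned_period were 12288 and
  // _real_words_scanned_limit were 100000, the assignment below would set
  // _words_scanned_limit to 100000 - 3 * 12288 / 4 = 90784. In other words,
  // the next clock call is triggered once a quarter of a period has been
  // scanned since the last recalculate_limits() call, instead of a full
  // period. The refs_reached limit is lowered in the same way.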
ysr@777: ysr@777: if (_cm->verbose_medium()) ysr@777: gclog_or_tty->print_cr("[%d] decreasing limits", _task_id); ysr@777: ysr@777: _words_scanned_limit = _real_words_scanned_limit - ysr@777: 3 * words_scanned_period / 4; ysr@777: _refs_reached_limit = _real_refs_reached_limit - ysr@777: 3 * refs_reached_period / 4; ysr@777: } ysr@777: ysr@777: void CMTask::move_entries_to_global_stack() { ysr@777: // local array where we'll store the entries that will be popped ysr@777: // from the local queue ysr@777: oop buffer[global_stack_transfer_size]; ysr@777: ysr@777: int n = 0; ysr@777: oop obj; ysr@777: while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) { ysr@777: buffer[n] = obj; ysr@777: ++n; ysr@777: } ysr@777: ysr@777: if (n > 0) { ysr@777: // we popped at least one entry from the local queue ysr@777: ysr@777: statsOnly( ++_global_transfers_to; _local_pops += n ); ysr@777: ysr@777: if (!_cm->mark_stack_push(buffer, n)) { ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id); ysr@777: set_has_aborted(); ysr@777: } else { ysr@777: // the transfer was successful ysr@777: ysr@777: if (_cm->verbose_medium()) ysr@777: gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack", ysr@777: _task_id, n); ysr@777: statsOnly( int tmp_size = _cm->mark_stack_size(); ysr@777: if (tmp_size > _global_max_size) ysr@777: _global_max_size = tmp_size; ysr@777: _global_pushes += n ); ysr@777: } ysr@777: } ysr@777: ysr@777: // this operation was quite expensive, so decrease the limits ysr@777: decrease_limits(); ysr@777: } ysr@777: ysr@777: void CMTask::get_entries_from_global_stack() { ysr@777: // local array where we'll store the entries that will be popped ysr@777: // from the global stack. ysr@777: oop buffer[global_stack_transfer_size]; ysr@777: int n; ysr@777: _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n); ysr@777: tmp_guarantee_CM( n <= global_stack_transfer_size, ysr@777: "we should not pop more than the given limit" ); ysr@777: if (n > 0) { ysr@777: // yes, we did actually pop at least one entry ysr@777: ysr@777: statsOnly( ++_global_transfers_from; _global_pops += n ); ysr@777: if (_cm->verbose_medium()) ysr@777: gclog_or_tty->print_cr("[%d] popped %d entries from the global stack", ysr@777: _task_id, n); ysr@777: for (int i = 0; i < n; ++i) { ysr@777: bool success = _task_queue->push(buffer[i]); ysr@777: // We only call this when the local queue is empty or under a ysr@777: // given target limit. So, we do not expect this push to fail. ysr@777: tmp_guarantee_CM( success, "invariant" ); ysr@777: } ysr@777: ysr@777: statsOnly( int tmp_size = _task_queue->size(); ysr@777: if (tmp_size > _local_max_size) ysr@777: _local_max_size = tmp_size; ysr@777: _local_pushes += n ); ysr@777: } ysr@777: ysr@777: // this operation was quite expensive, so decrease the limits ysr@777: decrease_limits(); ysr@777: } ysr@777: ysr@777: void CMTask::drain_local_queue(bool partially) { ysr@777: if (has_aborted()) ysr@777: return; ysr@777: ysr@777: // Decide what the target size is, depending whether we're going to ysr@777: // drain it partially (so that other tasks can steal if they run out ysr@777: // of things to do) or totally (at the very end). 
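  // A worked example with hypothetical values: if max_elems() returned 16384
  // and GCDrainStackTargetSize were 64, a partial drain would use
  // target_size = MIN2(16384 / 3, 64) = 64 below, so the queue is drained
  // down to a small but non-empty size and other tasks can still steal from
  // it. A total drain always uses target_size = 0.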
ysr@777: size_t target_size; ysr@777: if (partially) ysr@777: target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize); ysr@777: else ysr@777: target_size = 0; ysr@777: ysr@777: if (_task_queue->size() > target_size) { ysr@777: if (_cm->verbose_high()) ysr@777: gclog_or_tty->print_cr("[%d] draining local queue, target size = %d", ysr@777: _task_id, target_size); ysr@777: ysr@777: oop obj; ysr@777: bool ret = _task_queue->pop_local(obj); ysr@777: while (ret) { ysr@777: statsOnly( ++_local_pops ); ysr@777: ysr@777: if (_cm->verbose_high()) ysr@777: gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id, ysr@777: (void*) obj); ysr@777: ysr@777: tmp_guarantee_CM( _g1h->is_in_g1_reserved((HeapWord*) obj), ysr@777: "invariant" ); ysr@777: ysr@777: scan_object(obj); ysr@777: ysr@777: if (_task_queue->size() <= target_size || has_aborted()) ysr@777: ret = false; ysr@777: else ysr@777: ret = _task_queue->pop_local(obj); ysr@777: } ysr@777: ysr@777: if (_cm->verbose_high()) ysr@777: gclog_or_tty->print_cr("[%d] drained local queue, size = %d", ysr@777: _task_id, _task_queue->size()); ysr@777: } ysr@777: } ysr@777: ysr@777: void CMTask::drain_global_stack(bool partially) { ysr@777: if (has_aborted()) ysr@777: return; ysr@777: ysr@777: // We have a policy to drain the local queue before we attempt to ysr@777: // drain the global stack. ysr@777: tmp_guarantee_CM( partially || _task_queue->size() == 0, "invariant" ); ysr@777: ysr@777: // Decide what the target size is, depending whether we're going to ysr@777: // drain it partially (so that other tasks can steal if they run out ysr@777: // of things to do) or totally (at the very end). Notice that, ysr@777: // because we move entries from the global stack in chunks or ysr@777: // because another task might be doing the same, we might in fact ysr@777: // drop below the target. But, this is not a problem. ysr@777: size_t target_size; ysr@777: if (partially) ysr@777: target_size = _cm->partial_mark_stack_size_target(); ysr@777: else ysr@777: target_size = 0; ysr@777: ysr@777: if (_cm->mark_stack_size() > target_size) { ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] draining global_stack, target size %d", ysr@777: _task_id, target_size); ysr@777: ysr@777: while (!has_aborted() && _cm->mark_stack_size() > target_size) { ysr@777: get_entries_from_global_stack(); ysr@777: drain_local_queue(partially); ysr@777: } ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] drained global stack, size = %d", ysr@777: _task_id, _cm->mark_stack_size()); ysr@777: } ysr@777: } ysr@777: ysr@777: // SATB Queue has several assumptions on whether to call the par or ysr@777: // non-par versions of the methods. this is why some of the code is ysr@777: // replicated. We should really get rid of the single-threaded version ysr@777: // of the code to simplify things. ysr@777: void CMTask::drain_satb_buffers() { ysr@777: if (has_aborted()) ysr@777: return; ysr@777: ysr@777: // We set this so that the regular clock knows that we're in the ysr@777: // middle of draining buffers and doesn't set the abort flag when it ysr@777: // notices that SATB buffers are available for draining. It'd be ysr@777: // very counter productive if it did that. 
:-) ysr@777: _draining_satb_buffers = true; ysr@777: ysr@777: CMObjectClosure oc(this); ysr@777: SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); ysr@777: if (ParallelGCThreads > 0) ysr@777: satb_mq_set.set_par_closure(_task_id, &oc); ysr@777: else ysr@777: satb_mq_set.set_closure(&oc); ysr@777: ysr@777: // This keeps claiming and applying the closure to completed buffers ysr@777: // until we run out of buffers or we need to abort. ysr@777: if (ParallelGCThreads > 0) { ysr@777: while (!has_aborted() && ysr@777: satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { ysr@777: if (_cm->verbose_medium()) ysr@777: gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); ysr@777: statsOnly( ++_satb_buffers_processed ); ysr@777: regular_clock_call(); ysr@777: } ysr@777: } else { ysr@777: while (!has_aborted() && ysr@777: satb_mq_set.apply_closure_to_completed_buffer()) { ysr@777: if (_cm->verbose_medium()) ysr@777: gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); ysr@777: statsOnly( ++_satb_buffers_processed ); ysr@777: regular_clock_call(); ysr@777: } ysr@777: } ysr@777: ysr@777: if (!concurrent() && !has_aborted()) { ysr@777: // We should only do this during remark. ysr@777: if (ParallelGCThreads > 0) ysr@777: satb_mq_set.par_iterate_closure_all_threads(_task_id); ysr@777: else ysr@777: satb_mq_set.iterate_closure_all_threads(); ysr@777: } ysr@777: ysr@777: _draining_satb_buffers = false; ysr@777: ysr@777: tmp_guarantee_CM( has_aborted() || ysr@777: concurrent() || ysr@777: satb_mq_set.completed_buffers_num() == 0, "invariant" ); ysr@777: ysr@777: if (ParallelGCThreads > 0) ysr@777: satb_mq_set.set_par_closure(_task_id, NULL); ysr@777: else ysr@777: satb_mq_set.set_closure(NULL); ysr@777: ysr@777: // again, this was a potentially expensive operation, decrease the ysr@777: // limits to get the regular clock call early ysr@777: decrease_limits(); ysr@777: } ysr@777: ysr@777: void CMTask::drain_region_stack(BitMapClosure* bc) { ysr@777: if (has_aborted()) ysr@777: return; ysr@777: ysr@777: tmp_guarantee_CM( _region_finger == NULL, ysr@777: "it should be NULL when we're not scanning a region" ); ysr@777: ysr@777: if (!_cm->region_stack_empty()) { ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] draining region stack, size = %d", ysr@777: _task_id, _cm->region_stack_size()); ysr@777: ysr@777: MemRegion mr = _cm->region_stack_pop(); ysr@777: // it returns MemRegion() if the pop fails ysr@777: statsOnly(if (mr.start() != NULL) ++_region_stack_pops ); ysr@777: ysr@777: while (mr.start() != NULL) { ysr@777: if (_cm->verbose_medium()) ysr@777: gclog_or_tty->print_cr("[%d] we are scanning region " ysr@777: "["PTR_FORMAT", "PTR_FORMAT")", ysr@777: _task_id, mr.start(), mr.end()); ysr@777: tmp_guarantee_CM( mr.end() <= _cm->finger(), ysr@777: "otherwise the region shouldn't be on the stack" ); ysr@777: assert(!mr.is_empty(), "Only non-empty regions live on the region stack"); ysr@777: if (_nextMarkBitMap->iterate(bc, mr)) { ysr@777: tmp_guarantee_CM( !has_aborted(), ysr@777: "cannot abort the task without aborting the bitmap iteration" ); ysr@777: ysr@777: // We finished iterating over the region without aborting. 
ysr@777:         regular_clock_call();
ysr@777:         if (has_aborted())
ysr@777:           mr = MemRegion();
ysr@777:         else {
ysr@777:           mr = _cm->region_stack_pop();
ysr@777:           // it returns MemRegion() if the pop fails
ysr@777:           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
ysr@777:         }
ysr@777:       } else {
ysr@777:         guarantee( has_aborted(), "currently the only way to do so" );
ysr@777: 
ysr@777:         // The only way to abort the bitmap iteration is to return
ysr@777:         // false from the do_bit() method. However, inside the
ysr@777:         // do_bit() method we move the _region_finger to point to the
ysr@777:         // object currently being looked at. So, if we bail out, we
ysr@777:         // have definitely set _region_finger to something non-null.
ysr@777:         guarantee( _region_finger != NULL, "invariant" );
ysr@777: 
ysr@777:         // The iteration was actually aborted. So now _region_finger
ysr@777:         // points to the address of the object we last scanned. If we
ysr@777:         // leave it there, when we restart this task, we will rescan
ysr@777:         // the object. It is easy to avoid this. We move the finger by
ysr@777:         // enough to point to the next possible object header (the
ysr@777:         // bitmap knows by how much we need to move it as it knows its
ysr@777:         // granularity).
ysr@777:         MemRegion newRegion =
ysr@777:           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
ysr@777: 
ysr@777:         if (!newRegion.is_empty()) {
ysr@777:           if (_cm->verbose_low()) {
ysr@777:             gclog_or_tty->print_cr("[%d] pushing unscanned region "
ysr@777:                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
ysr@777:                                    _task_id,
ysr@777:                                    newRegion.start(), newRegion.end());
ysr@777:           }
ysr@777:           // Now push the part of the region we didn't scan on the
ysr@777:           // region stack to make sure a task scans it later.
ysr@777:           _cm->region_stack_push(newRegion);
ysr@777:         }
ysr@777:         // break from while
ysr@777:         mr = MemRegion();
ysr@777:       }
ysr@777:       _region_finger = NULL;
ysr@777:     }
ysr@777: 
ysr@777:     // We only push regions on the region stack during evacuation
ysr@777:     // pauses. So if we come out of the above iteration because the
ysr@777:     // region stack is empty, it will remain empty until the next yield
ysr@777:     // point. So, the guarantee below is safe.
ysr@777: guarantee( has_aborted() || _cm->region_stack_empty(), ysr@777: "only way to exit the loop" ); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] drained region stack, size = %d", ysr@777: _task_id, _cm->region_stack_size()); ysr@777: } ysr@777: } ysr@777: ysr@777: void CMTask::print_stats() { ysr@777: gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", ysr@777: _task_id, _calls); ysr@777: gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", ysr@777: _elapsed_time_ms, _termination_time_ms); ysr@777: gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", ysr@777: _step_times_ms.num(), _step_times_ms.avg(), ysr@777: _step_times_ms.sd()); ysr@777: gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", ysr@777: _step_times_ms.maximum(), _step_times_ms.sum()); ysr@777: ysr@777: #if _MARKING_STATS_ ysr@777: gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", ysr@777: _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), ysr@777: _all_clock_intervals_ms.sd()); ysr@777: gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", ysr@777: _all_clock_intervals_ms.maximum(), ysr@777: _all_clock_intervals_ms.sum()); ysr@777: gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", ysr@777: _clock_due_to_scanning, _clock_due_to_marking); ysr@777: gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", ysr@777: _objs_scanned, _objs_found_on_bitmap); ysr@777: gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", ysr@777: _local_pushes, _local_pops, _local_max_size); ysr@777: gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", ysr@777: _global_pushes, _global_pops, _global_max_size); ysr@777: gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", ysr@777: _global_transfers_to,_global_transfers_from); ysr@777: gclog_or_tty->print_cr(" Regions: claimed = %d, Region Stack: pops = %d", ysr@777: _regions_claimed, _region_stack_pops); ysr@777: gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); ysr@777: gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", ysr@777: _steal_attempts, _steals); ysr@777: gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); ysr@777: gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", ysr@777: _aborted_overflow, _aborted_cm_aborted, _aborted_yield); ysr@777: gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", ysr@777: _aborted_timed_out, _aborted_satb, _aborted_termination); ysr@777: #endif // _MARKING_STATS_ ysr@777: } ysr@777: ysr@777: /***************************************************************************** ysr@777: ysr@777: The do_marking_step(time_target_ms) method is the building block ysr@777: of the parallel marking framework. It can be called in parallel ysr@777: with other invocations of do_marking_step() on different tasks ysr@777: (but only one per task, obviously) and concurrently with the ysr@777: mutator threads, or during remark, hence it eliminates the need ysr@777: for two versions of the code. When called during remark, it will ysr@777: pick up from where the task left off during the concurrent marking ysr@777: phase. Interestingly, tasks are also claimable during evacuation ysr@777: pauses too, since do_marking_step() ensures that it aborts before ysr@777: it needs to yield. 
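
    In outline, a single invocation proceeds roughly as follows (an
    informal sketch of the control flow implemented below, not verbatim
    code; the helper names are the ones used by CMTask):

      do_marking_step(target_ms)
        drain_satb_buffers()            process completed SATB buffers
        drain_region_stack()            rescan areas pushed during pauses
        repeat
          claim a region and scan its bitmap from the finger
          partially drain the local queue and the global mark stack
        until out of regions or aborted
        drain_satb_buffers()            reduce the work left for remark
        totally drain the local queue and the global mark stack
        attempt to steal from other tasks' queues while work remains
        offer termination, or abort so that the caller can retry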
ysr@777: 
ysr@777:     The data structures that it uses to do marking work are the
ysr@777:     following:
ysr@777: 
ysr@777:     (1) Marking Bitmap. If there are gray objects that appear only
ysr@777:     on the bitmap (this happens either when dealing with an overflow
ysr@777:     or when the initial marking phase has simply marked the roots
ysr@777:     and didn't push them on the stack), then tasks claim heap
ysr@777:     regions whose bitmap they then scan to find gray objects. A
ysr@777:     global finger indicates where the end of the last claimed region
ysr@777:     is. A local finger indicates how far into the region a task has
ysr@777:     scanned. The two fingers are used to determine how to gray an
ysr@777:     object (i.e. whether simply marking it is OK, as it will be
ysr@777:     visited by a task in the future, or whether it needs to be also
ysr@777:     pushed on a stack).
ysr@777: 
ysr@777:     (2) Local Queue. The task's local queue, which the task can
ysr@777:     access reasonably efficiently. Other tasks can steal from it
ysr@777:     when they run out of work. Throughout the marking phase, a
ysr@777:     task attempts to keep its local queue short but not totally
ysr@777:     empty, so that entries are available for stealing by other
ysr@777:     tasks. Only when there is no more work will a task totally
ysr@777:     drain its local queue.
ysr@777: 
ysr@777:     (3) Global Mark Stack. This handles local queue overflow. During
ysr@777:     marking, entries are only moved between it and the local queues
ysr@777:     in sets, as access to it requires a mutex and more fine-grained
ysr@777:     interaction with it might cause contention. If it
ysr@777:     overflows, then the marking phase should restart and iterate
ysr@777:     over the bitmap to identify gray objects. Throughout the marking
ysr@777:     phase, tasks attempt to keep the global mark stack at a small
ysr@777:     length but not totally empty, so that entries are available for
ysr@777:     popping by other tasks. Only when there is no more work will
ysr@777:     tasks totally drain the global mark stack.
ysr@777: 
ysr@777:     (4) Global Region Stack. Entries on it correspond to areas of
ysr@777:     the bitmap that need to be scanned since they contain gray
ysr@777:     objects. Pushes on the region stack only happen during
ysr@777:     evacuation pauses and typically correspond to areas covered by
ysr@777:     GC LABs. If it overflows, then the marking phase should restart
ysr@777:     and iterate over the bitmap to identify gray objects. Tasks will
ysr@777:     try to totally drain the region stack as soon as possible.
ysr@777: 
ysr@777:     (5) SATB Buffer Queue. This is where completed SATB buffers are
ysr@777:     made available. Buffers are regularly removed from this queue
ysr@777:     and scanned for roots, so that the queue doesn't get too
ysr@777:     long. During remark, all completed buffers are processed, as
ysr@777:     well as the filled-in parts of any uncompleted buffers.
ysr@777: 
ysr@777:     The do_marking_step() method tries to abort when the time target
ysr@777:     has been reached. There are a few other cases when the
ysr@777:     do_marking_step() method also aborts:
ysr@777: 
ysr@777:     (1) When the marking phase has been aborted (after a Full GC).
ysr@777: 
ysr@777:     (2) When a global overflow (either on the global stack or the
ysr@777:     region stack) has been triggered. Before the task aborts, it
ysr@777:     will actually sync up with the other tasks to ensure that all
ysr@777:     the marking data structures (local queues, stacks, fingers etc.)
ysr@777:     are re-initialised so that when do_marking_step() completes,
ysr@777:     the marking phase can immediately restart.
ysr@777: 
ysr@777:     (3) When enough completed SATB buffers are available. The
ysr@777:     do_marking_step() method only tries to drain SATB buffers right
ysr@777:     at the beginning. So, if enough buffers are available, the
ysr@777:     marking step aborts and the SATB buffers are processed at
ysr@777:     the beginning of the next invocation.
ysr@777: 
ysr@777:     (4) To yield. When we have to yield, we abort and yield
ysr@777:     right at the end of do_marking_step(). This saves us a lot
ysr@777:     of hassle as, by yielding, we might allow a Full GC. If this
ysr@777:     happens, then objects will be compacted underneath our feet, the
ysr@777:     heap might shrink, etc. We avoid having to check for all this by
ysr@777:     simply aborting and doing the yield right at the end.
ysr@777: 
ysr@777:     From the above it follows that the do_marking_step() method should
ysr@777:     be called in a loop (or, otherwise, regularly) until it completes.
ysr@777: 
ysr@777:     If a marking step completes without its has_aborted() flag being
ysr@777:     true, it means it has completed the current marking phase (and
ysr@777:     also all other marking tasks have done so and have all synced up).
ysr@777: 
ysr@777:     A method called regular_clock_call() is invoked "regularly" (in
ysr@777:     sub-ms intervals) throughout marking. It is this clock method that
ysr@777:     checks all the abort conditions which were mentioned above and
ysr@777:     decides when the task should abort. A work-based scheme is used to
ysr@777:     trigger this clock method: it is called when the number of object
ysr@777:     words the marking phase has scanned or the number of references
ysr@777:     the marking phase has visited reaches a given limit. Additional
ysr@777:     invocations of the clock method have been planted in a few other
ysr@777:     strategic places too. The initial reason for the clock method was
ysr@777:     to avoid calling vtime too regularly, as it is quite expensive. So,
ysr@777:     once it was in place, it was natural to piggy-back all the other
ysr@777:     conditions on it too and not constantly check them throughout the
ysr@777:     code.
ysr@777: 
ysr@777: *****************************************************************************/
ysr@777: 
ysr@777: void CMTask::do_marking_step(double time_target_ms) {
ysr@777:   guarantee( time_target_ms >= 1.0, "minimum granularity is 1ms" );
ysr@777:   guarantee( concurrent() == _cm->concurrent(), "they should be the same" );
ysr@777: 
ysr@777:   guarantee( concurrent() || _cm->region_stack_empty(),
ysr@777:              "the region stack should have been cleared before remark" );
ysr@777:   guarantee( _region_finger == NULL,
ysr@777:              "this should be non-null only when a region is being scanned" );
ysr@777: 
ysr@777:   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
ysr@777:   guarantee( _task_queues != NULL, "invariant" );
ysr@777:   guarantee( _task_queue != NULL, "invariant" );
ysr@777:   guarantee( _task_queues->queue(_task_id) == _task_queue, "invariant" );
ysr@777: 
ysr@777:   guarantee( !_claimed,
ysr@777:              "only one thread should claim this task at any one time" );
ysr@777: 
ysr@777:   // OK, this doesn't safeguard against all possible scenarios, as it is
ysr@777:   // possible for two threads to set the _claimed flag at the same
ysr@777:   // time. But it is only for debugging purposes anyway and it will
ysr@777:   // catch most problems.
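  // For illustration only: a stricter variant could claim the task with a
  // CAS instead, e.g. (sketch; it assumes a hypothetical jint field
  // _claimed_flag rather than the bool actually used, and is not what the
  // code below does):
  //
  //   if (Atomic::cmpxchg(1, &_claimed_flag, 0) != 0) {
  //     guarantee(false, "task claimed by two threads");
  //   }
  //
  // Since the flag is only a debugging aid, the simple check above and the
  // plain assignment below are considered good enough.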
ysr@777: _claimed = true; ysr@777: ysr@777: _start_time_ms = os::elapsedVTime() * 1000.0; ysr@777: statsOnly( _interval_start_time_ms = _start_time_ms ); ysr@777: ysr@777: double diff_prediction_ms = ysr@777: g1_policy->get_new_prediction(&_marking_step_diffs_ms); ysr@777: _time_target_ms = time_target_ms - diff_prediction_ms; ysr@777: ysr@777: // set up the variables that are used in the work-based scheme to ysr@777: // call the regular clock method ysr@777: _words_scanned = 0; ysr@777: _refs_reached = 0; ysr@777: recalculate_limits(); ysr@777: ysr@777: // clear all flags ysr@777: clear_has_aborted(); ysr@777: _has_aborted_timed_out = false; ysr@777: _draining_satb_buffers = false; ysr@777: ysr@777: ++_calls; ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, " ysr@777: "target = %1.2lfms >>>>>>>>>>", ysr@777: _task_id, _calls, _time_target_ms); ysr@777: ysr@777: // Set up the bitmap and oop closures. Anything that uses them is ysr@777: // eventually called from this method, so it is OK to allocate these ysr@777: // statically. ysr@777: CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap); ysr@777: CMOopClosure oop_closure(_g1h, _cm, this); ysr@777: set_oop_closure(&oop_closure); ysr@777: ysr@777: if (_cm->has_overflown()) { ysr@777: // This can happen if the region stack or the mark stack overflows ysr@777: // during a GC pause and this task, after a yield point, ysr@777: // restarts. We have to abort as we need to get into the overflow ysr@777: // protocol which happens right at the end of this task. ysr@777: set_has_aborted(); ysr@777: } ysr@777: ysr@777: // First drain any available SATB buffers. After this, we will not ysr@777: // look at SATB buffers before the next invocation of this method. ysr@777: // If enough completed SATB buffers are queued up, the regular clock ysr@777: // will abort this task so that it restarts. ysr@777: drain_satb_buffers(); ysr@777: // ...then partially drain the local queue and the global stack ysr@777: drain_local_queue(true); ysr@777: drain_global_stack(true); ysr@777: ysr@777: // Then totally drain the region stack. We will not look at ysr@777: // it again before the next invocation of this method. Entries on ysr@777: // the region stack are only added during evacuation pauses, for ysr@777: // which we have to yield. When we do, we abort the task anyway so ysr@777: // it will look at the region stack again when it restarts. ysr@777: bitmap_closure.set_scanning_heap_region(false); ysr@777: drain_region_stack(&bitmap_closure); ysr@777: // ...then partially drain the local queue and the global stack ysr@777: drain_local_queue(true); ysr@777: drain_global_stack(true); ysr@777: ysr@777: do { ysr@777: if (!has_aborted() && _curr_region != NULL) { ysr@777: // This means that we're already holding on to a region. ysr@777: tmp_guarantee_CM( _finger != NULL, ysr@777: "if region is not NULL, then the finger " ysr@777: "should not be NULL either" ); ysr@777: ysr@777: // We might have restarted this task after an evacuation pause ysr@777: // which might have evacuated the region we're holding on to ysr@777: // underneath our feet. Let's read its limit again to make sure ysr@777: // that we do not iterate over a region of the heap that ysr@777: // contains garbage (update_region_limit() will also move ysr@777: // _finger to the start of the region if it is found empty). 
ysr@777: update_region_limit(); ysr@777: // We will start from _finger not from the start of the region, ysr@777: // as we might be restarting this task after aborting half-way ysr@777: // through scanning this region. In this case, _finger points to ysr@777: // the address where we last found a marked object. If this is a ysr@777: // fresh region, _finger points to start(). ysr@777: MemRegion mr = MemRegion(_finger, _region_limit); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] we're scanning part " ysr@777: "["PTR_FORMAT", "PTR_FORMAT") " ysr@777: "of region "PTR_FORMAT, ysr@777: _task_id, _finger, _region_limit, _curr_region); ysr@777: ysr@777: // Let's iterate over the bitmap of the part of the ysr@777: // region that is left. ysr@777: bitmap_closure.set_scanning_heap_region(true); ysr@777: if (mr.is_empty() || ysr@777: _nextMarkBitMap->iterate(&bitmap_closure, mr)) { ysr@777: // We successfully completed iterating over the region. Now, ysr@777: // let's give up the region. ysr@777: giveup_current_region(); ysr@777: regular_clock_call(); ysr@777: } else { ysr@777: guarantee( has_aborted(), "currently the only way to do so" ); ysr@777: // The only way to abort the bitmap iteration is to return ysr@777: // false from the do_bit() method. However, inside the ysr@777: // do_bit() method we move the _finger to point to the ysr@777: // object currently being looked at. So, if we bail out, we ysr@777: // have definitely set _finger to something non-null. ysr@777: guarantee( _finger != NULL, "invariant" ); ysr@777: ysr@777: // Region iteration was actually aborted. So now _finger ysr@777: // points to the address of the object we last scanned. If we ysr@777: // leave it there, when we restart this task, we will rescan ysr@777: // the object. It is easy to avoid this. We move the finger by ysr@777: // enough to point to the next possible object header (the ysr@777: // bitmap knows by how much we need to move it as it knows its ysr@777: // granularity). ysr@777: move_finger_to(_nextMarkBitMap->nextWord(_finger)); ysr@777: } ysr@777: } ysr@777: // At this point we have either completed iterating over the ysr@777: // region we were holding on to, or we have aborted. ysr@777: ysr@777: // We then partially drain the local queue and the global stack. ysr@777: // (Do we really need this?) ysr@777: drain_local_queue(true); ysr@777: drain_global_stack(true); ysr@777: ysr@777: // Read the note on the claim_region() method on why it might ysr@777: // return NULL with potentially more regions available for ysr@777: // claiming and why we have to check out_of_regions() to determine ysr@777: // whether we're done or not. ysr@777: while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { ysr@777: // We are going to try to claim a new region. We should have ysr@777: // given up on the previous one. 
ysr@777: tmp_guarantee_CM( _curr_region == NULL && ysr@777: _finger == NULL && ysr@777: _region_limit == NULL, "invariant" ); ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); ysr@777: HeapRegion* claimed_region = _cm->claim_region(_task_id); ysr@777: if (claimed_region != NULL) { ysr@777: // Yes, we managed to claim one ysr@777: statsOnly( ++_regions_claimed ); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] we successfully claimed " ysr@777: "region "PTR_FORMAT, ysr@777: _task_id, claimed_region); ysr@777: ysr@777: setup_for_region(claimed_region); ysr@777: tmp_guarantee_CM( _curr_region == claimed_region, "invariant" ); ysr@777: } ysr@777: // It is important to call the regular clock here. It might take ysr@777: // a while to claim a region if, for example, we hit a large ysr@777: // block of empty regions. So we need to call the regular clock ysr@777: // method once round the loop to make sure it's called ysr@777: // frequently enough. ysr@777: regular_clock_call(); ysr@777: } ysr@777: ysr@777: if (!has_aborted() && _curr_region == NULL) { ysr@777: tmp_guarantee_CM( _cm->out_of_regions(), ysr@777: "at this point we should be out of regions" ); ysr@777: } ysr@777: } while ( _curr_region != NULL && !has_aborted()); ysr@777: ysr@777: if (!has_aborted()) { ysr@777: // We cannot check whether the global stack is empty, since other iveresov@778: // tasks might be pushing objects to it concurrently. We also cannot iveresov@778: // check if the region stack is empty because if a thread is aborting iveresov@778: // it can push a partially done region back. iveresov@778: tmp_guarantee_CM( _cm->out_of_regions(), ysr@777: "at this point we should be out of regions" ); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] all regions claimed", _task_id); ysr@777: ysr@777: // Try to reduce the number of available SATB buffers so that ysr@777: // remark has less work to do. ysr@777: drain_satb_buffers(); ysr@777: } ysr@777: ysr@777: // Since we've done everything else, we can now totally drain the ysr@777: // local queue and global stack. ysr@777: drain_local_queue(false); ysr@777: drain_global_stack(false); ysr@777: ysr@777: // Attempt at work stealing from other task's queues. ysr@777: if (!has_aborted()) { ysr@777: // We have not aborted. This means that we have finished all that ysr@777: // we could. Let's try to do some stealing... ysr@777: ysr@777: // We cannot check whether the global stack is empty, since other iveresov@778: // tasks might be pushing objects to it concurrently. We also cannot iveresov@778: // check if the region stack is empty because if a thread is aborting iveresov@778: // it can push a partially done region back. 
ysr@777: guarantee( _cm->out_of_regions() && ysr@777: _task_queue->size() == 0, "only way to reach here" ); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] starting to steal", _task_id); ysr@777: ysr@777: while (!has_aborted()) { ysr@777: oop obj; ysr@777: statsOnly( ++_steal_attempts ); ysr@777: ysr@777: if (_cm->try_stealing(_task_id, &_hash_seed, obj)) { ysr@777: if (_cm->verbose_medium()) ysr@777: gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully", ysr@777: _task_id, (void*) obj); ysr@777: ysr@777: statsOnly( ++_steals ); ysr@777: ysr@777: tmp_guarantee_CM( _nextMarkBitMap->isMarked((HeapWord*) obj), ysr@777: "any stolen object should be marked" ); ysr@777: scan_object(obj); ysr@777: ysr@777: // And since we're towards the end, let's totally drain the ysr@777: // local queue and global stack. ysr@777: drain_local_queue(false); ysr@777: drain_global_stack(false); ysr@777: } else { ysr@777: break; ysr@777: } ysr@777: } ysr@777: } ysr@777: ysr@777: // We still haven't aborted. Now, let's try to get into the ysr@777: // termination protocol. ysr@777: if (!has_aborted()) { ysr@777: // We cannot check whether the global stack is empty, since other iveresov@778: // tasks might be concurrently pushing objects on it. We also cannot iveresov@778: // check if the region stack is empty because if a thread is aborting iveresov@778: // it can push a partially done region back. ysr@777: guarantee( _cm->out_of_regions() && ysr@777: _task_queue->size() == 0, "only way to reach here" ); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id); ysr@777: ysr@777: _termination_start_time_ms = os::elapsedVTime() * 1000.0; ysr@777: // The CMTask class also extends the TerminatorTerminator class, ysr@777: // hence its should_exit_termination() method will also decide ysr@777: // whether to exit the termination protocol or not. ysr@777: bool finished = _cm->terminator()->offer_termination(this); ysr@777: double termination_end_time_ms = os::elapsedVTime() * 1000.0; ysr@777: _termination_time_ms += ysr@777: termination_end_time_ms - _termination_start_time_ms; ysr@777: ysr@777: if (finished) { ysr@777: // We're all done. ysr@777: ysr@777: if (_task_id == 0) { ysr@777: // let's allow task 0 to do this ysr@777: if (concurrent()) { ysr@777: guarantee( _cm->concurrent_marking_in_progress(), "invariant" ); ysr@777: // we need to set this to false before the next ysr@777: // safepoint. This way we ensure that the marking phase ysr@777: // doesn't observe any more heap expansions. ysr@777: _cm->clear_concurrent_marking_in_progress(); ysr@777: } ysr@777: } ysr@777: ysr@777: // We can now guarantee that the global stack is empty, since ysr@777: // all other tasks have finished. ysr@777: guarantee( _cm->out_of_regions() && ysr@777: _cm->region_stack_empty() && ysr@777: _cm->mark_stack_empty() && ysr@777: _task_queue->size() == 0 && ysr@777: !_cm->has_overflown() && ysr@777: !_cm->mark_stack_overflow() && ysr@777: !_cm->region_stack_overflow(), ysr@777: "only way to reach here" ); ysr@777: ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); ysr@777: } else { ysr@777: // Apparently there's more work to do. Let's abort this task. It ysr@777: // will restart it and we can hopefully find more things to do. 
ysr@777: 
ysr@777:       if (_cm->verbose_low())
ysr@777:         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
ysr@777: 
ysr@777:       set_has_aborted();
ysr@777:       statsOnly( ++_aborted_termination );
ysr@777:     }
ysr@777:   }
ysr@777: 
ysr@777:   // Mainly for debugging purposes to make sure that a pointer to the
ysr@777:   // closure which was statically allocated in this frame doesn't
ysr@777:   // escape it by accident.
ysr@777:   set_oop_closure(NULL);
ysr@777:   double end_time_ms = os::elapsedVTime() * 1000.0;
ysr@777:   double elapsed_time_ms = end_time_ms - _start_time_ms;
ysr@777:   // Update the step history.
ysr@777:   _step_times_ms.add(elapsed_time_ms);
ysr@777: 
ysr@777:   if (has_aborted()) {
ysr@777:     // The task was aborted for some reason.
ysr@777: 
ysr@777:     statsOnly( ++_aborted );
ysr@777: 
ysr@777:     if (_has_aborted_timed_out) {
ysr@777:       double diff_ms = elapsed_time_ms - _time_target_ms;
ysr@777:       // Keep statistics of how well we did with respect to hitting
ysr@777:       // our target only if we actually timed out (if we aborted for
ysr@777:       // other reasons, then the results might get skewed).
ysr@777:       _marking_step_diffs_ms.add(diff_ms);
ysr@777:     }
ysr@777: 
ysr@777:     if (_cm->has_overflown()) {
ysr@777:       // This is the interesting one. We aborted because a global
ysr@777:       // overflow was raised. This means we have to restart the
ysr@777:       // marking phase and start iterating over regions. However, in
ysr@777:       // order to do this we have to make sure that all tasks stop
ysr@777:       // what they are doing and re-initialise in a safe manner. We
ysr@777:       // will achieve this with the use of two barrier sync points.
ysr@777: 
ysr@777:       if (_cm->verbose_low())
ysr@777:         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
ysr@777: 
ysr@777:       _cm->enter_first_sync_barrier(_task_id);
ysr@777:       // When we exit this sync barrier we know that all tasks have
ysr@777:       // stopped doing marking work. So, it's now safe to
ysr@777:       // re-initialise our data structures. At the end of this method,
ysr@777:       // task 0 will clear the global data structures.
ysr@777: 
ysr@777:       statsOnly( ++_aborted_overflow );
ysr@777: 
ysr@777:       // We clear the local state of this task...
ysr@777:       clear_region_fields();
ysr@777: 
ysr@777:       // ...and enter the second barrier.
ysr@777:       _cm->enter_second_sync_barrier(_task_id);
ysr@777:       // At this point everything has been re-initialised and we're
ysr@777:       // ready to restart.
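      // A condensed sketch of the two-barrier protocol used above
      // (illustrative pseudocode only; the real work is done by the
      // ConcurrentMark methods that are called):
      //
      //   enter_first_sync_barrier(id);    wait until every task has
      //                                    stopped doing marking work
      //   clear_region_fields();           each task resets its own
      //                                    local state
      //   enter_second_sync_barrier(id);   wait until all the resets are
      //                                    done before anyone restarts
      //
      // Without the second barrier, a fast task could resume marking while
      // another task was still re-initialising its data structures.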
ysr@777: } ysr@777: ysr@777: if (_cm->verbose_low()) { ysr@777: gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " ysr@777: "elapsed = %1.2lfms <<<<<<<<<<", ysr@777: _task_id, _time_target_ms, elapsed_time_ms); ysr@777: if (_cm->has_aborted()) ysr@777: gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", ysr@777: _task_id); ysr@777: } ysr@777: } else { ysr@777: if (_cm->verbose_low()) ysr@777: gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " ysr@777: "elapsed = %1.2lfms <<<<<<<<<<", ysr@777: _task_id, _time_target_ms, elapsed_time_ms); ysr@777: } ysr@777: ysr@777: _claimed = false; ysr@777: } ysr@777: ysr@777: CMTask::CMTask(int task_id, ysr@777: ConcurrentMark* cm, ysr@777: CMTaskQueue* task_queue, ysr@777: CMTaskQueueSet* task_queues) ysr@777: : _g1h(G1CollectedHeap::heap()), ysr@777: _co_tracker(G1CMGroup), ysr@777: _task_id(task_id), _cm(cm), ysr@777: _claimed(false), ysr@777: _nextMarkBitMap(NULL), _hash_seed(17), ysr@777: _task_queue(task_queue), ysr@777: _task_queues(task_queues), ysr@777: _oop_closure(NULL) { ysr@777: guarantee( task_queue != NULL, "invariant" ); ysr@777: guarantee( task_queues != NULL, "invariant" ); ysr@777: ysr@777: statsOnly( _clock_due_to_scanning = 0; ysr@777: _clock_due_to_marking = 0 ); ysr@777: ysr@777: _marking_step_diffs_ms.add(0.5); ysr@777: }
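
// The big comment above do_marking_step() notes that the method is meant to
// be called in a loop until it completes without aborting. A simplified,
// hypothetical sketch of such a driver loop is shown below; the real callers
// elsewhere in the G1 marking code also handle yielding, overflow recovery
// and time slicing, so this is only meant to illustrate the contract.
//
//   // "task" is a CMTask owned by the caller; 10.0 is an arbitrary example
//   // time target in milliseconds (do_marking_step() requires >= 1.0).
//   do {
//     task->do_marking_step(10.0);
//     // If the step aborted (yield request, overflow, pending SATB buffers,
//     // or time-out), let the surrounding code yield or synchronise as
//     // needed and then simply call do_marking_step() again.
//   } while (task->has_aborted());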